// Path: blob/main/tests/smoke/verify/pdf-text-position.test.ts
// 12925 views
/*
 * pdf-text-position.test.ts
 *
 * Tests for the ensurePdfTextPositions verify predicate.
 * Renders a fixture document and runs various assertions including expected failures.
 *
 * Copyright (C) 2020-2025 Posit Software, PBC
 */

import { testQuartoCmd } from "../../test.ts";
import { ensurePdfTextPositions, PdfTextPositionAssertion } from "../../verify-pdf-text-position.ts";
import { assert, AssertionError } from "testing/asserts";
import { join } from "../../../src/deno_ral/path.ts";
import { safeRemoveSync, safeExistsSync } from "../../../src/core/path.ts";

const fixtureDir = "docs/verify/pdf-text-position";
const fixtureQmd = join(fixtureDir, "fixture.qmd");
const fixturePdf = join(fixtureDir, "fixture.pdf");

/**
 * Helper to assert that a function throws with an error message matching a pattern.
 *
 * @param fn - Async function that is expected to reject.
 * @param pattern - Regular expression tested against the thrown error's message
 *   (or against `String(e)` when the thrown value is not an `Error`).
 * @param description - Human-readable label included in the failure messages.
 */
async function assertThrowsWithPattern(
  fn: () => Promise<void>,
  pattern: RegExp,
  description: string,
) {
  let threw = false;
  let errorMessage = "";
  try {
    await fn();
  } catch (e) {
    threw = true;
    errorMessage = e instanceof Error ? e.message : String(e);
  }

  assert(threw, `Expected to throw for: ${description}`);
  assert(
    pattern.test(errorMessage),
    `Error message "${errorMessage}" did not match pattern ${pattern} for: ${description}`,
  );
}

// Test: Render fixture and run assertions
testQuartoCmd("render", [fixtureQmd, "--to", "typst"], [], {
  teardown: async () => {
    try {
      // Run the test assertions after render completes
      await runPositiveTests();
      await runExpectedFailureTests();
      await runSemanticTagTests();
      await runPageRoleTests();
      await runEdgeOverrideTests();
      await runDistanceConstraintTests();
      await runDistanceConstraintErrorTests();
      await runPageRoleWithEdgeTests();
      await runCenterEdgeTests();
    } finally {
      // Cleanup runs in `finally` so the rendered PDF is removed even when
      // one of the assertion suites above throws.
      if (safeExistsSync(fixturePdf)) {
        safeRemoveSync(fixturePdf);
      }
    }
  },
});

/**
 * Test positive assertions that should pass
 */
async function runPositiveTests() {
  // Test 1: Basic vertical ordering (header < title < h1 < body < footer)
  // Note: Headers and footers are page decorations without MCIDs, use role: "Decoration"
  const verticalOrdering = ensurePdfTextPositions(fixturePdf, [
    {
      subject: { text: "FIXTURE_HEADER_TEXT", role: "Decoration" },
      relation: "above",
      object: "FIXTURE_TITLE_TEXT",
    },
    { subject: "FIXTURE_TITLE_TEXT", relation: "above", object: "FIXTURE_H1_TEXT" },
    { subject: "FIXTURE_H1_TEXT", relation: "above", object: "FIXTURE_BODY_P1_TEXT" },
    {
      subject: "FIXTURE_BODY_P1_TEXT",
      relation: "above",
      object: { text: "FIXTURE_FOOTER_TEXT", role: "Decoration" },
    },
  ]);
  await verticalOrdering.verify([]);

  // Test 2: Margin positioning - use topAligned since semantic bbox may span full width
  // Note: rightOf may not work with semantic bboxes because body paragraph's bbox
  // may include the full content width
  const marginPositioning = ensurePdfTextPositions(fixturePdf, [
    { subject: "FIXTURE_MARGIN_TEXT", relation: "topAligned", object: "FIXTURE_BODY_P2_TEXT" },
  ]);
  await marginPositioning.verify([]);

  // Test 3: Heading hierarchy
  const headingHierarchy = ensurePdfTextPositions(fixturePdf, [
    { subject: "FIXTURE_H1_TEXT", relation: "above", object: "FIXTURE_H2_TEXT" },
    { subject: "FIXTURE_H2_TEXT", relation: "above", object: "FIXTURE_H3_TEXT" },
  ]);
  await headingHierarchy.verify([]);
}

/**
 * Test expected failures - each should throw with specific error messages
 */
async function runExpectedFailureTests() {
  // Error 1: Text not found
  await assertThrowsWithPattern(
    async () => {
      const predicate = ensurePdfTextPositions(fixturePdf, [
        { subject: "NONEXISTENT_TEXT_12345", relation: "above", object: "FIXTURE_BODY_P1_TEXT" },
      ]);
      await predicate.verify([]);
    },
    /Text not found.*NONEXISTENT_TEXT_12345/,
    "Text not found error",
  );

  // Error 1b: Ambiguous text (appears multiple times)
  await assertThrowsWithPattern(
    async () => {
      const predicate = ensurePdfTextPositions(fixturePdf, [
        // "paragraph" appears in multiple places in the fixture
        { subject: "paragraph", relation: "above", object: "FIXTURE_BODY_P1_TEXT" },
      ]);
      await predicate.verify([]);
    },
    /paragraph.*ambiguous.*matches/i,
    "Ambiguous text error",
  );

  // Error 2: Unknown relation (Zod validation error)
  await assertThrowsWithPattern(
    async () => {
      const predicate = ensurePdfTextPositions(fixturePdf, [
        // Use type assertion for intentionally invalid relation to test error handling
        { subject: "FIXTURE_H1_TEXT", relation: "invalidRelation", object: "FIXTURE_BODY_P1_TEXT" } as PdfTextPositionAssertion,
      ]);
      await predicate.verify([]);
    },
    /Assertion.*is invalid/,
    "Unknown relation error",
  );

  // Error 3: Different pages - comparing items on different pages should fail
  await assertThrowsWithPattern(
    async () => {
      const predicate = ensurePdfTextPositions(fixturePdf, [
        { subject: "FIXTURE_H1_TEXT", relation: "above", object: "FIXTURE_PAGE2_BODY_TEXT" },
      ]);
      await predicate.verify([]);
    },
    /Cannot compare positions.*page \d+.*page \d+/,
    "Different pages error",
  );

  // Error 4: Position assertion failed (wrong relation)
  await assertThrowsWithPattern(
    async () => {
      const predicate = ensurePdfTextPositions(fixturePdf, [
        // Footer is BELOW header, not above (both are Decorations)
        {
          subject: { text: "FIXTURE_FOOTER_TEXT", role: "Decoration" },
          relation: "above",
          object: { text: "FIXTURE_HEADER_TEXT", role: "Decoration" },
        },
      ]);
      await predicate.verify([]);
    },
    /Position assertion failed.*FIXTURE_FOOTER_TEXT.*NOT.*above/,
    "Position assertion failed error",
  );

  // Error 5: Negative assertion unexpectedly true
  await assertThrowsWithPattern(
    async () => {
      const predicate = ensurePdfTextPositions(
        fixturePdf,
        [], // No positive assertions
        [
          // This IS true, so negative assertion should fail
          { subject: "FIXTURE_H1_TEXT", relation: "above", object: "FIXTURE_H2_TEXT" },
        ],
      );
      await predicate.verify([]);
    },
    /Negative assertion failed.*FIXTURE_H1_TEXT.*IS.*above/,
    "Negative assertion unexpectedly true error",
  );

  // Error 6: Role mismatch (wrong semantic role)
  await assertThrowsWithPattern(
    async () => {
      const predicate = ensurePdfTextPositions(fixturePdf, [
        // H1 is not a Figure
        { subject: { text: "FIXTURE_H1_TEXT", role: "Figure" }, relation: "above", object: "FIXTURE_BODY_P1_TEXT" },
      ]);
      await predicate.verify([]);
    },
    /Role mismatch.*FIXTURE_H1_TEXT.*expected Figure.*got H1/,
    "Role mismatch error",
  );
}

/**
 * Test semantic role assertions
 */
async function runSemanticTagTests() {
  // Test: Correct semantic roles should pass
  const correctRoles = ensurePdfTextPositions(fixturePdf, [
    {
      subject: { text: "FIXTURE_H1_TEXT", role: "H1" },
      relation: "above",
      object: { text: "FIXTURE_BODY_P1_TEXT", role: "P" },
    },
    {
      subject: { text: "FIXTURE_H2_TEXT", role: "H2" },
      relation: "above",
      object: { text: "FIXTURE_H3_TEXT", role: "H3" },
    },
  ]);
  await correctRoles.verify([]);
}

/**
 * Test Page role - represents entire page bounds
 * Page intersects all content on that page, so directional relations should fail
 */
async function runPageRoleTests() {
  // Test: Page role should NOT be above/below/leftOf/rightOf any content on same page
  // because Page covers the entire page and thus intersects everything
  const pageNotDirectional = ensurePdfTextPositions(
    fixturePdf,
    [], // No positive assertions
    [
      // Page 1 is NOT above body text (it contains it)
      {
        subject: { role: "Page", page: 1 },
        relation: "above",
        object: "FIXTURE_BODY_P1_TEXT",
      },
      // Page 1 is NOT below anything on page 1
      {
        subject: { role: "Page", page: 1 },
        relation: "below",
        object: "FIXTURE_TITLE_TEXT",
      },
      // Page 1 is NOT leftOf anything on page 1
      {
        subject: { role: "Page", page: 1 },
        relation: "leftOf",
        object: "FIXTURE_BODY_P1_TEXT",
      },
      // Page 1 is NOT rightOf anything on page 1
      {
        subject: { role: "Page", page: 1 },
        relation: "rightOf",
        object: "FIXTURE_BODY_P1_TEXT",
      },
    ],
  );
  await pageNotDirectional.verify([]);

  // Test: Two Page selectors for same page should be aligned (both at origin 0,0)
  const pageAlignment = ensurePdfTextPositions(fixturePdf, [
    {
      subject: { role: "Page", page: 1 },
      relation: "topAligned",
      object: { role: "Page", page: 1 },
    },
    {
      subject: { role: "Page", page: 1 },
      relation: "leftAligned",
      object: { role: "Page", page: 1 },
    },
  ]);
  await pageAlignment.verify([]);
}

/**
 * Test edge override functionality for directional and alignment relations
 */
async function runEdgeOverrideTests() {
  // Test: Edge override for directional relation - compare same edges
  // H1's top edge should be above H2's top edge (both top edges)
  const edgeOverrideDirectional = ensurePdfTextPositions(fixturePdf, [
    {
      subject: { text: "FIXTURE_H1_TEXT", edge: "top" },
      relation: "above",
      object: { text: "FIXTURE_H2_TEXT", edge: "top" },
    },
  ]);
  await edgeOverrideDirectional.verify([]);

  // Test: Edge override comparing different edges of two elements
  // H1's bottom edge should be above body P1's top edge (they're spaced apart,
  // so the edges do not align but the directional relation holds)
  const edgeOverrideAlignment = ensurePdfTextPositions(fixturePdf, [
    {
      subject: { text: "FIXTURE_H1_TEXT", edge: "bottom" },
      relation: "above",
      object: { text: "FIXTURE_BODY_P1_TEXT", edge: "top" },
    },
  ]);
  await edgeOverrideAlignment.verify([]);

  // Test: rightOf with edge overrides
  // We know margin text is to the right of body text
  // Margin's left edge should be rightOf body's right edge
  const rightOfEdgeOverride = ensurePdfTextPositions(fixturePdf, [
    {
      subject: { text: "FIXTURE_MARGIN_TEXT", edge: "left" },
      relation: "rightOf",
      object: { text: "FIXTURE_BODY_P2_TEXT", edge: "right" },
    },
  ]);
  await rightOfEdgeOverride.verify([]);

  // Test: below with edge overrides
  // Body P1's top should be below H1's bottom
  const belowEdgeOverride = ensurePdfTextPositions(fixturePdf, [
    {
      subject: { text: "FIXTURE_BODY_P1_TEXT", edge: "top" },
      relation: "below",
      object: { text: "FIXTURE_H1_TEXT", edge: "bottom" },
    },
  ]);
  await belowEdgeOverride.verify([]);

  // Test: leftAligned using object-form selectors without an explicit edge
  // H1 and H2 should be left-aligned with each other within the given tolerance
  const headingsLeftAligned = ensurePdfTextPositions(fixturePdf, [
    {
      subject: { text: "FIXTURE_H1_TEXT" },
      relation: "leftAligned",
      object: { text: "FIXTURE_H2_TEXT" },
      tolerance: 5, // Allow some tolerance for heading indentation
    },
  ]);
  await headingsLeftAligned.verify([]);
}

/**
 * Test byMin/byMax distance constraint functionality
 */
async function runDistanceConstraintTests() {
  // Test: byMin constraint - H1 should be at least 1pt above H2
  const byMinTest = ensurePdfTextPositions(fixturePdf, [
    {
      subject: "FIXTURE_H1_TEXT",
      relation: "above",
      object: "FIXTURE_H2_TEXT",
      byMin: 1,
    },
  ]);
  await byMinTest.verify([]);

  // Test: byMax constraint - header decorations shouldn't be too far from title
  // Using a generous max to ensure it passes
  const byMaxTest = ensurePdfTextPositions(fixturePdf, [
    {
      subject: { text: "FIXTURE_HEADER_TEXT", role: "Decoration" },
      relation: "above",
      object: "FIXTURE_TITLE_TEXT",
      byMax: 500, // Generous max distance
    },
  ]);
  await byMaxTest.verify([]);

  // Test: byMin and byMax together - range constraint
  const byRangeTest = ensurePdfTextPositions(fixturePdf, [
    {
      subject: "FIXTURE_H1_TEXT",
      relation: "above",
      object: "FIXTURE_H2_TEXT",
      byMin: 1,
      byMax: 500, // Generous range
    },
  ]);
  await byRangeTest.verify([]);

  // Test: Negative byMin (allows overlap) should work
  // This tests that negative values are accepted
  const negativeByMinTest = ensurePdfTextPositions(fixturePdf, [
    {
      subject: "FIXTURE_H1_TEXT",
      relation: "above",
      object: "FIXTURE_H2_TEXT",
      byMin: -100, // Negative allows overlap
    },
  ]);
  await negativeByMinTest.verify([]);

  // Test: rightOf with byMin - margin should be at least some distance right of body
  const rightOfByMinTest = ensurePdfTextPositions(fixturePdf, [
    {
      subject: { text: "FIXTURE_MARGIN_TEXT", edge: "left" },
      relation: "rightOf",
      object: { text: "FIXTURE_BODY_P2_TEXT", edge: "right" },
      byMin: 1, // At least 1pt gap
    },
  ]);
  await rightOfByMinTest.verify([]);

  // Test: below with distance constraints
  const belowByMinTest = ensurePdfTextPositions(fixturePdf, [
    {
      subject: "FIXTURE_H2_TEXT",
      relation: "below",
      object: "FIXTURE_H1_TEXT",
      byMin: 1,
    },
  ]);
  await belowByMinTest.verify([]);
}

/**
 * Test error cases for distance constraints
 */
async function runDistanceConstraintErrorTests() {
  // Error: byMin/byMax with alignment relation should error (Zod .strict() catches extra keys)
  // Use type assertion to test runtime error handling for invalid YAML input
  await assertThrowsWithPattern(
    async () => {
      const predicate = ensurePdfTextPositions(fixturePdf, [
        {
          subject: "FIXTURE_H1_TEXT",
          relation: "topAligned",
          object: "FIXTURE_H2_TEXT",
          byMin: 10,
        } as PdfTextPositionAssertion,
      ]);
      await predicate.verify([]);
    },
    /Assertion.*is invalid/,
    "byMin with alignment relation error",
  );

  // Error: byMax with alignment relation should error (Zod .strict() catches extra keys)
  // Use type assertion to test runtime error handling for invalid YAML input
  await assertThrowsWithPattern(
    async () => {
      const predicate = ensurePdfTextPositions(fixturePdf, [
        {
          subject: "FIXTURE_H1_TEXT",
          relation: "leftAligned",
          object: "FIXTURE_H2_TEXT",
          byMax: 10,
        } as PdfTextPositionAssertion,
      ]);
      await predicate.verify([]);
    },
    /Assertion.*is invalid/,
    "byMax with alignment relation error",
  );

  // Error: byMin > byMax should error (caught by Zod .refine())
  await assertThrowsWithPattern(
    async () => {
      const predicate = ensurePdfTextPositions(fixturePdf, [
        {
          subject: "FIXTURE_H1_TEXT",
          relation: "above",
          object: "FIXTURE_H2_TEXT",
          byMin: 100,
          byMax: 10, // Invalid: byMin > byMax
        },
      ]);
      await predicate.verify([]);
    },
    /byMin must be <= byMax/i,
    "byMin > byMax error",
  );

  // Error: byMin constraint not satisfied (too close)
  await assertThrowsWithPattern(
    async () => {
      const predicate = ensurePdfTextPositions(fixturePdf, [
        {
          subject: "FIXTURE_H1_TEXT",
          relation: "above",
          object: "FIXTURE_H2_TEXT",
          byMin: 10000, // Unreasonably large min distance
        },
      ]);
      await predicate.verify([]);
    },
    /Position assertion failed.*distance.*byMin/i,
    "byMin constraint not satisfied error",
  );

  // Error: byMax constraint not satisfied (too far)
  await assertThrowsWithPattern(
    async () => {
      const predicate = ensurePdfTextPositions(fixturePdf, [
        {
          subject: { text: "FIXTURE_HEADER_TEXT", role: "Decoration" },
          relation: "above",
          object: { text: "FIXTURE_FOOTER_TEXT", role: "Decoration" },
          byMax: 1, // Unreasonably small max distance
        },
      ]);
      await predicate.verify([]);
    },
    /Position assertion failed.*distance.*byMax/i,
    "byMax constraint not satisfied error",
  );
}

/**
 * Test Page role with edge override functionality
 */
async function runPageRoleWithEdgeTests() {
  // Test: Page's left edge should be at x=0, content should be rightOf that
  // This verifies edge overrides work with Page role
  const pageLeftEdgeTest = ensurePdfTextPositions(fixturePdf, [
    {
      subject: "FIXTURE_BODY_P1_TEXT",
      relation: "rightOf",
      object: { role: "Page", page: 1, edge: "left" },
    },
  ]);
  await pageLeftEdgeTest.verify([]);

  // Test: Content should be below Page's top edge
  const pageTopEdgeTest = ensurePdfTextPositions(fixturePdf, [
    {
      subject: "FIXTURE_BODY_P1_TEXT",
      relation: "below",
      object: { role: "Page", page: 1, edge: "top" },
    },
  ]);
  await pageTopEdgeTest.verify([]);

  // Test: Content should be above Page's bottom edge
  const pageBottomEdgeTest = ensurePdfTextPositions(fixturePdf, [
    {
      subject: "FIXTURE_BODY_P1_TEXT",
      relation: "above",
      object: { role: "Page", page: 1, edge: "bottom" },
    },
  ]);
  await pageBottomEdgeTest.verify([]);

  // Test: Content should be leftOf Page's right edge
  const pageRightEdgeTest = ensurePdfTextPositions(fixturePdf, [
    {
      subject: "FIXTURE_BODY_P1_TEXT",
      relation: "leftOf",
      object: { role: "Page", page: 1, edge: "right" },
    },
  ]);
  await pageRightEdgeTest.verify([]);

  // Test: Page edge with byMin - content should be at least some distance from page edges
  const pageEdgeWithByMin = ensurePdfTextPositions(fixturePdf, [
    {
      subject: "FIXTURE_BODY_P1_TEXT",
      relation: "rightOf",
      object: { role: "Page", page: 1, edge: "left" },
      byMin: 1, // At least 1pt from left edge
    },
  ]);
  await pageEdgeWithByMin.verify([]);

  // Test: below Page's top edge with byMax
  // Header decoration should be below the page top, and close to it
  const headerNearPageTop = ensurePdfTextPositions(fixturePdf, [
    {
      subject: { text: "FIXTURE_HEADER_TEXT", role: "Decoration" },
      relation: "below",
      object: { role: "Page", page: 1, edge: "top" },
      byMax: 100, // Within 100pt of page top
    },
  ]);
  await headerNearPageTop.verify([]);
}

/**
 * Test centerX and centerY edge functionality
 */
async function runCenterEdgeTests() {
  // Test: centerX - title's horizontal centre should be near page's horizontal centre
  // (leftAligned on two centerX edges, with tolerance for minor misalignment)
  const centerXPageTest = ensurePdfTextPositions(fixturePdf, [
    {
      subject: { text: "FIXTURE_TITLE_TEXT", edge: "centerX" },
      relation: "leftAligned",
      object: { role: "Page", page: 1, edge: "centerX" },
      tolerance: 35,
    },
  ]);
  await centerXPageTest.verify([]);

  // Test: centerY directional - header decoration's centerY should be above title's centerY
  const centerYDirectionalTest = ensurePdfTextPositions(fixturePdf, [
    {
      subject: { text: "FIXTURE_HEADER_TEXT", role: "Decoration", edge: "centerY" },
      relation: "above",
      object: { text: "FIXTURE_TITLE_TEXT", edge: "centerY" },
    },
  ]);
  await centerYDirectionalTest.verify([]);

  // Test: centerX directional - a left-aligned heading's centerX should be leftOf page centerX
  const centerXDirectionalTest = ensurePdfTextPositions(fixturePdf, [
    {
      subject: { text: "FIXTURE_H1_TEXT", edge: "centerX" },
      relation: "leftOf",
      object: { role: "Page", page: 1, edge: "centerX" },
    },
  ]);
  await centerXDirectionalTest.verify([]);
}