// Path: blob/main/tests/verify-pdf-text-position.ts
// 12923 views
/*
 * verify-pdf-text-position.ts
 *
 * PDF text position verification using semantic structure tree.
 * Uses pdfjs-dist directly to access MCIDs and structure tree.
 *
 * REQUIREMENTS:
 * This module requires tagged PDFs with PDF 1.4+ structure tree support.
 * Tagged PDFs contain Marked Content Identifiers (MCIDs) that link text
 * content to semantic structure elements (P, H1, Figure, Table, etc.).
 *
 * Currently confirmed working:
 * - Typst: Produces tagged PDFs by default
 *
 * Not yet working:
 * - LaTeX: Requires \DocumentMetadata{} before \documentclass for tagging,
 *   which Quarto doesn't currently support. When LaTeX tagged PDF support
 *   is available, this module should work with minimal changes since we
 *   use only basic PDF 1.4 tagged structure features.
 * - ConTeXt: Pandoc supports +tagging extension, but Quarto's context
 *   format doesn't compile to PDF.
 *
 * SPECIAL ROLES:
 * - role: "Decoration" - Use for untagged page elements like headers, footers,
 *   page numbers, and other decorations. These use text item bounds directly
 *   instead of requiring MCID/structure tree support.
 * - role: "Page" - Use for the entire page bounds. Requires `page` field to
 *   specify which page number (1-indexed). The `text` field is ignored.
 *   Useful for NOT assertions since Page intersects all content on that page.
 *
 * Copyright (C) 2020-2025 Posit Software, PBC
 */

import { assert } from "testing/asserts";
import { z } from "zod";
import { ExecuteOutput, Verify } from "./test.ts";

// ============================================================================
// Zod Schemas and Type Definitions
// ============================================================================

// Edge schema for precise bbox edge selection
export const EdgeSchema = z.enum(["left", "right", "top", "bottom", "centerX", "centerY"]);
export type Edge = z.infer<typeof EdgeSchema>;

// Relation schemas
export const DirectionalRelationSchema = z.enum(["leftOf", "rightOf", "above", "below"]);
export const AlignmentRelationSchema = z.enum(["leftAligned", "rightAligned", "topAligned", "bottomAligned"]);
export const RelationSchema = z.union([DirectionalRelationSchema, AlignmentRelationSchema]);

export type DirectionalRelation = z.infer<typeof DirectionalRelationSchema>;
export type AlignmentRelation = z.infer<typeof AlignmentRelationSchema>;
export type Relation = z.infer<typeof RelationSchema>;

// Text selector schema
// Note: Label/ID checking is not supported because:
// 1. Typst does not write labels to PDF StructElem /ID attributes (labels become
//    named destinations for links, but not structure element identifiers)
// 2. Even if IDs were present, pdf.js doesn't expose /ID through getStructTree()
export const TextSelectorSchema = z.object({
  text: z.string().optional(), // Text to search for (ignored for role: "Page")
  role: z.string().optional(), // PDF 1.4 structure role: P, H1, H2, Figure, Table, Span, etc.
  page: z.number().optional(), // Page number (1-indexed), required for role: "Page"
  edge: EdgeSchema.optional(), // Which edge to use for comparison (overrides relation default)
  granularity: z.string().optional(), // Aggregate bbox to ancestor with this role (e.g., "Div", "P")
});
export type TextSelector = z.infer<typeof TextSelectorSchema>;

// Subject/object can be a string or a TextSelector
const SubjectObjectSchema = z.union([z.string(), TextSelectorSchema]);

// Tag-only assertion: validates semantic role without position comparison
export const TagOnlyAssertionSchema = z.object({
  subject: SubjectObjectSchema,
}).strict();
export type TagOnlyAssertion = z.infer<typeof TagOnlyAssertionSchema>;

// Directional assertion: leftOf, rightOf, above, below with optional distance constraints
// NOTE(review): unlike the alignment and tag-only schemas this one is not
// `.strict()`, so unknown keys (e.g. `tolerance`) appear to be accepted and
// dropped rather than rejected - confirm whether that is intended.
export const DirectionalAssertionSchema = z.object({
  subject: SubjectObjectSchema,
  relation: DirectionalRelationSchema,
  object: SubjectObjectSchema,
  byMin: z.number().optional(), // Minimum distance between edges
  byMax: z.number().optional(), // Maximum distance between edges
}).refine(
  (data) => data.byMin === undefined || data.byMax === undefined || data.byMin <= data.byMax,
  { message: "byMin must be <= byMax" },
);
export type DirectionalAssertion = z.infer<typeof DirectionalAssertionSchema>;

// Alignment assertion: leftAligned, rightAligned, topAligned, bottomAligned with tolerance
export const AlignmentAssertionSchema = z.object({
  subject: SubjectObjectSchema,
  relation: AlignmentRelationSchema,
  object: SubjectObjectSchema,
  tolerance: z.number().optional(), // Default: 2pt
}).strict();
export type AlignmentAssertion = z.infer<typeof AlignmentAssertionSchema>;

// Union of all assertion types
export const PdfTextPositionAssertionSchema = z.union([
  DirectionalAssertionSchema,
  AlignmentAssertionSchema,
  TagOnlyAssertionSchema,
]);
export type PdfTextPositionAssertion = z.infer<typeof PdfTextPositionAssertionSchema>;

// Type guards for assertion discrimination (using Zod safeParse)

/** True when `a` parses successfully against DirectionalAssertionSchema. */
export function isDirectionalAssertion(a: unknown): a is DirectionalAssertion {
  return DirectionalAssertionSchema.safeParse(a).success;
}

/** True when `a` parses successfully against AlignmentAssertionSchema. */
export function isAlignmentAssertion(a: unknown): a is AlignmentAssertion {
  return AlignmentAssertionSchema.safeParse(a).success;
}

/** True when `a` parses successfully against TagOnlyAssertionSchema. */
export function isTagOnlyAssertion(a: unknown): a is TagOnlyAssertion {
  return TagOnlyAssertionSchema.safeParse(a).success;
}

// Computed bounding box
interface BBox {
  x: number;
  y: number;
  width: number;
  height: number;
  page: number;
}

// Internal: text item with MCID tracking
interface MarkedTextItem {
  str: string;
  x: number;
  y: number;
  width: number;
  height: number;
  mcid: string | null; // e.g., "p2R_mc0"
  page: number;
}

// Structure tree node (from pdfjs-dist)
interface StructTreeNode {
  role: string;
  children?: (StructTreeNode | StructTreeContent)[];
  alt?: string;
  lang?: string;
}

interface StructTreeContent {
  type: "content" | "object" | "annotation";
  id: string;
}

// Text content item types from pdfjs-dist
interface TextItem {
  str: string;
  dir: string;
  transform: number[];
  width: number;
  height: number;
  fontName: string;
  hasEOL: boolean;
}

interface TextMarkedContent {
  type: "beginMarkedContent" | "beginMarkedContentProps" | "endMarkedContent";
  id?: string;
  tag?: string;
}

// Internal: resolved selector with computed bounds
interface ResolvedSelector {
  selector: TextSelector;
  textItem: MarkedTextItem;
  structNode: StructTreeNode | null;
  bbox: BBox;
}

// ============================================================================
// Constants
// ============================================================================

/** Tolerance (pt) used for alignment assertions that do not specify one. */
const DEFAULT_ALIGNMENT_TOLERANCE = 2;

// ============================================================================
// Relation Predicates and Edge Logic
// ============================================================================

// Coordinate system: origin at top-left, y increases downward

// Derive relation sets from Zod schemas
const directionalRelations: Set<Relation> = new Set(DirectionalRelationSchema.options);
const alignmentRelations: Set<Relation> = new Set(AlignmentRelationSchema.options);

// Default edges for each relation (from spec table)
const relationDefaults: Record<Relation, { subject: Edge; object: Edge }> = {
  leftOf: { subject: "right", object: "left" },
  rightOf: { subject: "left", object: "right" },
  above: { subject: "bottom", object: "top" },
  below: { subject: "top", object: "bottom" },
  leftAligned: { subject: "left", object: "left" },
  rightAligned: { subject: "right", object: "right" },
  topAligned: { subject: "top", object: "top" },
  bottomAligned: { subject: "bottom", object: "bottom" },
};

/**
 * Extract the coordinate of one edge (or center line) of a bounding box.
 *
 * @param bbox Box in top-left-origin coordinates.
 * @param edge Which edge or center line to read.
 * @returns x coordinate for left/right/centerX, y coordinate for top/bottom/centerY.
 */
function getEdgeValue(bbox: BBox, edge: Edge): number {
  switch (edge) {
    case "left":
      return bbox.x;
    case "right":
      return bbox.x + bbox.width;
    case "top":
      return bbox.y;
    case "bottom":
      return bbox.y + bbox.height;
    case "centerX":
      return bbox.x + bbox.width / 2;
    case "centerY":
      return bbox.y + bbox.height / 2;
  }
}

// Evaluate directional relation with edge overrides and distance constraints
interface DirectionalResult {
  passed: boolean;
  subjectEdge: Edge;
  objectEdge: Edge;
  subjectValue: number;
  objectValue: number;
  distance: number;
  failureReason?: string;
}

/**
 * Evaluate a directional relation (leftOf, rightOf, above, below).
 *
 * The relation holds when the chosen subject edge lies strictly on the
 * expected side of the chosen object edge and, if given, the distance between
 * the two edges lies within [byMin, byMax].
 *
 * @param relation Directional relation to test.
 * @param subjectBBox Bounding box of the subject.
 * @param objectBBox Bounding box of the object.
 * @param subjectEdgeOverride Edge to use instead of the relation's default subject edge.
 * @param objectEdgeOverride Edge to use instead of the relation's default object edge.
 * @param byMin Minimum allowed distance between the edges.
 * @param byMax Maximum allowed distance between the edges.
 * @returns The edges and values compared, the signed distance, and - on
 *   failure - a `failureReason` naming the first constraint that failed.
 */
function evaluateDirectionalRelation(
  relation: DirectionalRelation,
  subjectBBox: BBox,
  objectBBox: BBox,
  subjectEdgeOverride?: Edge,
  objectEdgeOverride?: Edge,
  byMin?: number,
  byMax?: number,
): DirectionalResult {
  const defaults = relationDefaults[relation];
  const subjectEdge = subjectEdgeOverride ?? defaults.subject;
  const objectEdge = objectEdgeOverride ?? defaults.object;

  const subjectValue = getEdgeValue(subjectBBox, subjectEdge);
  const objectValue = getEdgeValue(objectBBox, objectEdge);

  // Distance calculation depends on relation direction
  // For leftOf/above: distance = objectEdge - subjectEdge (positive when relation holds)
  // For rightOf/below: distance = subjectEdge - objectEdge (positive when relation holds)
  let distance: number;
  let directionPassed: boolean;

  if (relation === "leftOf" || relation === "above") {
    distance = objectValue - subjectValue;
    directionPassed = subjectValue < objectValue;
  } else {
    // rightOf or below
    distance = subjectValue - objectValue;
    directionPassed = subjectValue > objectValue;
  }

  const result: DirectionalResult = {
    passed: true,
    subjectEdge,
    objectEdge,
    subjectValue,
    objectValue,
    distance,
  };

  // Check directional constraint
  if (!directionPassed) {
    result.passed = false;
    result.failureReason = "directional constraint not satisfied";
    return result;
  }

  // Check byMin constraint
  if (byMin !== undefined && distance < byMin) {
    result.passed = false;
    result.failureReason = `distance ${distance.toFixed(1)}pt < byMin ${byMin}pt`;
    return result;
  }

  // Check byMax constraint
  if (byMax !== undefined && distance > byMax) {
    result.passed = false;
    result.failureReason = `distance ${distance.toFixed(1)}pt > byMax ${byMax}pt`;
    return result;
  }

  return result;
}

// Evaluate alignment relation with edge overrides
interface AlignmentResult {
  passed: boolean;
  subjectEdge: Edge;
  objectEdge: Edge;
  subjectValue: number;
  objectValue: number;
  difference: number;
}

/**
 * Evaluate an alignment relation (leftAligned, rightAligned, topAligned,
 * bottomAligned).
 *
 * @param relation Alignment relation to test.
 * @param subjectBBox Bounding box of the subject.
 * @param objectBBox Bounding box of the object.
 * @param tolerance Largest absolute edge difference that still counts as aligned.
 * @param subjectEdgeOverride Edge to use instead of the relation's default subject edge.
 * @param objectEdgeOverride Edge to use instead of the relation's default object edge.
 * @returns The edges and values compared, their absolute difference, and
 *   whether that difference is within `tolerance`.
 */
function evaluateAlignmentRelation(
  relation: AlignmentRelation,
  subjectBBox: BBox,
  objectBBox: BBox,
  tolerance: number,
  subjectEdgeOverride?: Edge,
  objectEdgeOverride?: Edge,
): AlignmentResult {
  const defaults = relationDefaults[relation];
  const subjectEdge = subjectEdgeOverride ?? defaults.subject;
  const objectEdge = objectEdgeOverride ?? defaults.object;

  const subjectValue = getEdgeValue(subjectBBox, subjectEdge);
  const objectValue = getEdgeValue(objectBBox, objectEdge);
  const difference = Math.abs(subjectValue - objectValue);

  return {
    passed: difference <= tolerance,
    subjectEdge,
    objectEdge,
    subjectValue,
    objectValue,
    difference,
  };
}

// ============================================================================
// Helper Functions
// ============================================================================

/** Expand the string shorthand for a selector into a full TextSelector. */
function normalizeSelector(s: string | TextSelector): TextSelector {
  return typeof s === "string" ? { text: s } : s;
}

/** True when a structure-tree child is a content/object/annotation leaf rather than a struct node. */
function isStructTreeContent(node: StructTreeNode | StructTreeContent): node is StructTreeContent {
  return "type" in node && (node.type === "content" || node.type === "object" || node.type === "annotation");
}

/** True when a text-content entry carries a string `str`, i.e. is a text run. */
function isTextItem(item: TextItem | TextMarkedContent): item is TextItem {
  return "str" in item && typeof item.str === "string";
}

/** True when a text-content entry carries a string `type`, i.e. is a marked-content marker. */
function isTextMarkedContent(item: TextItem | TextMarkedContent): item is TextMarkedContent {
  return "type" in item && typeof item.type === "string";
}

/**
 * Extract MarkedTextItem[] from pdfjs getTextContent result.
 * Tracks the MCID in effect as we iterate through interleaved items.
 *
 * Marked-content sequences can nest. A stack keeps the enclosing MCID so that
 * text following a nested sequence (one without its own MCID) is still
 * attributed to the outer MCID instead of being reset to null.
 *
 * @param items Interleaved text runs and marked-content markers for one page.
 * @param pageNum Page number recorded on every returned item.
 * @param pageHeight Page height used to flip y to a top-left origin.
 * @returns One MarkedTextItem per text run, in input order.
 */
function extractMarkedTextItems(
  items: (TextItem | TextMarkedContent)[],
  pageNum: number,
  pageHeight: number,
): MarkedTextItem[] {
  const result: MarkedTextItem[] = [];
  // One entry per open marked-content sequence: the MCID in effect inside it.
  const mcidStack: (string | null)[] = [];
  let currentMcid: string | null = null;

  for (const item of items) {
    if (isTextMarkedContent(item)) {
      if (item.type === "endMarkedContent") {
        // Leave the innermost sequence and restore the enclosing MCID.
        // An unbalanced end marker simply leaves us with no MCID.
        mcidStack.pop();
        currentMcid = mcidStack[mcidStack.length - 1] ?? null;
      } else if (item.type === "beginMarkedContent" || item.type === "beginMarkedContentProps") {
        // A sequence with its own id starts a new MCID; otherwise it inherits
        // the MCID of the sequence it is nested in.
        if (item.type === "beginMarkedContentProps" && item.id) {
          currentMcid = item.id;
        }
        mcidStack.push(currentMcid);
      }
    } else if (isTextItem(item)) {
      // Transform: [scaleX, skewX, skewY, scaleY, translateX, translateY]
      const tm = item.transform;
      const x = tm[4];
      // Convert from PDF coordinates (bottom-left origin) to top-left origin
      // NOTE(review): translateY looks like the text origin (baseline), so `y`
      // would be the flipped baseline rather than the glyph top - confirm
      // before relying on absolute vertical positions.
      const y = pageHeight - tm[5];
      const height = Math.hypot(tm[2], tm[3]);

      result.push({
        str: item.str,
        x,
        y,
        width: item.width,
        height,
        mcid: currentMcid,
        page: pageNum,
      });
    }
  }

  return result;
}

/**
 * Recursively build MCID -> StructNode map and parent map from structure tree.
 * Each MCID maps to the struct node that directly contains it; the parent map
 * links every struct node to its parent for upward traversal.
 *
 * @param tree Node whose children are visited; null yields the maps unchanged.
 * @param mcidMap Accumulator for MCID -> containing struct node.
 * @param parentMap Accumulator for struct node -> parent struct node.
 * @param parentNode Node to record as the owner of `tree`'s children; defaults to `tree`.
 * @returns The same two maps, filled in.
 */
function buildMcidStructMap(
  tree: StructTreeNode | null,
  mcidMap: Map<string, StructTreeNode> = new Map(),
  parentMap: Map<StructTreeNode, StructTreeNode> = new Map(),
  parentNode: StructTreeNode | null = null,
): { mcidMap: Map<string, StructTreeNode>; parentMap: Map<StructTreeNode, StructTreeNode> } {
  if (!tree) return { mcidMap, parentMap };

  // Owner of everything found directly under `tree`.
  const owner = parentNode ?? tree;

  for (const child of tree.children ?? []) {
    if (isStructTreeContent(child)) {
      if (child.type === "content" && child.id) {
        // Map MCID to the owning struct node (the semantic element)
        mcidMap.set(child.id, owner);
      }
    } else {
      // Record parent for tree traversal, then recurse into the child node
      parentMap.set(child, owner);
      buildMcidStructMap(child, mcidMap, parentMap, child);
    }
  }

  return { mcidMap, parentMap };
}

/**
 * Collect only direct MCIDs under a structure node (non-recursive).
 * Does not descend into child structure nodes.
 */
function collectDirectMcids(node: StructTreeNode): string[] {
  const mcids: string[] = [];

  for (const child of node.children ?? []) {
    if (isStructTreeContent(child) && child.type === "content" && child.id) {
      mcids.push(child.id);
    }
    // Do NOT recurse into child struct nodes
  }

  return mcids;
}

/**
 * Recursively collect ALL MCIDs under a structure node and its descendants,
 * in document (depth-first) order.
 * Used for granularity aggregation to compute bbox of an entire subtree.
 */
function collectAllMcids(node: StructTreeNode): string[] {
  const mcids: string[] = [];

  const visit = (current: StructTreeNode): void => {
    for (const child of current.children ?? []) {
      if (isStructTreeContent(child)) {
        if (child.type === "content" && child.id) {
          mcids.push(child.id);
        }
      } else {
        visit(child);
      }
    }
  };
  visit(node);

  return mcids;
}

/**
 * Walk up the structure tree, starting at `node` itself, to find the nearest
 * node with a matching role.
 * Returns null if neither `node` nor any ancestor has the target role.
 */
function findAncestorWithRole(
  node: StructTreeNode,
  targetRole: string,
  parentMap: Map<StructTreeNode, StructTreeNode>,
): StructTreeNode | null {
  let current: StructTreeNode | undefined = node;
  while (current) {
    if (current.role === targetRole) {
      return current;
    }
    current = parentMap.get(current);
  }
  return null;
}

/**
 * Check if a string is whitespace-only (including empty).
 * Used to filter out horizontal skip spaces in PDF content.
 */
function isWhitespaceOnly(str: string): boolean {
  return str.trim().length === 0;
}

/**
 * Compute union bounding box from multiple items.
 * Filters out whitespace-only text items to avoid including horizontal skips.
 *
 * @returns The enclosing box, tagged with the page of the first non-whitespace
 *   item, or null when no non-whitespace item exists.
 */
function unionBBox(items: MarkedTextItem[]): BBox | null {
  // Filter out whitespace-only items (these are often horizontal skips)
  const contentItems = items.filter((item) => !isWhitespaceOnly(item.str));
  if (contentItems.length === 0) return null;

  let minX = Infinity;
  let minY = Infinity;
  let maxX = -Infinity;
  let maxY = -Infinity;
  const page = contentItems[0].page;

  for (const item of contentItems) {
    minX = Math.min(minX, item.x);
    minY = Math.min(minY, item.y);
    maxX = Math.max(maxX, item.x + item.width);
    maxY = Math.max(maxY, item.y + item.height);
  }

  return {
    x: minX,
    y: minY,
    width: maxX - minX,
    height: maxY - minY,
    page,
  };
}

/**
 * Compute semantic bounding box for a structure node.
 * Uses only direct MCIDs (non-recursive) to avoid including nested elements
 * like margin content that may be children of body paragraphs.
 *
 * @returns The union box of the node's direct content, or null when it has no
 *   non-whitespace text items.
 */
function computeStructBBox(
  node: StructTreeNode,
  mcidToTextItems: Map<string, MarkedTextItem[]>,
): BBox | null {
  const mcids = collectDirectMcids(node);
  const items = mcids.flatMap((id) => mcidToTextItems.get(id) ?? []);
  return unionBBox(items);
}

// ============================================================================
// Main Predicate
// ============================================================================

/**
 * Verify spatial positions of text in a rendered PDF using semantic structure.
 * Uses pdfjs-dist to access MCIDs and structure tree.
 *
 * @param file Path of the PDF to inspect.
 * @param assertions Assertions that must hold.
 * @param noMatchAssertions Assertions that must NOT hold. One whose selectors
 *   cannot be resolved, or that spans two pages, is treated as not holding.
 * @returns A Verify whose `verify` collects every problem found and then fails
 *   with a single numbered list of them.
 */
export const ensurePdfTextPositions = (
  file: string,
  assertions: PdfTextPositionAssertion[],
  noMatchAssertions?: PdfTextPositionAssertion[],
): Verify => {
  return {
    name: `Inspecting ${file} for text position assertions`,
    verify: async (_output: ExecuteOutput[]) => {
      const errors: string[] = [];

      // Internal normalized assertion type for processing
      type NormalizedAssertion = {
        subject: TextSelector;
        relation?: Relation;
        object?: TextSelector;
        tolerance: number;
        byMin?: number;
        byMax?: number;
      };

      // Validate and normalize an assertion using Zod
      const normalizeAssertion = (a: unknown, index: number): NormalizedAssertion | null => {
        // Try parsing as each type in order of specificity
        const directionalResult = DirectionalAssertionSchema.safeParse(a);
        if (directionalResult.success) {
          const d = directionalResult.data;
          return {
            subject: normalizeSelector(d.subject),
            relation: d.relation,
            object: normalizeSelector(d.object),
            tolerance: DEFAULT_ALIGNMENT_TOLERANCE,
            byMin: d.byMin,
            byMax: d.byMax,
          };
        }

        const alignmentResult = AlignmentAssertionSchema.safeParse(a);
        if (alignmentResult.success) {
          const al = alignmentResult.data;
          return {
            subject: normalizeSelector(al.subject),
            relation: al.relation,
            object: normalizeSelector(al.object),
            tolerance: al.tolerance ?? DEFAULT_ALIGNMENT_TOLERANCE,
          };
        }

        const tagOnlyResult = TagOnlyAssertionSchema.safeParse(a);
        if (tagOnlyResult.success) {
          return {
            subject: normalizeSelector(tagOnlyResult.data.subject),
            tolerance: DEFAULT_ALIGNMENT_TOLERANCE,
          };
        }

        // None of the schemas matched - report validation error.
        // `issues` is the canonical ZodError field (`errors` is only an alias).
        const fullResult = PdfTextPositionAssertionSchema.safeParse(a);
        if (!fullResult.success) {
          const zodErrors = fullResult.error.issues
            .map((e) => `${e.path.join(".")}: ${e.message}`)
            .join("; ");
          errors.push(`Assertion ${index + 1} is invalid: ${zodErrors}`);
        }
        return null;
      };

      // Stage 1: Parse and validate assertions
      const normalizedAssertions = assertions
        .map((a, i) => normalizeAssertion(a, i))
        .filter((a): a is NormalizedAssertion => a !== null);

      const normalizedNoMatch = noMatchAssertions
        ?.map((a, i) => normalizeAssertion(a, i + assertions.length))
        .filter((a): a is NormalizedAssertion => a !== null);

      // Track search texts and their selectors (to know if Decoration role is requested)
      // Page role selectors are tracked separately since they don't need text search
      const searchTexts = new Set<string>();
      const textToSelectors = new Map<string, TextSelector[]>();
      const pageSelectors = new Map<number, TextSelector>(); // page number -> selector

      // Helper: check if selector is a Page role (no text search needed)
      const isPageRole = (sel: TextSelector): boolean => sel.role === "Page";

      // Helper: get unique key for a selector (for resolvedSelectors map)
      // Includes granularity since different granularity settings need different bbox computation
      const selectorKey = (sel: TextSelector): string => {
        if (isPageRole(sel)) {
          return `Page:${sel.page}`;
        }
        const base = sel.text ?? "";
        if (sel.granularity) {
          return `${base}@${sel.granularity}`;
        }
        return base;
      };

      // Track unique selectors by their full key (including granularity)
      const uniqueSelectors = new Map<string, TextSelector>();

      const addSelector = (sel: TextSelector) => {
        if (isPageRole(sel)) {
          if (sel.page === undefined) {
            errors.push(`Page role requires 'page' field to specify page number`);
            return;
          }
          pageSelectors.set(sel.page, sel);
        } else {
          if (!sel.text) {
            errors.push(`Selector requires 'text' field (unless role is "Page")`);
            return;
          }
          searchTexts.add(sel.text);
          const existing = textToSelectors.get(sel.text) ?? [];
          existing.push(sel);
          textToSelectors.set(sel.text, existing);
          // Also track by full key for resolution
          uniqueSelectors.set(selectorKey(sel), sel);
        }
      };

      for (const a of normalizedAssertions) {
        addSelector(a.subject);
        if (a.object) addSelector(a.object);
      }
      for (const a of normalizedNoMatch ?? []) {
        addSelector(a.subject);
        if (a.object) addSelector(a.object);
      }

      // Helper: check if any selector for this text is a Decoration (untagged content)
      const isDecoration = (text: string): boolean => {
        const selectors = textToSelectors.get(text) ?? [];
        return selectors.some((s) => s.role === "Decoration");
      };

      // Stage 2: Load PDF with pdfjs-dist
      // deno-lint-ignore no-explicit-any
      const pdfjsLib = await import("pdfjs-dist") as any;
      const buffer = await Deno.readFile(file);
      const loadingTask = pdfjsLib.getDocument({ data: buffer });

      // Stage 3 & 4: Extract content and structure tree per page
      const allTextItems: MarkedTextItem[] = [];
      const mcidToTextItems = new Map<string, MarkedTextItem[]>();
      const mcidToStructNode = new Map<string, StructTreeNode>();
      const structNodeToParent = new Map<StructTreeNode, StructTreeNode>();
      const pageDimensions = new Map<number, { width: number; height: number }>();

      try {
        const doc = await loadingTask.promise;

        for (let pageNum = 1; pageNum <= doc.numPages; pageNum++) {
          const page = await doc.getPage(pageNum);
          const viewport = page.getViewport({ scale: 1.0 });

          // Store page dimensions for Page role
          pageDimensions.set(pageNum, { width: viewport.width, height: viewport.height });

          // Get text content with marked content
          const textContent = await page.getTextContent({
            includeMarkedContent: true,
          });

          const pageItems = extractMarkedTextItems(
            textContent.items,
            pageNum,
            viewport.height,
          );
          allTextItems.push(...pageItems);

          // Build MCID -> text items map
          for (const item of pageItems) {
            if (item.mcid) {
              const existing = mcidToTextItems.get(item.mcid) ?? [];
              existing.push(item);
              mcidToTextItems.set(item.mcid, existing);
            }
          }

          // Get structure tree and build MCID -> struct node map + parent map
          const structTree = await page.getStructTree();
          if (structTree) {
            const { mcidMap, parentMap } = buildMcidStructMap(structTree);
            for (const [k, v] of mcidMap) {
              mcidToStructNode.set(k, v);
            }
            for (const [k, v] of parentMap) {
              structNodeToParent.set(k, v);
            }
          }
        }
      } finally {
        // Everything needed has been copied into the maps above; release the
        // document and its worker even if extraction failed part-way.
        await loadingTask.destroy();
      }

      // Stage 5: Find text items for each search text (must be unique, unless Decoration)
      const foundTexts = new Map<string, MarkedTextItem>();
      const ambiguousTexts = new Set<string>();
      for (const searchText of searchTexts) {
        const matches = allTextItems.filter((t) => t.str.includes(searchText));
        if (matches.length === 1) {
          foundTexts.set(searchText, matches[0]);
        } else if (matches.length > 1) {
          // Decoration role (headers, footers) naturally repeat on each page - allow first match
          if (isDecoration(searchText)) {
            foundTexts.set(searchText, matches[0]);
          } else {
            ambiguousTexts.add(searchText);
            errors.push(
              `Text "${searchText}" is ambiguous - found ${matches.length} matches. Use a more specific search string.`,
            );
          }
        }
        // If matches.length === 0, we'll report "not found" later
      }

      // Stage 6 & 7: Resolve selectors to structure nodes and compute bboxes
      const resolvedSelectors = new Map<string, ResolvedSelector>();

      // First, resolve Page role selectors (no text search needed)
      for (const [pageNum, sel] of pageSelectors) {
        const dims = pageDimensions.get(pageNum);
        if (!dims) {
          errors.push(`Page ${pageNum} does not exist in PDF (has ${pageDimensions.size} pages)`);
          continue;
        }
        const key = selectorKey(sel);
        resolvedSelectors.set(key, {
          selector: sel,
          textItem: { str: "", x: 0, y: 0, width: 0, height: 0, mcid: null, page: pageNum },
          structNode: null,
          bbox: {
            x: 0,
            y: 0,
            width: dims.width,
            height: dims.height,
            page: pageNum,
          },
        });
      }

      // Then, resolve text-based selectors (iterate by unique selector key to handle granularity)
      for (const [key, selector] of uniqueSelectors) {
        const searchText = selector.text!;
        const textItem = foundTexts.get(searchText);
        if (!textItem) {
          // Don't report "not found" if we already reported "ambiguous"
          if (!ambiguousTexts.has(searchText)) {
            errors.push(`Text not found in PDF: "${searchText}"`);
          }
          continue;
        }

        let structNode: StructTreeNode | null = null;
        let bbox: BBox;

        // Decoration role: use text item bounds directly (for headers, footers, page decorations)
        if (isDecoration(searchText)) {
          bbox = {
            x: textItem.x,
            y: textItem.y,
            width: textItem.width,
            height: textItem.height,
            page: textItem.page,
          };
        } else if (!textItem.mcid) {
          errors.push(
            `Text "${searchText}" has no MCID - PDF may not be tagged. Use role: "Decoration" for untagged page elements like headers/footers.`,
          );
          continue;
        } else {
          structNode = mcidToStructNode.get(textItem.mcid) ?? null;

          // Check for granularity: aggregate bbox to ancestor with target role
          if (selector.granularity && structNode) {
            const ancestor = findAncestorWithRole(structNode, selector.granularity, structNodeToParent);
            if (ancestor) {
              // Collect ALL MCIDs recursively under that ancestor
              const allMcids = collectAllMcids(ancestor);
              const allItems = allMcids.flatMap((id) => mcidToTextItems.get(id) ?? []);
              const ancestorBBox = unionBBox(allItems);
              if (ancestorBBox) {
                bbox = ancestorBBox;
              } else {
                errors.push(
                  `Could not compute bbox for "${searchText}" with granularity "${selector.granularity}" - no content items found`,
                );
                continue;
              }
            } else {
              errors.push(
                `No ancestor with role "${selector.granularity}" found for "${searchText}"`,
              );
              continue;
            }
          } else {
            // Same-MCID approach: compute bbox from all text items sharing this MCID
            const mcidItems = mcidToTextItems.get(textItem.mcid);
            if (mcidItems && mcidItems.length > 0) {
              const mcidBBox = unionBBox(mcidItems);
              if (mcidBBox) {
                bbox = mcidBBox;
              } else {
                errors.push(
                  `Could not compute bbox for "${searchText}" - all text items in MCID are whitespace-only`,
                );
                continue;
              }
            } else {
              errors.push(
                `No text items found for MCID ${textItem.mcid} containing "${searchText}"`,
              );
              continue;
            }
          }
        }

        resolvedSelectors.set(key, {
          selector,
          textItem,
          structNode,
          bbox,
        });
      }

      // Validate role assertions. Each selector is checked on its own so an
      // object's role is still validated when the subject is a Page selector
      // or could not be resolved.
      const validateRole = (sel: TextSelector) => {
        // Page role is a virtual selector with no struct node to validate
        if (isPageRole(sel) || !sel.role) return;

        const resolved = resolvedSelectors.get(selectorKey(sel));
        // Unresolved selectors already produced an error; selectors without a
        // struct node (e.g. Decoration) have nothing to compare against.
        if (!resolved || !resolved.structNode) return;

        if (resolved.structNode.role !== sel.role) {
          errors.push(
            `Role mismatch for "${sel.text}": expected ${sel.role}, got ${resolved.structNode.role}`,
          );
        }
      };

      for (const a of normalizedAssertions) {
        validateRole(a.subject);
        if (a.object) validateRole(a.object);
      }

      // Stage 8: Evaluate position assertions
      // Note: Zod validation in Stage 1 already handles:
      // - Unknown relations
      // - byMin/byMax with alignment relations (via .strict())
      // - byMin > byMax (via .refine())
      for (const a of normalizedAssertions) {
        // Tag-only assertions (no relation/object)
        if (!a.relation || !a.object) {
          continue; // Already validated in stage 6
        }

        const subjectKey = selectorKey(a.subject);
        const objectKey = selectorKey(a.object);
        const subjectResolved = resolvedSelectors.get(subjectKey);
        const objectResolved = resolvedSelectors.get(objectKey);

        if (!subjectResolved || !objectResolved) {
          continue; // Error already recorded
        }

        // Check same page
        if (subjectResolved.bbox.page !== objectResolved.bbox.page) {
          errors.push(
            `Cannot compare positions: "${subjectKey}" is on page ${subjectResolved.bbox.page}, ` +
              `"${objectKey}" is on page ${objectResolved.bbox.page}`,
          );
          continue;
        }

        // Evaluate relation based on type (Zod guarantees valid relation type)
        const isDirectional = directionalRelations.has(a.relation);
        if (isDirectional) {
          const result = evaluateDirectionalRelation(
            a.relation as DirectionalRelation,
            subjectResolved.bbox,
            objectResolved.bbox,
            a.subject.edge,
            a.object.edge,
            a.byMin,
            a.byMax,
          );

          if (!result.passed) {
            const distanceInfo = a.byMin !== undefined || a.byMax !== undefined
              ? ` Distance: ${result.distance.toFixed(1)}pt` +
                (a.byMin !== undefined ? ` (required >= ${a.byMin}pt)` : "") +
                (a.byMax !== undefined ? ` (required <= ${a.byMax}pt)` : "")
              : "";
            errors.push(
              `Position assertion failed (page ${subjectResolved.bbox.page}): "${subjectKey}" is NOT ${a.relation} "${objectKey}".` +
                ` Subject.${result.subjectEdge}=${result.subjectValue.toFixed(1)},` +
                ` Object.${result.objectEdge}=${result.objectValue.toFixed(1)}.${distanceInfo}` +
                (result.failureReason ? ` (${result.failureReason})` : ""),
            );
          }
        } else {
          // Alignment relation
          const result = evaluateAlignmentRelation(
            a.relation as AlignmentRelation,
            subjectResolved.bbox,
            objectResolved.bbox,
            a.tolerance,
            a.subject.edge,
            a.object.edge,
          );

          if (!result.passed) {
            errors.push(
              `Position assertion failed (page ${subjectResolved.bbox.page}): "${subjectKey}" is NOT ${a.relation} "${objectKey}".` +
                ` Subject.${result.subjectEdge}=${result.subjectValue.toFixed(1)},` +
                ` Object.${result.objectEdge}=${result.objectValue.toFixed(1)}.` +
                ` Difference: ${result.difference.toFixed(1)}pt (tolerance: ${a.tolerance}pt)`,
            );
          }
        }
      }

      // Evaluate negative assertions
      // Note: Zod validation already handled in Stage 1
      for (const a of normalizedNoMatch ?? []) {
        if (!a.relation || !a.object) continue;

        const subjectKey = selectorKey(a.subject);
        const objectKey = selectorKey(a.object);
        const subjectResolved = resolvedSelectors.get(subjectKey);
        const objectResolved = resolvedSelectors.get(objectKey);

        if (!subjectResolved || !objectResolved) {
          continue; // Assertion trivially doesn't hold
        }

        if (subjectResolved.bbox.page !== objectResolved.bbox.page) {
          continue; // Assertion trivially doesn't hold
        }

        // Evaluate relation based on type (Zod guarantees valid relation type)
        const isDirectional = directionalRelations.has(a.relation);
        let passed: boolean;
        let resultInfo: string;

        if (isDirectional) {
          const result = evaluateDirectionalRelation(
            a.relation as DirectionalRelation,
            subjectResolved.bbox,
            objectResolved.bbox,
            a.subject.edge,
            a.object.edge,
            a.byMin,
            a.byMax,
          );
          passed = result.passed;
          resultInfo = `Subject.${result.subjectEdge}=${result.subjectValue.toFixed(1)}, ` +
            `Object.${result.objectEdge}=${result.objectValue.toFixed(1)}, ` +
            `distance=${result.distance.toFixed(1)}pt`;
        } else {
          const result = evaluateAlignmentRelation(
            a.relation as AlignmentRelation,
            subjectResolved.bbox,
            objectResolved.bbox,
            a.tolerance,
            a.subject.edge,
            a.object.edge,
          );
          passed = result.passed;
          resultInfo = `Subject.${result.subjectEdge}=${result.subjectValue.toFixed(1)}, ` +
            `Object.${result.objectEdge}=${result.objectValue.toFixed(1)}, ` +
            `difference=${result.difference.toFixed(1)}pt`;
        }

        if (passed) {
          errors.push(
            `Negative assertion failed (page ${subjectResolved.bbox.page}): "${subjectKey}" IS ${a.relation} "${objectKey}" (expected NOT to be). ` +
              resultInfo,
          );
        }
      }

      // Stage 9: Aggregate errors
      if (errors.length > 0) {
        assert(
          false,
          `PDF position assertions failed in ${file}:\n${errors.map((e, i) => ` ${i + 1}. ${e}`).join("\n")}`,
        );
      }
    },
  };
};