/*
 * format-pdf.ts
 *
 * Source: quarto-dev/quarto-cli, src/format/pdf/format-pdf.ts (main branch)
 *
 * Copyright (C) 2020-2022 Posit Software, PBC
 */
6

7
import { basename, extname, join } from "../../deno_ral/path.ts";
8

9
import { mergeConfigs } from "../../core/config.ts";
10
import { texSafeFilename } from "../../core/tex.ts";
11

12
import {
13
  kBibliography,
14
  kCapBottom,
15
  kCapLoc,
16
  kCapTop,
17
  kCitationLocation,
18
  kCiteMethod,
19
  kClassOption,
20
  kDefaultImageExtension,
21
  kDocumentClass,
22
  kEcho,
23
  kFigCapLoc,
24
  kFigDpi,
25
  kFigFormat,
26
  kFigHeight,
27
  kFigWidth,
28
  kHeaderIncludes,
29
  kKeepTex,
30
  kLang,
31
  kNumberSections,
32
  kPaperSize,
33
  kPdfEngine,
34
  kPdfStandard,
35
  kPdfStandardApplied,
36
  kReferenceLocation,
37
  kShiftHeadingLevelBy,
38
  kTblCapLoc,
39
  kTopLevelDivision,
40
  kWarning,
41
  pdfStandardEnv,
42
} from "../../config/constants.ts";
43
import { warning } from "../../deno_ral/log.ts";
44
import { asArray } from "../../core/array.ts";
45
import { Format, FormatExtras, PandocFlags } from "../../config/types.ts";
46

47
import { createFormat } from "../formats-shared.ts";
48

49
import { RenderedFile, RenderServices } from "../../command/render/types.ts";
50
import { ProjectConfig, ProjectContext } from "../../project/types.ts";
51
import { BookExtension } from "../../project/types/book/book-shared.ts";
52

53
import { readLines } from "io/read-lines";
54
import { TempContext } from "../../core/temp.ts";
55
import { isLatexPdfEngine, pdfEngine } from "../../config/pdf.ts";
56
import { formatResourcePath } from "../../core/resources.ts";
57
import { kTemplatePartials } from "../../command/render/template.ts";
58
import { copyTo } from "../../core/copy.ts";
59
import { kCodeAnnotations } from "../html/format-html-shared.ts";
60
import { safeModeFromFile } from "../../deno_ral/fs.ts";
61
import { hasLevelOneHeadings as hasL1Headings } from "../../core/lib/markdown-analysis/level-one-headings.ts";
62

63
/**
 * Builds the PDF format: the shared base format produced by
 * `createPdfFormat("PDF")` merged with a config that registers
 * `pdfBookExtension` under `extensions.book`.
 */
export function pdfFormat(): Format {
  const baseFormat = createPdfFormat("PDF");
  const bookSupport = {
    extensions: {
      book: pdfBookExtension,
    },
  };
  return mergeConfigs(baseFormat, bookSupport);
}
73

74
/**
 * Builds the Beamer format on top of the shared PDF base format.
 *
 * The base is created with heading auto-shift and the KOMA defaults both
 * switched off; Beamer-specific execution defaults and the `notheorems`
 * class option are layered on top.
 */
export function beamerFormat(): Format {
  const pdfBase = createPdfFormat("Beamer", false, false);
  const beamerDefaults = {
    execute: {
      [kFigWidth]: 10,
      [kFigHeight]: 7,
      [kEcho]: false,
      [kWarning]: false,
    },
    classoption: ["notheorems"],
  };
  return createFormat("Beamer", "pdf", pdfBase, beamerDefaults);
}
90

91
/**
 * Builds a format that emits LaTeX (`tex`) rather than a compiled PDF.
 *
 * @param displayName Display name used for both the underlying PDF base
 *   format and the resulting format.
 * @returns The shared PDF base format merged with a book extension that
 *   forces `keep-tex` and writes book output to `book-latex`.
 */
export function latexFormat(displayName: string): Format {
  const latexBookExtension = {
    onSingleFilePreRender: (format: Format, _config?: ProjectConfig) => {
      // The .tex output is the final product here (no PDF is built from
      // it), so the supporting files have to be kept around.
      format.render[kKeepTex] = true;
      return format;
    },
    formatOutputDirectory: () => "book-latex",
  };

  const texFormat = mergeConfigs(createPdfFormat(displayName), {
    extensions: {
      book: latexBookExtension,
    },
  });

  return createFormat(displayName, "tex", texFormat);
}
119

120
/**
 * Creates the base format shared by the PDF, Beamer and LaTeX formats.
 *
 * @param displayName Display name handed to `createFormat`. The value
 *   "Beamer" selects the `beamer` template resources instead of `pdf`.
 * @param autoShiftHeadings When true, `formatExtras` may set
 *   `shift-heading-level-by: -1` (see the conditions in the body).
 * @param koma When true, `formatExtras` contributes KOMA-Script defaults
 *   (scrartcl, caption options, class options, letter paper) unless the
 *   document uses custom templates or a non-KOMA document class.
 * @returns The format definition, including the async `formatExtras` hook.
 */
function createPdfFormat(
  displayName: string,
  autoShiftHeadings = true,
  koma = true,
): Format {
  return createFormat(
    displayName,
    "pdf",
    {
      execute: {
        [kFigWidth]: 5.5,
        [kFigHeight]: 3.5,
        [kFigFormat]: "pdf",
        [kFigDpi]: 300,
      },
      pandoc: {
        [kPdfEngine]: "lualatex",
        standalone: true,
        variables: {
          graphics: true,
          tables: true,
        },
        [kDefaultImageExtension]: "pdf",
      },
      metadata: {
        ["block-headings"]: true,
      },
      formatExtras: async (
        _input: string,
        markdown: string,
        flags: PandocFlags,
        format: Format,
        _libDir: string,
        services: RenderServices,
      ) => {
        const extras: FormatExtras = {};

        // only apply extras if this is latex (as opposed to context)
        const engine = pdfEngine(format.pandoc, format.render, flags);
        if (!isLatexPdfEngine(engine)) {
          return extras;
        }

        // Post processor for dealing with latex output
        extras.postprocessors = [
          pdfLatexPostProcessor(flags, format, services.temp),
        ];

        // user may have overridden koma, check for that here
        const documentclass = format.metadata[kDocumentClass] as
          | string
          | undefined;

        const usingCustomTemplates = format.pandoc.template !== undefined ||
          format.metadata[kTemplatePartials] !== undefined;

        // Document classes that keep the KOMA defaults enabled.
        const komaDocumentClasses = [
          "scrbook",
          "scrreprt",
          "scrreport",
          "scrartcl",
          "scrarticle",
        ];

        // Decide per invocation. The captured `koma` parameter is shared by
        // every call of this hook, so it must not be reassigned here:
        // otherwise one document with a custom class/template would switch
        // the KOMA defaults off for all documents rendered afterwards.
        const useKoma = koma &&
          !usingCustomTemplates &&
          (!documentclass || komaDocumentClasses.includes(documentclass));

        // default to KOMA article class. we do this here rather than
        // above so that projectExtras can override us
        if (useKoma) {
          // determine caption options
          const captionOptions: string[] = [];
          const tblCaploc = tblCapLocation(format);
          captionOptions.push(
            tblCaploc === kCapTop ? "tableheading" : "tablesignature",
          );
          if (figCapLocation(format) === kCapTop) {
            captionOptions.push("figureheading");
          }

          // establish default class options
          const defaultClassOptions = ["DIV=11"];
          if (format.metadata[kLang] !== "de") {
            defaultClassOptions.push("numbers=noendperiod");
          }

          // determine class options (filter by options already set by the user)
          const userClassOptions = format.metadata[kClassOption] as
            | string[]
            | undefined;
          const classOptions = defaultClassOptions.filter((option) => {
            if (Array.isArray(userClassOptions)) {
              // a default is dropped when the user supplied `<name>=...`
              const name = option.split("=")[0];
              return !userClassOptions.some((userOption) =>
                String(userOption).startsWith(name + "=")
              );
            } else {
              return true;
            }
          });

          const headerIncludes = [];
          headerIncludes.push(
            "\\KOMAoption{captions}{" + captionOptions.join(",") + "}",
          );

          extras.metadata = {
            [kDocumentClass]: "scrartcl",
            [kClassOption]: classOptions,
            [kPaperSize]: "letter",
            [kHeaderIncludes]: headerIncludes,
          };
        }

        // Provide a custom template for this format
        // Partials can be the one from Quarto division
        const partialNamesQuarto: string[] = [
          "babel-lang",
          "before-bib",
          "biblio",
          "biblio-config",
          "citations",
          "doc-class",
          "graphics",
          "after-body",
          "before-body",
          "pandoc",
          "tables",
          "tightlist",
          "before-title",
          "title",
          "toc",
        ];
        // or the one from Pandoc division (since Pandoc 3.6.3)
        const partialNamesPandoc: string[] = [
          "after-header-includes",
          "common",
          "document-metadata",
          "font-settings",
          "fonts",
          "hypersetup",
          "passoptions",
        ];

        // Quarto partials are resolved as `.tex`, Pandoc partials as `.latex`
        const createTemplateContext = function (
          to: string,
          partialNamesQuarto: string[],
          partialNamesPandoc: string[],
        ) {
          return {
            template: formatResourcePath(to, "pandoc/template.tex"),
            partials: [
              ...partialNamesQuarto.map((name) => {
                return formatResourcePath(to, `pandoc/${name}.tex`);
              }),
              ...partialNamesPandoc.map((name) => {
                return formatResourcePath(to, `pandoc/${name}.latex`);
              }),
            ],
          };
        };
        // Beamer doesn't use document-metadata partial (its template doesn't include it)
        const beamerPartialNamesPandoc = partialNamesPandoc.filter(
          (name) => name !== "document-metadata",
        );
        extras.templateContext = createTemplateContext(
          displayName === "Beamer" ? "beamer" : "pdf",
          partialNamesQuarto,
          displayName === "Beamer"
            ? beamerPartialNamesPandoc
            : partialNamesPandoc,
        );

        // Don't shift the headings if we see any H1s (we can't shift up any longer)
        const hasLevelOneHeadings = await hasL1Headings(markdown);

        // pdfs with no other heading level oriented options get their heading level shifted by -1
        if (
          !hasLevelOneHeadings &&
          autoShiftHeadings &&
          (flags?.[kNumberSections] === true ||
            format.pandoc[kNumberSections] === true) &&
          flags?.[kTopLevelDivision] === undefined &&
          format.pandoc?.[kTopLevelDivision] === undefined &&
          flags?.[kShiftHeadingLevelBy] === undefined &&
          format.pandoc?.[kShiftHeadingLevelBy] === undefined
        ) {
          extras.pandoc = {
            [kShiftHeadingLevelBy]: -1,
          };
        }

        // pdfs with document class scrbook get number sections turned on
        // https://github.com/quarto-dev/quarto-cli/issues/2369
        extras.pandoc = extras.pandoc || {};
        if (
          documentclass === "scrbook" &&
          format.pandoc[kNumberSections] !== false &&
          flags[kNumberSections] !== false
        ) {
          extras.pandoc[kNumberSections] = true;
        }

        // Handle pdf-standard option for PDF/A, PDF/UA, PDF/X conformance
        // (render option first, then metadata, then the environment default)
        const pdfStandard = asArray(
          format.render?.[kPdfStandard] ?? format.metadata?.[kPdfStandard] ??
            pdfStandardEnv(),
        );
        if (pdfStandard.length > 0) {
          const { version, standards, needsTagging } =
            normalizePdfStandardForLatex(pdfStandard);
          // Set pdfstandard as a map if there are standards or a version
          if (standards.length > 0 || version) {
            extras.pandoc.variables = extras.pandoc.variables || {};
            const pdfstandardMap: Record<string, unknown> = {};
            if (standards.length > 0) {
              pdfstandardMap.standards = standards;
            }
            if (version) {
              pdfstandardMap.version = version;
            }
            if (needsTagging) {
              pdfstandardMap.tagging = true;
            }
            extras.pandoc.variables["pdfstandard"] = pdfstandardMap;
          }
          // Store applied standards in metadata for verapdf validation
          // (only standards that LaTeX actually supports, not the original list)
          if (standards.length > 0) {
            extras.metadata = extras.metadata || {};
            extras.metadata[kPdfStandardApplied] = standards;
          }
        }

        return extras;
      },
    },
  );
}
364

365
// Book extension for PDF output. After the single book file is rendered
// with keep-tex enabled, the intermediate `index.tex` in the project
// directory is renamed to match the final output file's name.
const pdfBookExtension: BookExtension = {
  selfContainedOutput: true,
  onSingleFilePostRender: (
    project: ProjectContext,
    renderedFile: RenderedFile,
  ) => {
    // nothing to rename unless the .tex file was kept
    if (!renderedFile.format.render[kKeepTex]) {
      return;
    }

    // derive a tex-friendly file name from the final output file's stem
    const outputFile = renderedFile.file!;
    const outputStem = basename(outputFile, extname(outputFile));
    const texFileName = texSafeFilename(outputStem) + ".tex";

    const renameFrom = join(project.dir, "index.tex");
    const renameTo = join(project.dir, texFileName);
    Deno.renameSync(renameFrom, renameTo);
  },
};
385
// A line processor receives one line of the LaTeX output and returns the
// (possibly rewritten) text to emit, or `undefined` to drop ("eat") the line.
type LineProcessor = (line: string) => string | undefined;
386

387
/**
 * Creates the post processor that rewrites the generated LaTeX file in place.
 *
 * The returned function runs two passes of line processors over `output`.
 * Which processors take part depends on the cite method, citation/reference
 * location, table caption location and code annotation settings.
 *
 * @param flags Pandoc flags for the render (consulted for reference location).
 * @param format The format being rendered.
 * @param temp Temp context used to create the intermediate files.
 * @returns An async function taking the path of the LaTeX file to process.
 */
function pdfLatexPostProcessor(
  flags: PandocFlags,
  format: Format,
  temp: TempContext,
) {
  return async (output: string) => {
    const citeMethod = format.pandoc[kCiteMethod];
    const usesBiblatex = citeMethod === "biblatex";
    const usesNatbib = citeMethod === "natbib";

    // Processors that always run, in this order
    const pass1Processors: LineProcessor[] = [
      sidecaptionLineProcessor(),
      calloutFloatHoldLineProcessor(),
      tableColumnMarginLineProcessor(),
      guidsProcessor(),
    ];

    // Place the bibliography at the refs div for biblatex / natbib
    if (usesBiblatex) {
      pass1Processors.push(bibLatexBibligraphyRefsDivProcessor());
    } else if (usesNatbib) {
      pass1Processors.push(
        natbibBibligraphyRefsDivProcessor(
          format.metadata[kBibliography] as string[] | undefined,
        ),
      );
    }

    // Filled during pass 1 (citeproc only) and consumed during pass 2
    const renderedCites = {};
    if (format.metadata[kCitationLocation] === "margin") {
      // Margin citations: how unresolved citations are handled depends on
      // the cite method
      if (usesBiblatex) {
        pass1Processors.push(suppressBibLatexBibliographyLineProcessor());
        pass1Processors.push(bibLatexCiteLineProcessor());
      } else if (usesNatbib) {
        pass1Processors.push(suppressNatbibBibliographyLineProcessor());
        pass1Processors.push(natbibCiteLineProcessor());
      } else {
        // With pandoc's default citeproc the citations are raw latex (no
        // tool in the pdf chain generates the bibliography). So the
        // rendered bibliography is read, indexed and removed in this pass,
        // and a second pass uses that index to replace the cites with
        // their rendered versions.
        pass1Processors.push(
          indexAndSuppressPandocBibliography(renderedCites),
          cleanReferencesChapter(),
        );
      }
    }

    // Longtable captions go below the table when requested
    if (tblCapLocation(format) === kCapBottom) {
      pass1Processors.push(longtableBottomCaptionProcessor());
    }

    // Footnotes become side notes when references live in the margin
    if (marginRefs(flags, format)) {
      pass1Processors.push(sideNoteLineProcessor());
    }
    pass1Processors.push(captionFootnoteLineProcessor());

    // Code annotations are processed unless turned off (`false` or "none")
    const codeAnnotations = format.metadata[kCodeAnnotations];
    const annotationsEnabled = codeAnnotations as boolean !== false &&
      codeAnnotations as string !== "none";
    if (annotationsEnabled) {
      pass1Processors.push(codeAnnotationPostProcessor());
      pass1Processors.push(codeListAnnotationPostProcessor());
    }

    pass1Processors.push(tableSidenoteProcessor());

    // Pass 1
    await processLines(output, pass1Processors, temp);

    // Pass 2: these have to see the result of the first pass
    const pass2Processors: LineProcessor[] = [
      longTableSidenoteProcessor(),
    ];
    const hasRenderedCites = Object.keys(renderedCites).length > 0;
    if (hasRenderedCites) {
      pass2Processors.push(placePandocBibliographyEntries(renderedCites));
    }
    await processLines(output, pass2Processors, temp);
  };
}
471

472
// Table caption location: the table-specific metadata value if set,
// otherwise the general caption location, otherwise `kCapTop`.
function tblCapLocation(format: Format) {
  const { metadata } = format;
  return metadata[kTblCapLoc] || metadata[kCapLoc] || kCapTop;
}
475

476
// Figure caption location: the figure-specific metadata value if set,
// otherwise the general caption location, otherwise `kCapBottom`.
function figCapLocation(format: Format) {
  const { metadata } = format;
  return metadata[kFigCapLoc] || metadata[kCapLoc] || kCapBottom;
}
479

480
// True when the reference location is "margin", either in the format's
// pandoc options or in the flags.
function marginRefs(flags: PandocFlags, format: Format) {
  const formatWantsMargin = format.pandoc[kReferenceLocation] === "margin";
  if (formatWantsMargin) {
    return true;
  }
  return flags[kReferenceLocation] === "margin";
}
484

485
// Runs every line of `inputFile` through `lineProcessors` (in order) and
// then replaces `inputFile` with the processed result. Output is staged in
// a temp file created from `temp`.
async function processLines(
  inputFile: string,
  lineProcessors: LineProcessor[],
  temp: TempContext,
) {
  // Staging file that receives the processed lines
  const outputFile = temp.createFile({ suffix: ".tex" });
  const file = await Deno.open(inputFile);
  // Carry over the input file's permissions, since the input gets replaced
  const mode = safeModeFromFile(inputFile);

  // Feeds a line through the processors; once a processor 'eats' the line
  // (returns undefined) the remaining processors are not consulted.
  const applyProcessors = (line: string): string | undefined => {
    let current: string | undefined = line;
    for (const processor of lineProcessors) {
      if (current === undefined) {
        break;
      }
      current = processor(current);
    }
    return current;
  };

  try {
    for await (const line of readLines(file)) {
      const processed = applyProcessors(line);
      if (processed === undefined) {
        // eaten by a processor: emit nothing
        continue;
      }
      Deno.writeTextFileSync(outputFile, processed + "\n", {
        append: true,
        mode,
      });
    }
  } finally {
    file.close();

    // The input file is overwritten even when processing failed part way;
    // the partially processed file is meant to make the error easier to
    // debug.
    copyTo(outputFile, inputFile);
  }
}
523

524
// Marker comment lines that open / close a side caption region
const kBeginScanRegex = /^%quartopost-sidecaption-206BE349/;
const kEndScanRegex = /^%\/quartopost-sidecaption-206BE349/;

// Line processor that swaps the side caption marker lines for
// `kbeginLongTablesideCap` / `kEndLongTableSideCap`. All other lines pass
// through unchanged. An end marker is only recognized after a begin marker.
const sidecaptionLineProcessor = () => {
  let insideSideCaption = false;
  return (line: string): string | undefined => {
    if (!insideSideCaption) {
      if (line.match(kBeginScanRegex)) {
        insideSideCaption = true;
        return kbeginLongTablesideCap;
      }
      return line;
    }

    if (line.match(kEndScanRegex)) {
      insideSideCaption = false;
      return kEndLongTableSideCap;
    }
    return line;
  };
};
549

550
// Reads the first command encountered as a balanced command
// (e.g. \caption{...} or \footnote{...}) and returns
// the complete command, i.e. everything up to and including the brace
// that closes the first group.
//
// This expects the latex string to start with the command.
//
// A backslash escapes the character after it, so `\{` and `\}` are literal
// braces and do not open or close a group (while `\\{` still opens one).
// If the braces never balance, the whole input is returned.
const readBalancedCommand = (latex: string) => {
  let braceDepth = 0;
  let entered = false;
  let end = latex.length;
  for (let i = 0; i < latex.length; i++) {
    const char = latex.charAt(i);
    if (char === "\\") {
      // skip the escaped character so `\{` / `\}` are not counted
      i++;
      continue;
    }

    if (char === "{") {
      braceDepth++;
      entered = true;
    } else if (char === "}") {
      braceDepth--;
    }

    if (entered && braceDepth === 0) {
      // include the closing brace
      end = i + 1;
      break;
    }
  }
  return latex.slice(0, end);
};
575

576
// Relocates footnotes found in the first `\caption{}` of a latex element.
//
// Each `\footnote{...}` inside the caption is replaced by a
// `\footnotemark{}`, a footnote-free copy of the caption is supplied as the
// optional (short) caption argument, and the footnote texts are emitted as
// `\footnotetext{...}` after the element, with the footnote counter adjusted
// so the texts line up with their marks.
//
// The input is returned untouched when it has no `\caption{` or when the
// caption has no footnotes.
const processElementCaptionFootnotes = (latexFigure: string) => {
  const footnoteMark = "\\footnote{";
  const captionMark = "\\caption{";

  // No caption means just let it go
  const captionStart = latexFigure.indexOf(captionMark);
  if (captionStart === -1) {
    return latexFigure;
  }

  // Split into: text before the caption, the caption command, the rest
  const beforeCaption = latexFigure.substring(0, captionStart);
  const fromCaption = latexFigure.slice(captionStart);
  const captionCommand = readBalancedCommand(fromCaption);
  const afterCaption = fromCaption.slice(captionCommand.length);

  // The caption body, without `\caption{` and the closing brace
  let remaining = captionCommand.slice(
    captionMark.length,
    captionCommand.length - 1,
  );

  // No footnotes in the caption, just leave it alone
  let noteStart = remaining.indexOf(footnoteMark);
  if (noteStart === -1) {
    return latexFigure;
  }

  // plainCaption carries no footnotes at all, markedCaption carries a
  // footnote mark wherever a footnote used to be
  let plainCaption = "";
  let markedCaption = "";
  const noteTexts: string[] = [];
  while (noteStart > -1) {
    // text in front of the footnote belongs to both caption variants
    const leadingText = remaining.substring(0, noteStart);
    plainCaption += leadingText;
    markedCaption += leadingText;

    // consume the footnote command itself
    const noteCommand = readBalancedCommand(remaining.slice(noteStart));
    remaining = remaining.slice(noteStart + noteCommand.length);
    noteStart = remaining.indexOf(footnoteMark);

    markedCaption += "\\footnotemark{}";
    noteTexts.push(
      noteCommand.slice(footnoteMark.length, noteCommand.length - 1),
    );
  }
  // whatever follows the last footnote
  plainCaption += remaining;
  markedCaption += remaining;

  const pieces: string[] = [
    beforeCaption,
    `\\caption[${plainCaption}]{${markedCaption}}`,
    afterCaption,
    "\n",
  ];

  // With several notes, wind the counter back so the first footnotetext
  // pairs with the first mark; it is stepped forward again between notes.
  const lastIndex = noteTexts.length - 1;
  if (noteTexts.length > 1) {
    pieces.push(`\\addtocounter{footnote}{-${lastIndex}}`);
  }
  noteTexts.forEach((noteText, index) => {
    pieces.push(`\\footnotetext{${noteText}}`);
    if (index < lastIndex) {
      pieces.push(`\\addtocounter{footnote}{1}`);
    }
  });

  return pieces.join("");
};
675

676
// Matches a line containing a `p{(\columnwidth - <n>\tabcolsep)...` column
// size. Group 1 is the text before `p{`; group 2 runs from `<n>\tabcolsep)`
// to the end of the line.
const kMatchLongTableSize = /^(.*)p{\(\\columnwidth - (\d+\\tabcolsep\).*$)/;
677

678
// Matches a line that starts with `\begin{longtable}`
const kStartLongTable = /^\\begin{longtable}/;
679
// Matches a line that starts with `\end{longtable}`
const kEndLongTable = /^\\end{longtable}/;
680

681
// Line processor that expands guid placeholders.
//
// A definition block looks like:
//
//   %quarto-define-uuid: <guid>
//   ...content lines...
//   %quarto-end-define-uuid
//
// Definition blocks are removed from the output. On every line outside a
// definition block, each occurrence of an already defined guid is replaced
// by that definition's content (content lines joined without a separator,
// then trimmed). Only definitions seen earlier in the file apply.
const guidsProcessor = () => {
  let state: "looking-for-definition-start" | "looking-for-definition-end" =
    "looking-for-definition-start";
  const guidDefinitions: [string, string][] = [];
  let guidBeingProcessed: string | undefined;
  let guidContents: string[] = [];
  return (line: string): string | undefined => {
    switch (state) {
      case "looking-for-definition-start": {
        if (line.startsWith("%quarto-define-uuid: ")) {
          state = "looking-for-definition-end";
          line = line.replace(/^%quarto-define-uuid:\s*/, "");
          guidBeingProcessed = line.trim();
          return undefined;
        }
        for (const [key, value] of guidDefinitions) {
          if (key.length === 0) {
            // an empty guid would match between every character
            continue;
          }
          // Literal replacement: split/join never interprets `$$`, `$&`,
          // etc. in the replacement text, which matters because the
          // content is LaTeX (e.g. `$$...$$` display math).
          line = line.split(key).join(value);
        }
        return line;
      }
      case "looking-for-definition-end": {
        if (line === "%quarto-end-define-uuid") {
          state = "looking-for-definition-start";
          if (guidBeingProcessed === undefined) {
            throw new Error("guidBeingProcessed is undefined");
          }
          guidDefinitions.push([
            guidBeingProcessed,
            guidContents.join("").trim(),
          ]);
          guidContents = [];
          guidBeingProcessed = undefined;
          return undefined;
        } else {
          // content line of the definition being collected
          guidContents.push(line);
          return undefined;
        }
      }
    }
  };
};
722

723
// Line processor that widens longtable columns located between the
// `% quarto-tables-in-margin-AB1927C9:begin` / `:end` marker lines.
//
// The marker lines themselves are dropped. Inside a longtable within the
// marked region, a column size of the form `p{(\columnwidth - ...` is
// rewritten to `p{(\marginparwidth + \marginparsep + \columnwidth - ...`.
// Every other line passes through unchanged.
const tableColumnMarginLineProcessor = () => {
  const beginBoundary = "% quarto-tables-in-margin-AB1927C9:begin";
  const endBoundary = "% quarto-tables-in-margin-AB1927C9:end";

  let state: "looking-for-boundaries" | "looking-for-tables" | "processing" =
    "looking-for-boundaries";

  return (line: string): string | undefined => {
    // outside the marked region
    if (state === "looking-for-boundaries") {
      if (line === beginBoundary) {
        state = "looking-for-tables";
        return undefined;
      }
      return line;
    }

    // inside the marked region, but not inside a longtable
    if (state === "looking-for-tables") {
      if (line.match(kStartLongTable)) {
        state = "processing";
      } else if (line === endBoundary) {
        state = "looking-for-boundaries";
        return undefined;
      }
      return line;
    }

    // inside a longtable
    if (line.match(kEndLongTable)) {
      state = "looking-for-tables";
      return line;
    }
    const sized = line.match(kMatchLongTableSize);
    if (!sized) {
      return line;
    }
    return `${sized[1]}p{(\\marginparwidth + \\marginparsep + \\columnwidth - ${
      sized[2]
    }`;
  };
};
768

769
// Line processor that buffers each figure environment (from a line starting
// with `\begin{figure}` through a `\end{figure}` line) and emits it as one
// chunk after running it through `processElementCaptionFootnotes`. Lines
// outside figures pass through unchanged; buffered lines are swallowed
// until the figure is complete.
const captionFootnoteLineProcessor = () => {
  // undefined while scanning; holds the figure's lines while capturing
  let figureLines: string[] | undefined;
  return (line: string): string | undefined => {
    if (figureLines === undefined) {
      if (!line.match(/^\\begin{figure}.*$/)) {
        return line;
      }
      figureLines = [line];
      return undefined;
    }

    figureLines.push(line);
    if (!line.match(/^\\end{figure}%*$/)) {
      return undefined;
    }

    // the figure is complete: reset the capture state, then relocate the
    // caption footnotes
    const figure = figureLines.join("\n");
    figureLines = undefined;
    return processElementCaptionFootnotes(figure);
  };
};
799

800
// Finds the index of the brace that closes a group whose opening brace sits
// just before `from`. Nested `{...}` groups are skipped and a backslash
// escapes the character after it. Returns -1 when the group never closes.
const findSideNoteEnd = (text: string, from: number): number => {
  let depth = 1;
  for (let i = from; i < text.length; i++) {
    const ch = text[i];
    if (ch === "\\") {
      // skip the escaped character (e.g. `\}` or `\{`)
      i++;
    } else if (ch === "{") {
      depth++;
    } else if (ch === "}") {
      depth--;
      if (depth === 0) {
        return i;
      }
    }
  }
  return -1;
};

// Creates a function that moves side notes out of a table-like environment.
//
// Every `\sidenote{\footnotesize ...}` in the input is replaced with
// `\sidenotemark{}`, and the note bodies are emitted as `\sidenotetext{...}`
// lines directly after the first `endMarker` that follows the last note.
//
// - A side note whose braces never close is left in the text as is (and
//   scanning stops there).
// - If `endMarker` is not found, the note texts are appended at the end.
const processSideNotes = (endMarker: string) => {
  return (latexLongTable: string) => {
    const sideNoteMarker = "\\sidenote{\\footnotesize ";
    let strProcessing = latexLongTable;
    const strOutput: string[] = [];
    const sidenotes: string[] = [];

    let sidenotePos = strProcessing.indexOf(sideNoteMarker);
    while (sidenotePos > -1) {
      const noteStart = sidenotePos + sideNoteMarker.length;
      const noteEnd = findSideNoteEnd(strProcessing, noteStart);
      if (noteEnd === -1) {
        // Unterminated note: stop scanning and keep the remaining text
        // (marker included) untouched rather than looping forever.
        break;
      }

      // text before the note, then a mark where the note used to be
      strOutput.push(strProcessing.substring(0, sidenotePos));
      strOutput.push("\\sidenotemark{}");
      sidenotes.push(strProcessing.substring(noteStart, noteEnd));

      // continue after the note's closing brace
      strProcessing = strProcessing.substring(noteEnd + 1);
      sidenotePos = strProcessing.indexOf(sideNoteMarker);
    }

    // Ensure that we inject sidenotes after the end marker
    const endPos = strProcessing.indexOf(endMarker);
    const splitAt = endPos > -1
      ? endPos + endMarker.length
      : strProcessing.length;
    const prefix = strProcessing.substring(0, splitAt);
    const suffix = strProcessing.substring(splitAt);

    strOutput.push(prefix);
    for (const note of sidenotes) {
      strOutput.push(`\\sidenotetext{${note}}\n`);
    }
    if (suffix) {
      strOutput.push(suffix);
    }

    return strOutput.join("");
  };
};
861

862
// Side note relocation that injects the note texts after `\end{longtable}`
const processLongTableSidenotes = processSideNotes("\\end{longtable}");
863
// Side note relocation that injects the note texts after `\end{table}`
const processTableSidenotes = processSideNotes("\\end{table}");
864

865
// Builds a line processor factory that buffers a multi-line region and hands
// it to `callback` as a single string.
//
// A region starts at a line matching `beginRegex` and ends at a later line
// matching `endRegex` (the begin line itself is not tested against
// `endRegex`). While a region is being buffered its lines are swallowed;
// when the end line arrives, the buffered lines are joined with "\n" and
// the callback's result is emitted in their place. Lines outside a region
// pass through unchanged.
const sideNoteProcessor = (
  beginRegex: RegExp,
  endRegex: RegExp,
  callback: (str: string) => string,
) => {
  return () => {
    // undefined while scanning; holds the region's lines while capturing
    let buffered: string[] | undefined;
    return (line: string): string | undefined => {
      if (buffered === undefined) {
        if (!line.match(beginRegex)) {
          return line;
        }
        buffered = [line];
        return undefined;
      }

      buffered.push(line);
      if (!line.match(endRegex)) {
        return undefined;
      }

      // region complete: reset the capture state before invoking the callback
      const region = buffered.join("\n");
      buffered = undefined;
      return callback(region);
    };
  };
};
901
// Factory for the line processor that captures each longtable (a line
// starting with `\begin{longtable}` through a `\end{longtable}` line) and
// relocates its side notes via `processLongTableSidenotes`.
const longTableSidenoteProcessor = sideNoteProcessor(
  /^\\begin{longtable}.*$/,
  /^\\end{longtable}%*$/,
  processLongTableSidenotes,
);
906

907
// Factory for the line processor that captures each table environment (a
// line starting with `\begin{table}` through a `\end{table}` line) and
// relocates its side notes via `processTableSidenotes`.
const tableSidenoteProcessor = sideNoteProcessor(
  /^\\begin{table}.*$/,
  /^\\end{table}%*$/,
  processTableSidenotes,
);
912

913
// Line processor that pins floats found inside a tcolorbox: between a
// `\begin{tcolorbox}` line and the next `\end{tcolorbox}` line, a bare
// `\begin{figure}` or `\begin{codelisting}` line gets the `[H]` placement
// option appended. Every other line is returned unchanged.
const calloutFloatHoldLineProcessor = () => {
  let insideCallout = false;

  return (line: string): string | undefined => {
    if (!insideCallout) {
      // The opening line itself is passed through as-is
      insideCallout = line.match(/^\\begin{tcolorbox}/) !== null;
      return line;
    }

    if (line.match(/^\\end{tcolorbox}/)) {
      insideCallout = false;
      return line;
    }

    // Only floats without any existing placement option are rewritten
    const isBareFloat = line === "\\begin{figure}" ||
      line === "\\begin{codelisting}";
    return isBareFloat ? `${line}[H]` : line;
  };
};
939

940
// Marker line that stands in for the location of the bibliography
// (compared by strict equality, despite the `Regex` suffix in the name)
const kQuartoBibPlaceholderRegex = "%bib-loc-124C8010";

// Line processor (biblatex): the first placeholder line becomes
// `\printbibliography[heading=none]`; later placeholder lines are dropped,
// and so is any plain `\printbibliography` line seen after the placement.
const bibLatexBibligraphyRefsDivProcessor = () => {
  let placed = false;

  return (line: string): string | undefined => {
    if (line !== kQuartoBibPlaceholderRegex) {
      // Once the bibliography has been placed, the default one is redundant
      const isDefaultBibliography = placed && line === "\\printbibliography";
      return isDefaultBibliography ? undefined : line;
    }

    if (placed) {
      // Only the first placeholder is honored
      return undefined;
    }
    placed = true;
    return "\\printbibliography[heading=none]";
  };
};
959

960
// Line processor (natbib): when `bibs` is provided, the first placeholder
// line becomes an empty `\bibsection` redefinition followed by
// `\bibliography{...}` listing `bibs` (comma separated). Any other
// placeholder line is dropped (including every placeholder when `bibs` is
// missing), and `\bibliography{...}` lines seen after the placement are
// dropped as well.
const natbibBibligraphyRefsDivProcessor = (bibs?: string[]) => {
  let placed = false;

  return (line: string): string | undefined => {
    if (line !== kQuartoBibPlaceholderRegex) {
      // Once the bibliography has been placed, the default one is redundant
      const isDefaultBibliography = placed &&
        line.match(/^\s*\\bibliography{.*}$/) !== null;
      return isDefaultBibliography ? undefined : line;
    }

    if (!bibs || placed) {
      // Nothing to place, or a placeholder was already honored
      return undefined;
    }
    placed = true;
    const bibList = bibs.join(",");
    return `\\renewcommand{\\bibsection}{}\n\\bibliography{${bibList}}`;
  };
};
980

981
// Blanks out the biblatex \printbibliography command: a line consisting of
// exactly that command is replaced with an empty line
const suppressBibLatexBibliographyLineProcessor = () => {
  return (line: string): string | undefined =>
    line === "\\printbibliography" ? "" : line;
};
990

991
// Replaces the natbib bibliography declaration with a version that typesets
// the bibliography into a saved box, so that it is not printed in the PDF
const suppressNatbibBibliographyLineProcessor = () => {
  return (line: string): string | undefined => {
    // The pattern is anchored at both ends, so a hit always covers the whole line
    const declaration = /^\s*\\bibliography{(.*)}$/.exec(line);
    if (declaration === null) {
      return line;
    }
    const bib = declaration[1];
    return [
      "\\newsavebox\\mytempbib",
      `\\savebox\\mytempbib{\\parbox{\\textwidth}{\\bibliography{${bib}}}}`,
    ].join("\n");
  };
};
1001

1002
// Citation placeholder of the form {?quarto-cite:(id)}; the capture group
// holds the citation key
const kQuartoCiteRegex = /{\?quarto-cite:(.*?)}/g;

// Line processor (biblatex): turns every citation placeholder in the line
// into a `\fullcite{key}` command
const bibLatexCiteLineProcessor = () => {
  const toFullCite = (_placeholder: string, citeKey: string): string =>
    `\\fullcite{${citeKey}}`;

  return (line: string): string | undefined =>
    line.replace(kQuartoCiteRegex, toFullCite);
};
1011

1012
// Line processor (natbib): turns every citation placeholder in the line into
// a `\bibentry{key}` command
const natbibCiteLineProcessor = () => {
  const toBibEntry = (_placeholder: string, citeKey: string): string =>
    `\\bibentry{${citeKey}}`;

  return (line: string): string | undefined =>
    line.replace(kQuartoCiteRegex, toBibEntry);
};
1019

1020
// Line processor that rewrites every `\footnote{` opener in the line into a
// `\sidenote{` opener that switches to `\footnotesize`
const sideNoteLineProcessor = () => {
  const kFootnoteOpen = "\\footnote{";
  const kSidenoteOpen = "\\sidenote{\\footnotesize ";

  return (line: string): string | undefined =>
    line.split(kFootnoteOpen).join(kSidenoteOpen);
};
1025

1026
// Line processor that moves a longtable's caption towards the bottom of the
// table. Inside a `\begin{longtable}` ... `\end{longtable}` region, caption
// lines are withheld (a caption may span several lines, up to the line ending
// in `\tabularnewline`). The held caption is re-emitted in front of the next
// `\endlastfoot` line (preceded by `\tabularnewline`), or otherwise in front
// of `\end{longtable}`. A caption still held when `\end{document}` is reached
// is emitted in front of that line.
const longtableBottomCaptionProcessor = () => {
  let inLongtable = false;
  let inMultilineCaption = false;
  let heldCaption: string | undefined;

  return (line: string): string | undefined => {
    // A caption that never found its place is flushed before the document ends
    if (heldCaption && line.match(/^\\end{document}/)) {
      return `${heldCaption}\n${line}`;
    }

    if (!inLongtable) {
      inLongtable = line.match(/^\\begin{longtable}/) !== null;
      return line;
    }

    if (inMultilineCaption) {
      // Keep accumulating until the line that closes the caption row
      heldCaption = `${heldCaption}\n${line}`;
      inMultilineCaption = !line.match(/\\tabularnewline$/);
      return undefined;
    }

    const isSingleLineCaption =
      line.match(/^\\caption.*?\\tabularnewline$/) !== null ||
      line.match(/^\\caption{.*}\\\\$/) !== null;
    if (isSingleLineCaption) {
      heldCaption = line;
      return undefined;
    }

    if (line.match(/^\\caption.*?/)) {
      // Caption starts here and continues on the following lines
      heldCaption = line;
      inMultilineCaption = true;
      return undefined;
    }

    if (heldCaption && line.match(/^\\endlastfoot/)) {
      const relocated = `\\tabularnewline\n${heldCaption}\n${line}`;
      heldCaption = undefined;
      return relocated;
    }

    if (line.match(/^\\end{longtable}$/)) {
      inLongtable = false;
      if (heldCaption) {
        const relocated = heldCaption + "\n" + line;
        heldCaption = undefined;
        return relocated;
      }
    }

    return line;
  };
};
1072

1073
// Matches the references chapter heading, e.g.
// `\chapter*{References}\label{references}`; the capture group is the title
const kChapterRefNameRegex = /^\\chapter\*?{(.*?)}\\label{references.*?}$/;

// Line processor that removes the references chapter heading and, once that
// heading has been seen, the `\addcontentsline{toc}{chapter}{<title>}` and
// `\markboth{<title>}{<title>}` lines that refer to the same title.
// Returns `undefined` for removed lines and the line itself otherwise.
const cleanReferencesChapter = () => {
  // Exact LaTeX snippets to look for. They are matched as plain substrings
  // rather than through `new RegExp(...)`, because the chapter title is
  // arbitrary text: titles such as "References (A+B)" or "Refs [1" contain
  // regex metacharacters that would change the meaning of a pattern or make
  // its construction throw.
  let refChapterContentsLine: string | undefined;
  let refChapterMarkLine: string | undefined;

  return (line: string): string | undefined => {
    const chapterRefMatch = line.match(kChapterRefNameRegex);
    if (chapterRefMatch) {
      const refChapterName = chapterRefMatch[1];
      refChapterContentsLine =
        `\\addcontentsline{toc}{chapter}{${refChapterName}}`;
      refChapterMarkLine = `\\markboth{${refChapterName}}{${refChapterName}}`;
      // Eat the heading itself
      return undefined;
    }

    if (
      refChapterContentsLine !== undefined &&
      line.includes(refChapterContentsLine)
    ) {
      // Eat the table of contents entry
      return undefined;
    }

    if (refChapterMarkLine !== undefined && line.includes(refChapterMarkLine)) {
      // Eat the running header mark
      return undefined;
    }

    return line;
  };
};
1101

1102
// Line processor that removes the bibliography rendered by Pandoc and indexes
// its entries. The bibliography starts at the `\phantomsection\label{refs}`
// line (optionally prefixed with `\protect`) and ends at
// `\end{CSLReferences}`; every line in between, including both delimiters,
// is dropped. Each `\bibitem[\citeproctext]{ref-<key>}` line starts a new
// entry in `renderedCites` under `<key>`; the following non-empty lines are
// appended to that entry until an empty line is reached.
const indexAndSuppressPandocBibliography = (
  renderedCites: Record<string, string[]>,
) => {
  let insideBibliography = false;
  let activeCiteKey: string | undefined = undefined;

  return (line: string): string | undefined => {
    if (!insideBibliography) {
      if (line.match(/^(\\protect)?\\phantomsection\\label{refs}$/)) {
        insideBibliography = true;
        return undefined;
      }
      return line;
    }

    if (line.match(/^\\end{CSLReferences}$/)) {
      insideBibliography = false;
      return undefined;
    }

    const bibItem = line.match(/\\bibitem\[\\citeproctext\]{ref\-(.*?)}/);
    if (bibItem && bibItem[1]) {
      // First line of a new entry
      activeCiteKey = bibItem[1];
      renderedCites[activeCiteKey] = [line];
    } else if (line.length === 0) {
      // A blank line closes the current entry
      activeCiteKey = undefined;
    } else if (activeCiteKey) {
      renderedCites[activeCiteKey].push(line);
    }

    // Nothing inside the bibliography is written to the output
    return undefined;
  };
};
1137

1138
// Start of a side caption
const kInSideCaptionRegex = /^\\sidecaption{/;
// Start of a figure environment that carries a placement option
const kBeginFigureRegex = /^\\begin{figure}\[.*?\]$/;
// End of a figure environment, optionally followed by `%`
const kEndFigureRegex = /^\\end{figure}\%?$/;

// Line processor that replaces citation placeholders with the bibliography
// entries collected in `renderedCites` (keyed by citation key).
//
// Outside side captions, a placeholder becomes its entry wrapped in a
// CSLReferences environment. Inside a figure's side caption only the entry's
// remaining lines are inlined; the entry's first line is held back and is
// emitted, wrapped in a CSLReferences environment, right after the line that
// ends the figure. Placeholders whose key has no entry are replaced with the
// bare key.
const placePandocBibliographyEntries = (
  renderedCites: Record<string, string[]>,
) => {
  type Position = "scanning" | "in-figure" | "in-sidecaption";
  let position: Position = "scanning";
  let deferredEntries: string[] = [];

  // Wraps entry lines in the CSLReferences environment
  const asCslReferences = (entries: string[]): string =>
    [
      "\n\\begin{CSLReferences}{2}{0}",
      ...entries,
      "\\end{CSLReferences}\n",
    ].join("\n");

  // Computes where we are once `line` has been taken into account
  const nextPosition = (current: Position, line: string): Position => {
    if (current === "scanning") {
      return line.match(kBeginFigureRegex) ? "in-figure" : current;
    }
    if (current === "in-figure" && line.match(kInSideCaptionRegex)) {
      return "in-sidecaption";
    }
    return line.match(kEndFigureRegex) ? "scanning" : current;
  };

  return (line: string): string | undefined => {
    position = nextPosition(position, line);

    if (position === "scanning" && deferredEntries.length > 0) {
      // Back outside the figure: emit what was held back while in the caption
      const flushed = `${line}\n${asCslReferences(deferredEntries)}`;
      deferredEntries = [];
      return flushed;
    }

    return line.replace(
      kQuartoCiteRegex,
      (_placeholder: string, citeKey: string): string => {
        const entryLines = renderedCites[citeKey];
        if (!entryLines) {
          return citeKey;
        }
        if (position === "in-sidecaption" && entryLines.length > 0) {
          deferredEntries.push(entryLines[0]);
          return entryLines.slice(1).map((entryLine) => `\n${entryLine}`)
            .join("");
        }
        return asCslReferences(entryLines);
      },
    );
  };
};
1207

1208
// Annotation marker inside a highlighted comment token, i.e. a `\CommentTok{...}`
// whose content ends with `<n>` (written as `\textless{}n\textgreater{}`).
// Groups: text before the token, comment text to keep, annotation number.
const kCodeAnnotationRegex =
  /(.*)\\CommentTok\{(.*?)[^\s]+? +\\textless\{\}(\d+)\\textgreater\{\}.*\}$/gm;
// Annotation marker in plain code: a trailing `% (n)`.
// Groups: text before the marker, annotation number.
const kCodePlainAnnotationRegex = /(.*)% \((\d+)\)$/g;

// Line processor that turns code annotation markers into their rendered
// form. In highlighted code the marker becomes a right-aligned
// `\circled{n}`; in plain code it becomes `(n)` padded out to column 75.
// When a line carries the same annotation number as the previously rendered
// one, the marker is removed without rendering it again. A `\begin{Shaded}`
// line resets that memory.
const codeAnnotationPostProcessor = () => {
  let previousAnnotation: string | undefined;

  // Reports whether `annotationNumber` differs from the last rendered one,
  // and remembers it if so
  const isNewAnnotation = (annotationNumber: string): boolean => {
    if (annotationNumber === previousAnnotation) {
      return false;
    }
    previousAnnotation = annotationNumber;
    return true;
  };

  const renderHighlighted = (
    _marker: string,
    prefix: string,
    comment: string,
    annotationNumber: string,
  ): string => {
    if (!isNewAnnotation(annotationNumber)) {
      return prefix;
    }
    // Anything else that was in the comment is kept as its own comment token
    const kept = comment.length > 0
      ? `${prefix}\\CommentTok{${comment}}`
      : prefix;
    return `${kept}\\hspace*{\\fill}\\NormalTok{\\circled{${annotationNumber}}}`;
  };

  const renderPlain = (
    _marker: string,
    prefix: string,
    annotationNumber: string,
  ): string => {
    if (!isNewAnnotation(annotationNumber)) {
      return prefix;
    }
    const label = `(${annotationNumber})`;
    const padWidth = Math.max(0, 75 - prefix.length - label.length);
    return `${prefix}${" ".repeat(padWidth)}${label}`;
  };

  return (line: string): string | undefined => {
    if (line === "\\begin{Shaded}") {
      previousAnnotation = undefined;
    }

    // Colorized code first, then plain code
    return line
      .replace(kCodeAnnotationRegex, renderHighlighted)
      .replace(kCodePlainAnnotationRegex, renderPlain);
  };
};
1260

1261
// Placeholder for an annotation number in the annotation list.
// Groups: text before the placeholder, annotation number, text after it.
const kListAnnotationRegex = /(.*)5CB6E08D-list-annote-(\d+)(.*)/g;

// Line processor that renders the annotation list placeholder as
// `\circled{n}`, keeping the text around it
const codeListAnnotationPostProcessor = () => {
  // $1 = leading text, $2 = annotation number, $3 = trailing text
  const kCircledAnnotation = "$1\\circled{$2}$3";

  return (line: string): string | undefined =>
    line.replace(kListAnnotationRegex, kCircledAnnotation);
};
1272

1273
// LaTeX that opens a group and redefines longtable's internal
// `\LT@makecaption` so that the caption is set in a `\marginparwidth` wide
// box placed `\marginparsep` past the text width (presumably wrapped around
// longtables whose caption goes in the margin).
const kbeginLongTablesideCap = [
  "{",
  "\\makeatletter",
  "\\def\\LT@makecaption#1#2#3{%",
  "  \\noalign{\\smash{\\hbox{\\kern\\textwidth\\rlap{\\kern\\marginparsep",
  "  \\parbox[t]{\\marginparwidth}{%",
  "    \\footnotesize{%",
  "      \\vspace{(1.1\\baselineskip)}",
  "    #1{#2: }\\ignorespaces #3}}}}}}%",
  "    }",
  "\\makeatother",
].join("\n");

// Closes the group opened by kbeginLongTablesideCap
const kEndLongTableSideCap = "}";
1285

1286
// LaTeX-supported PDF standards (from latex3/latex2e DocumentMetadata)
// See: https://github.com/latex3/latex2e - documentmetadata-support.dtx
const kLatexSupportedStandards = new Set([
  // PDF/A standards (note: a-1a is NOT supported, only a-1b)
  "a-1b",
  "a-2a",
  "a-2b",
  "a-2u",
  "a-3a",
  "a-3b",
  "a-3u",
  "a-4",
  "a-4e",
  "a-4f",
  // PDF/X standards
  "x-4",
  "x-4p",
  "x-5g",
  "x-5n",
  "x-5pg",
  "x-6",
  "x-6n",
  "x-6p",
  // PDF/UA standards (only ua-2 is supported by LaTeX)
  "ua-2",
]);

// Standards that require PDF tagging (document structure)
// - PDF/A level "a" variants require tagged structure per PDF/A spec
// - PDF/UA standards require tagging for universal accessibility
//   (LaTeX does NOT automatically enable tagging for UA standards)
// Note: "ua-1" is listed here but is not in kLatexSupportedStandards, so
// normalizePdfStandardForLatex never reaches the tagging check for it.
const kTaggingRequiredStandards = new Set([
  "a-2a",
  "a-3a",
  "ua-1",
  "ua-2",
]);

// Explicit PDF versions that are accepted: 1.4 through 1.7, and 2.0
const kVersionPattern = /^(1\.[4-7]|2\.0)$/;

// PDF version required by each standard (maximum version limits)
// LaTeX defaults to PDF 2.0 with \DocumentMetadata, but some standards
// have maximum version requirements that are incompatible with 2.0
// Note: a-1a is intentionally omitted as LaTeX doesn't support it
const kStandardRequiredVersion: Record<string, string> = {
  // PDF/A-1 requires exactly PDF 1.4 (only a-1b supported by LaTeX)
  "a-1b": "1.4",
  // PDF/A-2 and PDF/A-3 have maximum version of 1.7
  "a-2a": "1.7",
  "a-2b": "1.7",
  "a-2u": "1.7",
  "a-3a": "1.7",
  "a-3b": "1.7",
  "a-3u": "1.7",
  // PDF/A-4, PDF/UA-1, PDF/UA-2 all work with PDF 2.0 (the default)
};

/**
 * Splits the user-provided list of PDF standards into what LaTeX needs.
 *
 * Entries that are neither strings nor numbers are skipped silently. Each
 * remaining entry is lowercased and stripped of a leading "pdf", "pdf/" or
 * "pdf-", then classified as an explicit PDF version, a LaTeX-supported
 * standard, or an unsupported value (reported through `warning` and ignored).
 *
 * @param standards raw entries, typically read from YAML (so versions may
 *   arrive as numbers, e.g. `2` for "2.0")
 * @returns
 *   - `version`: the first explicit version in the list, wherever it appears;
 *     if there is none, the version required by the first listed standard
 *     that has such a requirement; otherwise `undefined`
 *   - `standards`: the supported standards, normalized, in input order
 *   - `needsTagging`: whether any of them requires a tagged PDF
 */
function normalizePdfStandardForLatex(
  standards: unknown[],
): { version?: string; standards: string[]; needsTagging: boolean } {
  // Kept apart so that an explicit version always takes precedence over an
  // inferred one, even when it is listed after the standard that implies it
  let explicitVersion: string | undefined;
  let inferredVersion: string | undefined;
  const result: string[] = [];
  let needsTagging = false;

  for (const s of standards) {
    // Convert to string - YAML may parse versions like 2.0 as integer 2
    let str: string;
    if (typeof s === "number") {
      // Handle YAML numeric parsing: integer 2 -> "2.0", float 1.4 -> "1.4"
      str = Number.isInteger(s) ? `${s}.0` : String(s);
    } else if (typeof s === "string") {
      str = s;
    } else {
      continue;
    }
    // Normalize: lowercase, remove any "pdf" prefix
    const normalized = str.toLowerCase().replace(/^pdf[/-]?/, "");

    if (kVersionPattern.test(normalized)) {
      // Use first explicit version (ignore subsequent ones)
      if (!explicitVersion) {
        explicitVersion = normalized;
      }
    } else if (kLatexSupportedStandards.has(normalized)) {
      // LaTeX is case-insensitive, pass through lowercase
      result.push(normalized);
      // Check if this standard requires tagging
      if (kTaggingRequiredStandards.has(normalized)) {
        needsTagging = true;
      }
      // Remember the version required by the first standard that has one;
      // it is only used when no explicit version is given
      if (!inferredVersion && kStandardRequiredVersion[normalized]) {
        inferredVersion = kStandardRequiredVersion[normalized];
      }
    } else {
      warning(
        `PDF standard '${s}' is not supported by LaTeX and will be ignored`,
      );
    }
  }

  return {
    version: explicitVersion ?? inferredVersion,
    standards: result,
    needsTagging,
  };
}
1389

1390
Product

Resources

Company