Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
quarto-dev
GitHub Repository: quarto-dev/quarto-cli
Path: blob/main/src/format/pdf/format-pdf.ts
12925 views
1
/*
2
* format-pdf.ts
3
*
4
* Copyright (C) 2020-2022 Posit Software, PBC
5
*/
6
7
import { basename, extname, join } from "../../deno_ral/path.ts";
8
9
import { mergeConfigs } from "../../core/config.ts";
10
import { texSafeFilename } from "../../core/tex.ts";
11
12
import {
13
kBibliography,
14
kCapBottom,
15
kCapLoc,
16
kCapTop,
17
kCitationLocation,
18
kCiteMethod,
19
kClassOption,
20
kDefaultImageExtension,
21
kDocumentClass,
22
kEcho,
23
kFigCapLoc,
24
kFigDpi,
25
kFigFormat,
26
kFigHeight,
27
kFigWidth,
28
kHeaderIncludes,
29
kKeepTex,
30
kLang,
31
kNumberSections,
32
kPaperSize,
33
kPdfEngine,
34
kPdfStandard,
35
kPdfStandardApplied,
36
kReferenceLocation,
37
kShiftHeadingLevelBy,
38
kTblCapLoc,
39
kTopLevelDivision,
40
kWarning,
41
pdfStandardEnv,
42
} from "../../config/constants.ts";
43
import { warning } from "../../deno_ral/log.ts";
44
import { asArray } from "../../core/array.ts";
45
import { Format, FormatExtras, PandocFlags } from "../../config/types.ts";
46
47
import { createFormat } from "../formats-shared.ts";
48
49
import { RenderedFile, RenderServices } from "../../command/render/types.ts";
50
import { ProjectConfig, ProjectContext } from "../../project/types.ts";
51
import { BookExtension } from "../../project/types/book/book-shared.ts";
52
53
import { readLines } from "io/read-lines";
54
import { TempContext } from "../../core/temp.ts";
55
import { isLatexPdfEngine, pdfEngine } from "../../config/pdf.ts";
56
import { formatResourcePath } from "../../core/resources.ts";
57
import { kTemplatePartials } from "../../command/render/template.ts";
58
import { copyTo } from "../../core/copy.ts";
59
import { kCodeAnnotations } from "../html/format-html-shared.ts";
60
import { safeModeFromFile } from "../../deno_ral/fs.ts";
61
import { hasLevelOneHeadings as hasL1Headings } from "../../core/lib/markdown-analysis/level-one-headings.ts";
62
63
/**
 * Builds the PDF format: the base PDF format definition merged with the
 * book extension used for PDF books.
 */
export function pdfFormat(): Format {
  const basePdf = createPdfFormat("PDF");
  const bookSupport = {
    extensions: {
      book: pdfBookExtension,
    },
  };
  return mergeConfigs(basePdf, bookSupport);
}
73
74
/**
 * Builds the Beamer format on top of the base PDF format, created with
 * automatic heading shifting and KOMA defaults both turned off.
 */
export function beamerFormat(): Format {
  const basePdf = createPdfFormat("Beamer", false, false);

  // Beamer-specific execution defaults and class options
  const beamerDefaults = {
    execute: {
      [kFigWidth]: 10,
      [kFigHeight]: 7,
      [kEcho]: false,
      [kWarning]: false,
    },
    classoption: ["notheorems"],
  };

  return createFormat("Beamer", "pdf", basePdf, beamerDefaults);
}
90
91
/**
 * Builds a LaTeX (".tex") format with the given display name. It reuses the
 * base PDF format and adds a book extension tailored to LaTeX output.
 */
export function latexFormat(displayName: string): Format {
  const latexBookExtension = {
    // When LaTeX itself is the target no PDF is built, so the supporting
    // files have to be kept around.
    onSingleFilePreRender: (format: Format, _config?: ProjectConfig) => {
      format.render[kKeepTex] = true;
      return format;
    },
    formatOutputDirectory: () => "book-latex",
  };

  const basePdf = mergeConfigs(
    createPdfFormat(displayName),
    {
      extensions: {
        book: latexBookExtension,
      },
    },
  );

  return createFormat(displayName, "tex", basePdf);
}
119
120
/**
 * Creates the base PDF format definition shared by the PDF, Beamer and LaTeX
 * formats.
 *
 * @param displayName Name handed to `createFormat`. The value "Beamer" also
 *   selects the beamer template resources instead of the pdf ones.
 * @param autoShiftHeadings When true, documents with numbered sections, no
 *   level-one headings and no explicit top-level-division or
 *   shift-heading-level-by setting get their headings shifted by -1.
 * @param koma When true, KOMA-Script defaults (document class, class options,
 *   caption options, paper size) are contributed unless custom templates or a
 *   non-KOMA document class are in use.
 * @returns The format produced by `createFormat`.
 */
function createPdfFormat(
  displayName: string,
  autoShiftHeadings = true,
  koma = true,
): Format {
  // Document classes of the KOMA-Script family. A user-selected class outside
  // this list turns the KOMA defaults off.
  // NOTE: this list previously spelled "scrbook" as "srcbook", which made
  // scrbook documents lose the KOMA defaults.
  const komaDocumentClasses = [
    "scrbook",
    "scrreprt",
    "scrreport",
    "scrartcl",
    "scrarticle",
  ];

  return createFormat(
    displayName,
    "pdf",
    {
      execute: {
        [kFigWidth]: 5.5,
        [kFigHeight]: 3.5,
        [kFigFormat]: "pdf",
        [kFigDpi]: 300,
      },
      pandoc: {
        [kPdfEngine]: "lualatex",
        standalone: true,
        variables: {
          graphics: true,
          tables: true,
        },
        [kDefaultImageExtension]: "pdf",
      },
      metadata: {
        ["block-headings"]: true,
      },
      formatExtras: async (
        _input: string,
        markdown: string,
        flags: PandocFlags,
        format: Format,
        _libDir: string,
        services: RenderServices,
      ) => {
        const extras: FormatExtras = {};

        // only apply extras if this is latex (as opposed to context)
        const engine = pdfEngine(format.pandoc, format.render, flags);
        if (!isLatexPdfEngine(engine)) {
          return extras;
        }

        // Post processor for dealing with latex output
        extras.postprocessors = [
          pdfLatexPostProcessor(flags, format, services.temp),
        ];

        // user may have overridden koma, check for that here
        const documentclass = format.metadata[kDocumentClass] as
          | string
          | undefined;

        const usingCustomTemplates = format.pandoc.template !== undefined ||
          format.metadata[kTemplatePartials] !== undefined;

        // Decide per invocation instead of overwriting the captured `koma`
        // parameter, so one document's settings cannot leak into later calls
        // made on the same format object.
        const useKoma = koma &&
          !usingCustomTemplates &&
          (!documentclass || komaDocumentClasses.includes(documentclass));

        // default to KOMA article class. we do this here rather than
        // above so that projectExtras can override us
        if (useKoma) {
          // determine caption options
          const captionOptions: string[] = [];
          const tblCaploc = tblCapLocation(format);
          captionOptions.push(
            tblCaploc === kCapTop ? "tableheading" : "tablesignature",
          );
          if (figCapLocation(format) === kCapTop) {
            captionOptions.push("figureheading");
          }

          // establish default class options
          const defaultClassOptions = ["DIV=11"];
          if (format.metadata[kLang] !== "de") {
            defaultClassOptions.push("numbers=noendperiod");
          }

          // determine class options (filter by options already set by the user)
          const userClassOptions = format.metadata[kClassOption] as
            | string[]
            | undefined;
          const classOptions = defaultClassOptions.filter((option) => {
            if (Array.isArray(userClassOptions)) {
              // a default is dropped when the user supplied the same key
              const name = option.split("=")[0];
              return !userClassOptions.some((userOption) =>
                String(userOption).startsWith(name + "=")
              );
            } else {
              return true;
            }
          });

          const headerIncludes = [
            "\\KOMAoption{captions}{" + captionOptions.join(",") + "}",
          ];

          extras.metadata = {
            [kDocumentClass]: "scrartcl",
            [kClassOption]: classOptions,
            [kPaperSize]: "letter",
            [kHeaderIncludes]: headerIncludes,
          };
        }

        // Provide a custom template for this format
        // Partials can be the one from Quarto division
        const partialNamesQuarto: string[] = [
          "babel-lang",
          "before-bib",
          "biblio",
          "biblio-config",
          "citations",
          "doc-class",
          "graphics",
          "after-body",
          "before-body",
          "pandoc",
          "tables",
          "tightlist",
          "before-title",
          "title",
          "toc",
        ];
        // or the one from Pandoc division (since Pandoc 3.6.3)
        const partialNamesPandoc: string[] = [
          "after-header-includes",
          "common",
          "document-metadata",
          "font-settings",
          "fonts",
          "hypersetup",
          "passoptions",
        ];

        // Resolves the template and partial resource paths for format `to`;
        // Quarto partials use the ".tex" suffix, Pandoc partials ".latex".
        const createTemplateContext = function (
          to: string,
          partialNamesQuarto: string[],
          partialNamesPandoc: string[],
        ) {
          return {
            template: formatResourcePath(to, "pandoc/template.tex"),
            partials: [
              ...partialNamesQuarto.map((name) => {
                return formatResourcePath(to, `pandoc/${name}.tex`);
              }),
              ...partialNamesPandoc.map((name) => {
                return formatResourcePath(to, `pandoc/${name}.latex`);
              }),
            ],
          };
        };
        // Beamer doesn't use document-metadata partial (its template doesn't include it)
        const beamerPartialNamesPandoc = partialNamesPandoc.filter(
          (name) => name !== "document-metadata",
        );
        const isBeamer = displayName === "Beamer";
        extras.templateContext = createTemplateContext(
          isBeamer ? "beamer" : "pdf",
          partialNamesQuarto,
          isBeamer ? beamerPartialNamesPandoc : partialNamesPandoc,
        );

        // Don't shift the headings if we see any H1s (we can't shift up any longer)
        const hasLevelOneHeadings = await hasL1Headings(markdown);

        // pdfs with no other heading level oriented options get their heading level shifted by -1
        if (
          !hasLevelOneHeadings &&
          autoShiftHeadings &&
          (flags?.[kNumberSections] === true ||
            format.pandoc[kNumberSections] === true) &&
          flags?.[kTopLevelDivision] === undefined &&
          format.pandoc?.[kTopLevelDivision] === undefined &&
          flags?.[kShiftHeadingLevelBy] === undefined &&
          format.pandoc?.[kShiftHeadingLevelBy] === undefined
        ) {
          extras.pandoc = {
            [kShiftHeadingLevelBy]: -1,
          };
        }

        // pdfs with document class scrbook get number sections turned on
        // https://github.com/quarto-dev/quarto-cli/issues/2369
        extras.pandoc = extras.pandoc || {};
        if (
          documentclass === "scrbook" &&
          format.pandoc[kNumberSections] !== false &&
          flags[kNumberSections] !== false
        ) {
          extras.pandoc[kNumberSections] = true;
        }

        // Handle pdf-standard option for PDF/A, PDF/UA, PDF/X conformance.
        // Lookup order: render options, then metadata, then pdfStandardEnv().
        const pdfStandard = asArray(
          format.render?.[kPdfStandard] ?? format.metadata?.[kPdfStandard] ??
            pdfStandardEnv(),
        );
        if (pdfStandard.length > 0) {
          const { version, standards, needsTagging } =
            normalizePdfStandardForLatex(pdfStandard);
          // Set pdfstandard as a map if there are standards or a version
          if (standards.length > 0 || version) {
            extras.pandoc.variables = extras.pandoc.variables || {};
            const pdfstandardMap: Record<string, unknown> = {};
            if (standards.length > 0) {
              pdfstandardMap.standards = standards;
            }
            if (version) {
              pdfstandardMap.version = version;
            }
            if (needsTagging) {
              pdfstandardMap.tagging = true;
            }
            extras.pandoc.variables["pdfstandard"] = pdfstandardMap;
          }
          // Store applied standards in metadata for verapdf validation
          // (only standards that LaTeX actually supports, not the original list)
          if (standards.length > 0) {
            extras.metadata = extras.metadata || {};
            extras.metadata[kPdfStandardApplied] = standards;
          }
        }

        return extras;
      },
    },
  );
}
364
365
// Book extension for PDF output: marks the output as self contained and,
// when keep-tex is on, renames the intermediate "index.tex" after the final
// output file.
const pdfBookExtension: BookExtension = {
  selfContainedOutput: true,
  onSingleFilePostRender: (
    project: ProjectContext,
    renderedFile: RenderedFile,
  ) => {
    // Nothing to rename unless the tex file is being kept
    if (!renderedFile.format.render[kKeepTex]) {
      return;
    }

    // Name the tex file after the final output (with a tex-friendly filename)
    const finalOutputFile = renderedFile.file!;
    const outputStem = basename(finalOutputFile, extname(finalOutputFile));
    const texOutputFile = texSafeFilename(outputStem) + ".tex";

    const fromPath = join(project.dir, "index.tex");
    const toPath = join(project.dir, texOutputFile);
    Deno.renameSync(fromPath, toPath);
  },
};
385
// A line processor receives one line of the LaTeX output and returns the
// (possibly rewritten) line, or undefined to drop ("eat") the line.
type LineProcessor = (line: string) => string | undefined;

/**
 * Creates the post processor that rewrites the generated LaTeX file in two
 * line-by-line passes. Which processors run depends on the cite method,
 * citation/reference locations, caption locations and code annotation
 * settings of the format.
 */
function pdfLatexPostProcessor(
  flags: PandocFlags,
  format: Format,
  temp: TempContext,
) {
  return async (output: string) => {
    const citeMethod = format.pandoc[kCiteMethod];
    const usesBiblatex = citeMethod === "biblatex";
    const usesNatbib = !usesBiblatex && citeMethod === "natbib";

    // Processors that always run in the first pass
    const firstPass: LineProcessor[] = [
      sidecaptionLineProcessor(),
      calloutFloatHoldLineProcessor(),
      tableColumnMarginLineProcessor(),
      guidsProcessor(),
    ];

    // Bibliography placeholder handling depends on the cite method
    if (usesBiblatex) {
      firstPass.push(bibLatexBibligraphyRefsDivProcessor());
    } else if (usesNatbib) {
      const bibliographies = format.metadata[kBibliography] as
        | string[]
        | undefined;
      firstPass.push(natbibBibligraphyRefsDivProcessor(bibliographies));
    }

    // Filled during the first pass when pandoc's citeproc rendered the cites
    const renderedCites: Record<string, string[]> = {};
    const citesInMargin = format.metadata[kCitationLocation] === "margin";
    if (citesInMargin) {
      if (usesBiblatex) {
        firstPass.push(suppressBibLatexBibliographyLineProcessor());
        firstPass.push(bibLatexCiteLineProcessor());
      } else if (usesNatbib) {
        firstPass.push(suppressNatbibBibliographyLineProcessor());
        firstPass.push(natbibCiteLineProcessor());
      } else {
        // With the pandoc default citeproc the citations are raw latex, so
        // the rendered bibliography is first indexed and removed here; the
        // second pass then places the indexed entries.
        firstPass.push(
          indexAndSuppressPandocBibliography(renderedCites),
          cleanReferencesChapter(),
        );
      }
    }

    // Move longtable captions below if requested
    if (tblCapLocation(format) === kCapBottom) {
      firstPass.push(longtableBottomCaptionProcessor());
    }

    // If enabled, replace footnotes with side notes
    if (marginRefs(flags, format)) {
      firstPass.push(sideNoteLineProcessor());
    }
    firstPass.push(captionFootnoteLineProcessor());

    // Code annotations are processed unless explicitly false or "none"
    const codeAnnotations = format.metadata[kCodeAnnotations];
    const annotationsDisabled = codeAnnotations === false ||
      codeAnnotations === "none";
    if (!annotationsDisabled) {
      firstPass.push(codeAnnotationPostProcessor());
      firstPass.push(codeListAnnotationPostProcessor());
    }

    firstPass.push(tableSidenoteProcessor());

    // Pass 1
    await processLines(output, firstPass, temp);

    // Pass 2; these need to happen after the first pass
    const secondPass: LineProcessor[] = [longTableSidenoteProcessor()];
    if (Object.keys(renderedCites).length > 0) {
      secondPass.push(placePandocBibliographyEntries(renderedCites));
    }
    await processLines(output, secondPass, temp);
  };
}
471
472
// Table caption location: the table-specific setting, then the general
// caption setting, then top.
function tblCapLocation(format: Format) {
  const { metadata } = format;
  return metadata[kTblCapLoc] || metadata[kCapLoc] || kCapTop;
}
475
476
// Figure caption location: the figure-specific setting, then the general
// caption setting, then bottom.
function figCapLocation(format: Format) {
  const { metadata } = format;
  return metadata[kFigCapLoc] || metadata[kCapLoc] || kCapBottom;
}
479
480
// True when the reference location is "margin" in either the format's pandoc
// options or the command line flags.
function marginRefs(flags: PandocFlags, format: Format) {
  const fromFormat = format.pandoc[kReferenceLocation];
  if (fromFormat === "margin") {
    return true;
  }
  return flags[kReferenceLocation] === "margin";
}
484
485
// Runs every line of `inputFile` through the chain of line processors and
// then replaces `inputFile` with the result. A processor returning undefined
// drops the line, and the remaining processors are not consulted for it.
async function processLines(
  inputFile: string,
  lineProcessors: LineProcessor[],
  temp: TempContext,
) {
  // Processed lines are accumulated in a temp file
  const outputFile = temp.createFile({ suffix: ".tex" });
  const file = await Deno.open(inputFile);
  // Mode derived from the input file, reused for the file that replaces it
  const mode = safeModeFromFile(inputFile);
  try {
    for await (const rawLine of readLines(file)) {
      let current: string | undefined = rawLine;
      for (const applyProcessor of lineProcessors) {
        current = applyProcessor(current);
        if (current === undefined) {
          // a processor has 'eaten' the line
          break;
        }
      }
      if (current === undefined) {
        continue;
      }
      Deno.writeTextFileSync(outputFile, current + "\n", {
        append: true,
        mode,
      });
    }
  } finally {
    file.close();

    // The input file is overwritten even after a failure; the partially
    // processed file is meant to make debugging the error easier.
    copyTo(outputFile, inputFile);
  }
}
523
524
// Marker comments that open and close a side caption region
const kBeginScanRegex = /^%quartopost-sidecaption-206BE349/;
const kEndScanRegex = /^%\/quartopost-sidecaption-206BE349/;

// Swaps the opening and closing side caption marker lines for
// kbeginLongTablesideCap and kEndLongTableSideCap; every other line passes
// through unchanged.
const sidecaptionLineProcessor = () => {
  let insideRegion = false;
  return (line: string): string | undefined => {
    if (!insideRegion) {
      if (line.match(kBeginScanRegex)) {
        insideRegion = true;
        return kbeginLongTablesideCap;
      }
      return line;
    }

    if (line.match(kEndScanRegex)) {
      insideRegion = false;
      return kEndLongTableSideCap;
    }
    return line;
  };
};
549
550
// Reads the first command encountered as a balanced command
// (e.g. \caption{...} or \footnote{...}) and returns
// the complete command, i.e. everything up to and including the brace
// that closes the first group.
//
// This expects the latex string to start with the command. If the braces
// never balance, the whole string is returned.
//
// A backslash escapes the character that follows it, so escaped braces
// (\{ and \}) are not counted as grouping braces.
const readBalancedCommand = (latex: string) => {
  let braceCount = 0;
  let entered = false;
  let end = latex.length;
  for (let i = 0; i < latex.length; i++) {
    const char = latex.charAt(i);
    if (char === "\\") {
      // skip the escaped character (covers \{, \} and \\)
      i++;
      continue;
    }

    if (char === "{") {
      braceCount++;
      entered = true;
    } else if (char === "}") {
      braceCount--;
    }

    if (entered && braceCount === 0) {
      end = i + 1;
      break;
    }
  }
  return latex.slice(0, end);
};
575
576
// Relocates footnotes found inside the `\caption{}` of a latex element.
//
// Each footnote in the caption is replaced by a footnote mark, the caption
// gets a footnote-free short form (`\caption[short]{long}`), and the footnote
// texts are emitted after the element. Input without a caption, or whose
// caption holds no footnote, is returned unchanged.
const processElementCaptionFootnotes = (latexFigure: string) => {
  const footnoteMark = "\\footnote{";
  const captionMark = "\\caption{";

  // No caption means just let it go
  const captionStart = latexFigure.indexOf(captionMark);
  if (captionStart === -1) {
    return latexFigure;
  }

  // Split into: text before the caption, the caption command, the remainder
  const beforeCaption = latexFigure.substring(0, captionStart);
  const fromCaption = latexFigure.slice(captionStart);
  const captionCommand = readBalancedCommand(fromCaption);
  const afterCaption = fromCaption.slice(captionCommand.length);

  // Caption text without the command prefix and the closing brace
  let pending = captionCommand.slice(
    captionMark.length,
    captionCommand.length - 1,
  );

  // No footnotes in the caption, just leave it alone
  let notePos = pending.indexOf(footnoteMark);
  if (notePos === -1) {
    return latexFigure;
  }

  const plainCaption: string[] = []; // caption without any footnotes
  const markedCaption: string[] = []; // caption with footnote marks
  const notes: string[] = []; // footnote texts, in order of appearance
  do {
    // text preceding the footnote belongs to both caption variants
    const lead = pending.substring(0, notePos);
    plainCaption.push(lead);
    markedCaption.push(lead);

    // cut the footnote out, leaving a mark in its place
    const noteCommand = readBalancedCommand(pending.slice(notePos));
    markedCaption.push("\\footnotemark{}");
    notes.push(
      noteCommand.slice(footnoteMark.length, noteCommand.length - 1),
    );

    pending = pending.slice(notePos + noteCommand.length);
    notePos = pending.indexOf(footnoteMark);
  } while (notePos > -1);

  // whatever follows the last footnote
  plainCaption.push(pending);
  markedCaption.push(pending);

  const pieces: string[] = [
    beforeCaption,
    `\\caption[${plainCaption.join("")}]{${markedCaption.join("")}}`,
    afterCaption,
    "\n",
  ];

  // With several notes, rewind the footnote counter first so each
  // \footnotetext lines up with its mark
  const lastNote = notes.length - 1;
  if (lastNote > 0) {
    pieces.push(`\\addtocounter{footnote}{-${lastNote}}`);
  }
  notes.forEach((note, idx) => {
    pieces.push(`\\footnotetext{${note}}`);
    if (idx < lastNote) {
      pieces.push(`\\addtocounter{footnote}{1}`);
    }
  });

  return pieces.join("");
};
675
676
// Matches a longtable column spec of the form `p{(\columnwidth - N\tabcolsep)...`;
// group 1 is the text before it, group 2 everything from `N\tabcolsep)` on.
const kMatchLongTableSize = /^(.*)p{\(\\columnwidth - (\d+\\tabcolsep\).*$)/;

// Lines that open / close a longtable environment
const kStartLongTable = /^\\begin{longtable}/;
const kEndLongTable = /^\\end{longtable}/;

// Collects GUID definitions and substitutes them into later lines.
//
// A definition starts with a `%quarto-define-uuid: <guid>` line and ends with
// a `%quarto-end-define-uuid` line; the lines in between are concatenated
// (without separators) and trimmed to form the replacement text. All
// definition lines are dropped from the output. On every other line each
// known GUID is replaced by its text.
//
// The replacement is inserted verbatim: `$` sequences such as `$$` or `$&`
// in the definition (common in LaTeX math) are NOT interpreted as replacement
// patterns.
//
// Throws an Error if a definition ends without a GUID having been recorded.
const guidsProcessor = () => {
  let state: "looking-for-definition-start" | "looking-for-definition-end" =
    "looking-for-definition-start";
  const guidDefinitions: [string, string][] = [];
  let guidBeingProcessed: string | undefined;
  let guidContents: string[] = [];
  return (line: string): string | undefined => {
    switch (state) {
      case "looking-for-definition-start": {
        if (line.startsWith("%quarto-define-uuid: ")) {
          state = "looking-for-definition-end";
          line = line.replace(/^%quarto-define-uuid:\s*/, "");
          guidBeingProcessed = line.trim();
          return undefined;
        }
        for (const [key, value] of guidDefinitions) {
          // split/join performs a literal substitution; an empty key is
          // skipped since it cannot identify anything in the line
          if (key.length > 0) {
            line = line.split(key).join(value);
          }
        }
        return line;
      }
      case "looking-for-definition-end": {
        if (line === "%quarto-end-define-uuid") {
          state = "looking-for-definition-start";
          if (guidBeingProcessed === undefined) {
            throw new Error("guidBeingProcessed is undefined");
          }
          guidDefinitions.push([
            guidBeingProcessed,
            guidContents.join("").trim(),
          ]);
          guidContents = [];
          guidBeingProcessed = undefined;
          return undefined;
        } else {
          guidContents.push(line);
          return undefined;
        }
      }
    }
  };
};
722
723
// Widens longtable column specs inside a "tables in margin" region.
//
// The region is delimited by two marker comment lines, which are removed.
// Within a longtable inside the region, `p{(\columnwidth - ...` column sizes
// are rewritten to also include \marginparwidth and \marginparsep.
const tableColumnMarginLineProcessor = () => {
  const regionBegin = "% quarto-tables-in-margin-AB1927C9:begin";
  const regionEnd = "% quarto-tables-in-margin-AB1927C9:end";

  let insideRegion = false;
  let insideLongTable = false;

  return (line: string): string | undefined => {
    // Outside the region: only watch for the opening marker
    if (!insideRegion) {
      if (line === regionBegin) {
        insideRegion = true;
        return undefined;
      }
      return line;
    }

    // Inside the region but not in a table: watch for a table or the end marker
    if (!insideLongTable) {
      if (line.match(kStartLongTable)) {
        insideLongTable = true;
        return line;
      }
      if (line === regionEnd) {
        insideRegion = false;
        return undefined;
      }
      return line;
    }

    // Inside a longtable
    if (line.match(kEndLongTable)) {
      insideLongTable = false;
      return line;
    }
    const sized = line.match(kMatchLongTableSize);
    if (!sized) {
      return line;
    }
    return `${sized[1]}p{(\\marginparwidth + \\marginparsep + \\columnwidth - ${
      sized[2]
    }`;
  };
};
768
769
// Buffers every figure environment (from its \begin{figure} line through its
// \end{figure} line) and emits it as a single string after passing it through
// processElementCaptionFootnotes. Lines outside figures pass through.
const captionFootnoteLineProcessor = () => {
  // Lines of the figure currently being buffered; null while outside a figure
  let figureLines: string[] | null = null;

  return (line: string): string | undefined => {
    if (figureLines === null) {
      if (!line.match(/^\\begin{figure}.*$/)) {
        return line;
      }
      figureLines = [line];
      return undefined;
    }

    figureLines.push(line);
    if (!line.match(/^\\end{figure}%*$/)) {
      return undefined;
    }

    // The figure is complete: hand it over as one block and reset
    const figure = figureLines.join("\n");
    figureLines = null;
    return processElementCaptionFootnotes(figure);
  };
};
799
800
// Creates a function that moves sidenotes out of a latex environment.
//
// Every `\sidenote{\footnotesize ...}` in the input is replaced by
// `\sidenotemark{}` and the note text is emitted as `\sidenotetext{...}`
// directly after the first occurrence of `endMarker` that follows the last
// sidenote (e.g. after `\end{longtable}`).
//
// Note bodies may contain nested brace groups; a backslash escapes the
// character after it. A sidenote whose closing brace cannot be found is left
// untouched and scanning stops there. If `endMarker` is not present, the
// sidenote texts are appended at the end of the output.
const processSideNotes = (endMarker: string) => {
  const sideNoteMarker = "\\sidenote{\\footnotesize ";

  // Index of the brace that closes a sidenote whose body starts at the
  // beginning of `text`, or -1 when the body is never closed.
  const findSideNoteEnd = (text: string): number => {
    let depth = 0;
    for (let i = 0; i < text.length; i++) {
      const ch = text[i];
      if (ch === "\\") {
        // escaped character (e.g. \} or \\): skip it
        i++;
      } else if (ch === "{") {
        depth++;
      } else if (ch === "}") {
        if (depth === 0) {
          return i;
        }
        depth--;
      }
    }
    return -1;
  };

  return (latexLongTable: string) => {
    let strProcessing = latexLongTable;
    const strOutput: string[] = [];
    const sidenotes: string[] = [];

    let sidenotePos = strProcessing.indexOf(sideNoteMarker);
    while (sidenotePos > -1) {
      const remainingStr = strProcessing.substring(
        sidenotePos + sideNoteMarker.length,
      );
      const sideNoteEnd = findSideNoteEnd(remainingStr);
      if (sideNoteEnd === -1) {
        // Unterminated sidenote: keep the rest as is. (Without this exit the
        // loop would never make progress.)
        break;
      }

      strOutput.push(strProcessing.substring(0, sidenotePos));
      strOutput.push("\\sidenotemark{}");
      sidenotes.push(remainingStr.substring(0, sideNoteEnd));

      strProcessing = remainingStr.substring(sideNoteEnd + 1);
      sidenotePos = strProcessing.indexOf(sideNoteMarker);
    }

    // Ensure that we inject sidenotes after the end marker
    const endPos = strProcessing.indexOf(endMarker);
    const splitAt = endPos > -1
      ? endPos + endMarker.length
      : strProcessing.length;

    strOutput.push(strProcessing.substring(0, splitAt));
    for (const note of sidenotes) {
      strOutput.push(`\\sidenotetext{${note}}\n`);
    }
    strOutput.push(strProcessing.substring(splitAt));

    return strOutput.join("");
  };
};

const processLongTableSidenotes = processSideNotes("\\end{longtable}");
const processTableSidenotes = processSideNotes("\\end{table}");
864
865
// Builds a line processor factory that buffers a region of lines and hands
// the whole region to `callback` at once.
//
// A region starts at a line matching `beginRegex` and ends at the next line
// matching `endRegex` (both included). While buffering nothing is emitted;
// at the end the callback's result is emitted in place of the region. Lines
// outside a region pass through unchanged.
const sideNoteProcessor = (
  beginRegex: RegExp,
  endRegex: RegExp,
  callback: (str: string) => string,
) => {
  return () => {
    // Lines buffered for the current region; null while outside a region
    let buffered: string[] | null = null;

    return (line: string): string | undefined => {
      if (buffered === null) {
        if (!line.match(beginRegex)) {
          return line;
        }
        buffered = [line];
        return undefined;
      }

      buffered.push(line);
      if (!line.match(endRegex)) {
        return undefined;
      }

      // Region complete: process it as one string and reset the buffer
      const region = buffered.join("\n");
      buffered = null;
      return callback(region);
    };
  };
};

// Applies processLongTableSidenotes to each longtable environment
const longTableSidenoteProcessor = sideNoteProcessor(
  /^\\begin{longtable}.*$/,
  /^\\end{longtable}%*$/,
  processLongTableSidenotes,
);

// Applies processTableSidenotes to each table environment
const tableSidenoteProcessor = sideNoteProcessor(
  /^\\begin{table}.*$/,
  /^\\end{table}%*$/,
  processTableSidenotes,
);
912
913
// Inside a tcolorbox environment, bare `\begin{figure}` and
// `\begin{codelisting}` lines get the `[H]` placement option appended.
// All other lines are returned unchanged.
const calloutFloatHoldLineProcessor = () => {
  let insideBox = false;

  return (line: string): string | undefined => {
    if (!insideBox) {
      // The opening line itself is passed through untouched
      insideBox = line.match(/^\\begin{tcolorbox}/) !== null;
      return line;
    }

    if (line.match(/^\\end{tcolorbox}/)) {
      insideBox = false;
      return line;
    }
    if (line.match(/^\\begin{figure}$/)) {
      return "\\begin{figure}[H]";
    }
    if (line.match(/^\\begin{codelisting}$/)) {
      return "\\begin{codelisting}[H]";
    }
    return line;
  };
};
939
940
// Placeholder line marking where the bibliography should be placed
const kQuartoBibPlaceholderRegex = "%bib-loc-124C8010";

// biblatex: the first placeholder line becomes a heading-less
// \printbibliography; later placeholders are dropped, and so is any plain
// `\printbibliography` line that follows the first placeholder.
const bibLatexBibligraphyRefsDivProcessor = () => {
  let placed = false;

  return (line: string): string | undefined => {
    if (line === kQuartoBibPlaceholderRegex) {
      if (placed) {
        // already seen a refs div, just ignore this one
        return undefined;
      }
      placed = true;
      return "\\printbibliography[heading=none]";
    }

    const isDefaultBibliography = placed &&
      line.match(/^\\printbibliography$/);
    return isDefaultBibliography ? undefined : line;
  };
};
959
960
// natbib: when bibliographies are provided, the first placeholder line
// becomes a `\bibliography{...}` command (with the bibliography section
// heading emptied); every other placeholder is dropped. Once that has
// happened, any other `\bibliography{...}` line is dropped as well.
const natbibBibligraphyRefsDivProcessor = (bibs?: string[]) => {
  let placed = false;

  return (line: string): string | undefined => {
    if (line === kQuartoBibPlaceholderRegex) {
      if (!bibs || placed) {
        // nothing to place, or a refs div has already been handled
        return undefined;
      }
      placed = true;
      const bibList = bibs.join(",");
      return `\\renewcommand{\\bibsection}{}\n\\bibliography{${bibList}}`;
    }

    if (placed && line.match(/^\s*\\bibliography{.*}$/)) {
      return undefined;
    }
    return line;
  };
};
980
981
// Blanks out the biblatex \printbibliography command: a line consisting of
// exactly that command is replaced by an empty line.
const suppressBibLatexBibliographyLineProcessor = () => {
  return (line: string): string | undefined => {
    const isPrintBibliography = line.match(/^\\printbibliography$/) !== null;
    return isPrintBibliography ? "" : line;
  };
};
990
991
// Replaces the natbib bibliography declaration with a version that typesets
// the bibliography into a save box, so that it is not printed in the PDF.
const suppressNatbibBibliographyLineProcessor = () => {
  // Wraps the given bibliography list in a save box
  const boxedBibliography = (_match: string, bib: string) => {
    return "\\newsavebox\\mytempbib\n" +
      `\\savebox\\mytempbib{\\parbox{\\textwidth}{\\bibliography{${bib}}}}`;
  };

  return (line: string): string | undefined => {
    return line.replace(/^\s*\\bibliography{(.*)}$/, boxedBibliography);
  };
};
1001
1002
// Citation placeholder of the form {?quarto-cite:(id)}
const kQuartoCiteRegex = /{\?quarto-cite:(.*?)}/g;

// biblatex: turns every citation placeholder on the line into \fullcite{id}
const bibLatexCiteLineProcessor = () => {
  const toFullCite = (_match: string, citeKey: string) =>
    `\\fullcite{${citeKey}}`;

  return (line: string): string | undefined => {
    // the regex is global, so replace() substitutes every occurrence
    return line.replace(kQuartoCiteRegex, toFullCite);
  };
};
1011
1012
// natbib: turns every citation placeholder on the line into \bibentry{id}
const natbibCiteLineProcessor = () => {
  const toBibEntry = (_match: string, citeKey: string) =>
    `\\bibentry{${citeKey}}`;

  return (line: string): string | undefined => {
    // the regex is global, so replace() substitutes every occurrence
    return line.replace(kQuartoCiteRegex, toBibEntry);
  };
};
1019
1020
// Rewrites every footnote on the line into a footnote-sized sidenote
const sideNoteLineProcessor = () => {
  const sideNoteOpening = "\\sidenote{\\footnotesize ";

  return (line: string): string | undefined => {
    // the regex is global, so replace() substitutes every occurrence
    return line.replace(/\\footnote{/g, sideNoteOpening);
  };
};
1025
1026
// Moves longtable captions to the bottom of the table.
//
// Inside a longtable the caption (a single line, or several lines up to one
// ending in \tabularnewline) is removed and held back. It is re-emitted
// before `\endlastfoot` (preceded by a \tabularnewline) when that line is
// seen, otherwise before `\end{longtable}`. A caption still held back when
// `\end{document}` arrives is emitted before that line.
const longtableBottomCaptionProcessor = () => {
  let insideLongTable = false;
  let captionContinues = false;
  let heldCaption: string | undefined;

  return (line: string): string | undefined => {
    // Flush a caption that never found its place
    if (line.match(/^\\end{document}/) && heldCaption) {
      return `${heldCaption}\n${line}`;
    }

    // Outside a longtable only the start of the next table matters
    if (!insideLongTable) {
      insideLongTable = line.match(/^\\begin{longtable}/) !== null;
      return line;
    }

    // In the middle of a multi-line caption: keep collecting
    if (captionContinues) {
      heldCaption = `${heldCaption}\n${line}`;
      captionContinues = !line.match(/\\tabularnewline$/);
      return undefined;
    }

    // A caption that is complete on this line
    const isCompleteCaption = line.match(/^\\caption.*?\\tabularnewline$/) ||
      line.match(/^\\caption{.*}\\\\$/);
    if (isCompleteCaption) {
      heldCaption = line;
      return undefined;
    }

    // The first line of a multi-line caption
    if (line.match(/^\\caption.*?/)) {
      heldCaption = line;
      captionContinues = true;
      return undefined;
    }

    // Preferred spot: right before \endlastfoot
    if (line.match(/^\\endlastfoot/) && heldCaption) {
      const withCaption = `\\tabularnewline\n${heldCaption}\n${line}`;
      heldCaption = undefined;
      return withCaption;
    }

    // Fallback spot: right before the end of the table
    if (line.match(/^\\end{longtable}$/)) {
      insideLongTable = false;
      if (heldCaption) {
        const withCaption = heldCaption + "\n" + line;
        heldCaption = undefined;
        return withCaption;
      }
    }

    return line;
  };
};
1072
1073
// Matches the chapter heading of the references chapter, e.g.
// `\chapter*{References}\label{references}`; group 1 is the chapter title.
const kChapterRefNameRegex = /^\\chapter\*?{(.*?)}\\label{references.*?}$/;

// Removes the references chapter scaffolding from the latex output: the
// chapter heading line itself and, once the heading has been seen, any line
// containing the matching `\addcontentsline{toc}{chapter}{<title>}` or
// `\markboth{<title>}{<title>}` command.
//
// The title is compared as literal text (not compiled into a regular
// expression), so titles containing characters such as `(`, `+` or `[`
// are handled safely.
const cleanReferencesChapter = () => {
  let tocEntry: string | undefined;
  let markEntry: string | undefined;

  return (line: string): string | undefined => {
    const chapterRefMatch = line.match(kChapterRefNameRegex);
    if (chapterRefMatch) {
      const refChapterName = chapterRefMatch[1];
      tocEntry = `\\addcontentsline{toc}{chapter}{${refChapterName}}`;
      markEntry = `\\markboth{${refChapterName}}{${refChapterName}}`;
      // Eat this line
      return undefined;
    }

    if (tocEntry !== undefined && line.includes(tocEntry)) {
      // Eat this line
      return undefined;
    }
    if (markEntry !== undefined && line.includes(markEntry)) {
      // Eat this line
      return undefined;
    }
    return line;
  };
};
1101
1102
// Builds a line filter that removes the Pandoc-generated bibliography from
// the output while recording each entry's lines in `renderedCites`, keyed by
// the citation key taken from its `\bibitem` line.
//
// The bibliography starts at the `\phantomsection\label{refs}` line and ends
// at `\end{CSLReferences}`. Every line in that range (both markers included)
// is dropped by returning `undefined`; all other lines pass through unchanged.
const indexAndSuppressPandocBibliography = (
  renderedCites: Record<string, string[]>,
) => {
  let insideBibliography = false;
  let activeKey: string | undefined = undefined;

  return (line: string): string | undefined => {
    if (!insideBibliography) {
      // Outside the bibliography only the opening marker is of interest
      if (/^(\\protect)?\\phantomsection\\label{refs}$/.test(line)) {
        insideBibliography = true;
        return undefined;
      }
      return line;
    }

    if (/^\\end{CSLReferences}$/.test(line)) {
      insideBibliography = false;
      return undefined;
    }

    const bibItem = line.match(/\\bibitem\[\\citeproctext\]{ref\-(.*?)}/);
    if (bibItem && bibItem[1]) {
      // A new entry begins; its `\bibitem` line is the first recorded line
      activeKey = bibItem[1];
      renderedCites[activeKey] = [line];
    } else if (line.length === 0) {
      // A blank line closes the current entry
      activeKey = undefined;
    } else if (activeKey) {
      renderedCites[activeKey].push(line);
    }

    // Everything inside the bibliography is suppressed
    return undefined;
  };
};
1137
1138
// Line patterns used to track whether we are inside a figure / side caption
const kInSideCaptionRegex = /^\\sidecaption{/;
const kBeginFigureRegex = /^\\begin{figure}\[.*?\]$/;
const kEndFigureRegex = /^\\end{figure}\%?$/;

// Builds a line processor that substitutes citation placeholders (matched by
// kQuartoCiteRegex) with the rendered bibliography lines stored in
// `renderedCites`.
//
// Outside of a side caption, each placeholder is replaced in place by its
// lines wrapped in a CSLReferences environment. Inside a side caption only
// the lines after the first are inlined; the first line of each entry is held
// back and emitted in a CSLReferences environment right after the line that
// closes the figure. Placeholders with no entry in `renderedCites` are
// replaced by the bare key.
const placePandocBibliographyEntries = (
  renderedCites: Record<string, string[]>,
) => {
  type ScanState = "scanning" | "in-figure" | "in-sidecaption";

  let state: ScanState = "scanning";
  // First lines of entries cited inside a side caption, awaiting the figure end
  let deferredEntryLines: string[] = [];

  // Computes the state that applies once `line` has been read
  const advance = (current: ScanState, line: string): ScanState => {
    if (current === "scanning") {
      return kBeginFigureRegex.test(line) ? "in-figure" : current;
    }
    if (current === "in-figure" && kInSideCaptionRegex.test(line)) {
      return "in-sidecaption";
    }
    // Either figure state ends when the figure environment closes
    return kEndFigureRegex.test(line) ? "scanning" : current;
  };

  return (line: string): string | undefined => {
    state = advance(state, line);

    // Back outside a figure with deferred entries: flush them after this line
    if (state === "scanning" && deferredEntryLines.length > 0) {
      const flushed = [
        line,
        "\n\\begin{CSLReferences}{2}{0}",
        ...deferredEntryLines,
        "\\end{CSLReferences}\n",
      ].join("\n");
      deferredEntryLines = [];
      return flushed;
    }

    return line.replaceAll(kQuartoCiteRegex, (_match, citeKey) => {
      const citeLines = renderedCites[citeKey];
      if (!citeLines) {
        return citeKey;
      }
      if (state === "in-sidecaption" && citeLines.length > 0) {
        deferredEntryLines.push(citeLines[0]);
        return ["", ...citeLines.slice(1)].join("\n");
      }
      return [
        "\n\\begin{CSLReferences}{2}{0}",
        ...citeLines,
        "\\end{CSLReferences}\n",
      ].join("\n");
    });
  };
};
1207
1208
// Annotation marker inside a syntax-highlighted comment token
const kCodeAnnotationRegex =
  /(.*)\\CommentTok\{(.*?)[^\s]+? +\\textless\{\}(\d+)\\textgreater\{\}.*\}$/gm;
// Annotation marker at the end of a non-highlighted code line: `% (n)`
const kCodePlainAnnotationRegex = /(.*)% \((\d+)\)$/g;

// Builds a line processor that turns code annotation markers into visible
// annotation numbers.
//
// A marker whose number equals the most recently emitted one is removed
// without emitting the number again. The remembered number is cleared
// whenever a `\begin{Shaded}` line is seen.
const codeAnnotationPostProcessor = () => {
  let previousNumber: string | undefined;

  // Records `num` as the latest annotation; false when it repeats the last one
  const isNewAnnotation = (num: string): boolean => {
    if (num === previousNumber) {
      return false;
    }
    previousNumber = num;
    return true;
  };

  // Highlighted code: right-align a circled number after the code
  const renderHighlighted = (
    _match: string,
    prefix: string,
    comment: string,
    num: string,
  ): string => {
    if (!isNewAnnotation(num)) {
      return `${prefix}`;
    }
    // Any comment text preceding the marker is kept by rebuilding the
    // comment token without the annotation
    const lead = comment.length > 0
      ? `${prefix}\\CommentTok{${comment}}`
      : prefix;
    return `${lead}\\hspace*{\\fill}\\NormalTok{\\circled{${num}}}`;
  };

  // Plain code: pad so that the `(n)` marker ends at column 75 when it fits
  const renderPlain = (_match: string, prefix: string, num: string): string => {
    if (!isNewAnnotation(num)) {
      return `${prefix}`;
    }
    const marker = `(${num})`;
    const gap = " ".repeat(Math.max(0, 75 - prefix.length - marker.length));
    return `${prefix}${gap}${marker}`;
  };

  return (line: string): string | undefined => {
    if (line === "\\begin{Shaded}") {
      previousNumber = undefined;
    }

    // Both patterns carry the global flag, so every match is replaced;
    // highlighted markers are handled before plain ones
    return line
      .replace(kCodeAnnotationRegex, renderHighlighted)
      .replace(kCodePlainAnnotationRegex, renderPlain);
  };
};
1260
1261
// Placeholder marking an annotation number in the annotation list.
// Only the marker itself is matched so that every marker on a line is found;
// a greedy leading `(.*)` would consume the line and leave all but the last
// marker untouched.
const kListAnnotationRegex = /5CB6E08D-list-annote-(\d+)/g;

// Builds a line processor that replaces each annotation list placeholder
// with `\circled{n}`, where n is the number embedded in the placeholder.
// Text around the placeholders is left unchanged.
const codeListAnnotationPostProcessor = () => {
  return (line: string): string | undefined => {
    return line.replace(
      kListAnnotationRegex,
      (_match: string, annotationNumber: string) =>
        `\\circled{${annotationNumber}}`,
    );
  };
};
1272
1273
// LaTeX emitted before a longtable: opens a group and redefines
// \LT@makecaption so that the caption is typeset in a \parbox of
// \marginparwidth placed beyond \textwidth.
const kbeginLongTablesideCap = [
  "{",
  "\\makeatletter",
  "\\def\\LT@makecaption#1#2#3{%",
  "\\noalign{\\smash{\\hbox{\\kern\\textwidth\\rlap{\\kern\\marginparsep",
  "\\parbox[t]{\\marginparwidth}{%",
  "\\footnotesize{%",
  "\\vspace{(1.1\\baselineskip)}",
  "#1{#2: }\\ignorespaces #3}}}}}}%",
  "}",
  "\\makeatother",
].join("\n");

// Closes the group opened by kbeginLongTablesideCap
const kEndLongTableSideCap = "}";
1285
1286
// LaTeX-supported PDF standards (from latex3/latex2e DocumentMetadata)
// See: https://github.com/latex3/latex2e - documentmetadata-support.dtx
const kLatexSupportedStandards = new Set([
  // PDF/A standards (note: a-1a is NOT supported, only a-1b)
  "a-1b",
  "a-2a",
  "a-2b",
  "a-2u",
  "a-3a",
  "a-3b",
  "a-3u",
  "a-4",
  "a-4e",
  "a-4f",
  // PDF/X standards
  "x-4",
  "x-4p",
  "x-5g",
  "x-5n",
  "x-5pg",
  "x-6",
  "x-6n",
  "x-6p",
  // PDF/UA standards (only ua-2 is supported by LaTeX)
  "ua-2",
]);

// Standards that require PDF tagging (document structure)
// - PDF/A level "a" variants require tagged structure per PDF/A spec
// - PDF/UA standards require tagging for universal accessibility
//   (LaTeX does NOT automatically enable tagging for UA standards)
// Note: "ua-1" is listed here but is absent from kLatexSupportedStandards,
// so normalizePdfStandardForLatex never reaches the tagging check for it.
const kTaggingRequiredStandards = new Set([
  "a-2a",
  "a-3a",
  "ua-1",
  "ua-2",
]);

// Explicit PDF versions that are accepted: 1.4 through 1.7, and 2.0
const kVersionPattern = /^(1\.[4-7]|2\.0)$/;

// PDF version required by each standard (maximum version limits)
// LaTeX defaults to PDF 2.0 with \DocumentMetadata, but some standards
// have maximum version requirements that are incompatible with 2.0
// Note: a-1a is intentionally omitted as LaTeX doesn't support it
const kStandardRequiredVersion: Record<string, string> = {
  // PDF/A-1 requires exactly PDF 1.4 (only a-1b supported by LaTeX)
  "a-1b": "1.4",
  // PDF/A-2 and PDF/A-3 have maximum version of 1.7
  "a-2a": "1.7",
  "a-2b": "1.7",
  "a-2u": "1.7",
  "a-3a": "1.7",
  "a-3b": "1.7",
  "a-3u": "1.7",
  // PDF/A-4, PDF/UA-1, PDF/UA-2 all work with PDF 2.0 (the default)
};

// Splits a user-supplied list of PDF standards / versions into what LaTeX
// can be given.
//
// Each entry is lowercased and stripped of a leading "pdf", "pdf/" or "pdf-".
// Entries matching kVersionPattern are treated as an explicit PDF version,
// entries found in kLatexSupportedStandards are kept as standards, and any
// other string or number triggers a warning and is ignored. Entries that are
// neither strings nor numbers are skipped silently.
//
// Returns:
// - version: the first explicit version if one was given anywhere in the
//   list; otherwise the version required by the first listed standard that
//   has an entry in kStandardRequiredVersion; otherwise undefined
// - standards: the accepted standards, normalized, in input order
// - needsTagging: true if any accepted standard is in
//   kTaggingRequiredStandards
function normalizePdfStandardForLatex(
  standards: unknown[],
): { version?: string; standards: string[]; needsTagging: boolean } {
  // Tracked separately so that an explicit version wins even when it is
  // listed after a standard that implies a version
  let explicitVersion: string | undefined;
  let inferredVersion: string | undefined;
  const result: string[] = [];
  let needsTagging = false;

  for (const s of standards) {
    // Convert to string - YAML may parse versions like 2.0 as integer 2
    let str: string;
    if (typeof s === "number") {
      // Handle YAML numeric parsing: integer 2 -> "2.0", float 1.4 -> "1.4"
      str = Number.isInteger(s) ? `${s}.0` : String(s);
    } else if (typeof s === "string") {
      str = s;
    } else {
      continue;
    }
    // Normalize: lowercase, remove any "pdf" prefix
    const normalized = str.toLowerCase().replace(/^pdf[/-]?/, "");

    if (kVersionPattern.test(normalized)) {
      // Use first explicit version (ignore subsequent ones)
      if (explicitVersion === undefined) {
        explicitVersion = normalized;
      }
    } else if (kLatexSupportedStandards.has(normalized)) {
      // LaTeX is case-insensitive, pass through lowercase
      result.push(normalized);
      // Check if this standard requires tagging
      if (kTaggingRequiredStandards.has(normalized)) {
        needsTagging = true;
      }
      // Remember the version implied by the first standard that has one
      if (inferredVersion === undefined) {
        inferredVersion = kStandardRequiredVersion[normalized];
      }
    } else {
      warning(
        `PDF standard '${s}' is not supported by LaTeX and will be ignored`,
      );
    }
  }

  return {
    version: explicitVersion ?? inferredVersion,
    standards: result,
    needsTagging,
  };
}
1389
1390