Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
quarto-dev
GitHub Repository: quarto-dev/quarto-cli
Path: blob/main/src/resources/pandoc/datadir/readqmd.lua
12926 views
1
-- read qmd with quarto syntax extensions and produce quarto's extended AST
2
-- Copyright (C) 2023-2024 Posit Software, PBC
3
--
4
-- Originally by Albert Krewinkel
5
6
local md_shortcode = require("lpegshortcode")
7
local md_fenced_div = require("lpegfenceddiv")
8
9
-- Support the same format extensions as pandoc's Markdown reader
10
-- NOTE(review): deliberately assigned as a global (no `local`); presumably
-- looked up by name by pandoc's custom-reader machinery -- confirm before
-- turning this into a local.
Extensions = pandoc.format.extensions 'markdown'
11
12
-- we replace invalid tags with random strings of the same size
13
-- to safely allow code blocks inside pipe tables
14
-- note that we can't use uppercase letters here
15
-- because pandoc canonicalizes classes to lowercase.
16
--- Build a random string made only of lowercase ASCII letters.
-- Characters are drawn one at a time with math.random, so the result depends
-- on the current state of Lua's random number generator.
-- @tparam number size number of characters to generate
-- @treturn string a string of exactly `size` lowercase letters
local function random_string(size)
  local alphabet = "abcdefghijklmnopqrstuvwxyz"
  local picked = {}
  for i = 1, size do
    local pos = math.random(1, #alphabet)
    picked[i] = alphabet:sub(pos, pos)
  end
  return table.concat(picked, "")
end
25
26
--- Collect the distinct brace-delimited tags that directly follow a code fence.
-- A tag is a run of "{", then characters other than ".", "=" and line breaks,
-- then a run of "}", appearing after three or more backticks at the start of
-- the text or at the start of a line (optionally indented).
-- @tparam string str text to scan
-- @treturn table sequence of unique tags, in order of first appearance
local function find_invalid_tags(str)
  -- Lua patterns have no alternation ("|"), so the "start of subject" case
  -- and the "after a newline" case are spelled as two separate patterns.
  --
  -- Horizontal whitespace is written [ \t\f\v] (C standard, 7.4.1.10,
  -- isspace) because %s would also match \n and \r.
  --
  -- Inside the braces we exclude:
  --   "."  so that {.python} is not caught
  --   "="  so that {foo="bar"} is not caught
  --   line breaks, so that a tag never spans several lines
  local fence_patterns = {
    "^[ \t\f\v]*(```+[ \t\f\v]*)(%{+[^.=\n\r]*%}+)",
    "\n[ \t\f\v]*(```+[ \t\f\v]*)(%{+[^.=\n\r]+%}+)"
  }

  -- Try each pattern in order starting at `from`; return the end index and
  -- the tag of the first pattern that matches, or nil when none does.
  local function next_tag(from)
    for i = 1, #fence_patterns do
      local first, last, _, found = str:find(fence_patterns[i], from)
      if first ~= nil then
        return last, found
      end
    end
    return nil
  end

  local seen = {}
  local ordered = {}
  local last, found = next_tag(1)
  while found ~= nil do
    if not seen[found] then
      seen[found] = true
      ordered[#ordered + 1] = found
    end
    -- resume scanning right after the previous match
    last, found = next_tag(last + 1)
  end
  return ordered
end
65
66
--- Replace every invalid code-fence tag with a random lowercase placeholder
-- of the same length.
-- Each placeholder is chosen so that it does not occur anywhere in the
-- current text.
-- @tparam string str text to process
-- @treturn string the text with the tags after code fences replaced
-- @treturn table map from placeholder string to the original tag
local function escape_invalid_tags(str)
  local tags = find_invalid_tags(str)
  -- we must now replace the tags in a careful order. Specifically,
  -- we can't replace a key that's a substring of a larger key without
  -- first replacing the larger key.
  --
  -- ie. if we replace {python} before {{python}}, Bad Things Happen.
  -- so we sort the tags by descending size, which suffices
  table.sort(tags, function(a, b) return #b < #a end)

  local max_attempts = 100
  local replacements = {}
  for _, k in ipairs(tags) do
    -- Draw candidates until one does not already occur in the text.
    -- `replacement` stays nil only when every one of the attempts collided,
    -- so a success on the very last attempt is not reported as a failure.
    local replacement
    for _ = 1, max_attempts do
      local candidate = random_string(#k)
      if str:find(candidate, 1, true) == nil then
        replacement = candidate
        break
      end
    end
    if replacement == nil then
      -- luacov: disable
      print("Internal error, could not find safe replacement for "..k.." after 100 tries")
      print("Please file a bug at https://github.com/quarto-dev/quarto-cli")
      os.exit(1)
      -- luacov: enable
    end

    -- replace all lua special pattern characters with their
    -- escaped versions
    local safe_pattern = k:gsub("([%^%$%(%)%%%.%[%]%*%+%-%?])", "%%%1")
    replacements[replacement] = k

    -- Keep the fence prefix (capture 1) and swap only the tag. The
    -- placeholder contains lowercase letters only, so it is safe to splice
    -- into a gsub replacement string.
    local with_placeholder = "%1" .. replacement
    str = str
      :gsub("^([ \t\f\v]*```+[ \t\f\v]*)" .. safe_pattern, with_placeholder)
      :gsub("(\n[ \t\f\v]*```+[ \t\f\v]*)" .. safe_pattern, with_placeholder)
  end
  return str, replacements
end
104
105
--- Put the original tags back wherever their placeholders occur in `str`.
-- @tparam string str text possibly containing placeholder strings
-- @tparam table tags map from placeholder string to the original tag
-- @treturn string the text with every placeholder replaced by its tag
local function unescape_invalid_tags(str, tags)
  for replacement, k in pairs(tags) do
    -- A function replacement is inserted verbatim, so "%" in the tag never
    -- needs escaping. Escaping "$" as "%$" (as a replacement *string* would
    -- require nothing of the sort) is rejected by Lua 5.2+ with
    -- "invalid use of '%' in replacement string".
    str = str:gsub(replacement, function() return k end)
  end
  return str
end
115
116
-- Convert a hexadecimal string back to the original string
117
--- Decode a hexadecimal string back into the bytes it encodes.
-- Input is consumed two characters at a time; a trailing unpaired character
-- is left as-is.
-- @tparam string hex string of hexadecimal digit pairs
-- @treturn string the decoded string (a single return value)
local function hex_to_string(hex)
  local decoded = hex:gsub('..', function(pair)
    return string.char(tonumber(pair, 16))
  end)
  return decoded
end
122
123
--- Read qmd text and return the resulting pandoc document.
-- The text is pre-processed (fenced-div fix-up, invalid code-fence tag
-- escaping, shortcode encoding), parsed with pandoc.read, and then walked to
-- undo the escaping/encoding in the places where it must not survive.
-- @tparam string txt the qmd source
-- @param opts reader options; `opts.extensions` is iterated and `opts` is
--   passed on to pandoc.read (presumably a pandoc ReaderOptions -- confirm
--   against callers)
-- @return the document returned by pandoc.read(...):walk
local function readqmd(txt, opts)
  -- Marker produced by the shortcode pre-pass: a fixed UUID followed by the
  -- hex-encoded original text between semicolons.
  local uuid_pattern = "b58fc729%-690b%-4000%-b19f%-365a4093b2ff;([A-Fa-f0-9]+);"

  -- Decode every UUID-encoded span in `s`; returns the string only
  -- (gsub's match count is dropped).
  local function decode_shortcodes(s)
    local decoded = s:gsub(uuid_pattern, hex_to_string)
    return decoded
  end

  local tags
  txt = md_fenced_div.attempt_to_fix_fenced_div(txt)
  txt, tags = escape_invalid_tags(txt)
  txt = md_shortcode.parse_md_shortcode_2(txt)

  -- Format flavor, i.e., which extensions should be enabled/disabled.
  local flavor = { format = "markdown", extensions = {} }
  if param("user-defined-from") then
    flavor = _quarto.format.parse_format(param("user-defined-from"))
  else
    for _, ext in pairs(opts.extensions) do
      flavor.extensions[ext] = true
    end
  end

  -- ### Opt-out some extensions that we know we won't support for now ###
  -- https://pandoc.org/MANUAL.html#extension-table_attributes
  -- https://github.com/quarto-dev/quarto-cli/pull/13249#issuecomment-3715267414
  -- Only disable if the extension is actually supported by the format
  if pandoc.format.all_extensions(flavor.format):includes('table_attributes') then
    flavor.extensions["table_attributes"] = false
  end

  -- parse_shortcode overparses shortcodes inside code blocks, link targets, etc.
  -- so we need to undo that damage here

  -- For elements whose payload is a plain `text` field.
  local function unshortcode_text(el)
    el.text = decode_shortcodes(el.text)
    return el
  end

  -- For elements carrying attributes: decode string attribute values.
  local function filter_attrs(el)
    for key, value in pairs(el.attributes) do
      if type(value) == "string" then
        local decoded = decode_shortcodes(value)
        -- we avoid always assigning to slightly workaround
        -- what appears to be a foundational problem with Pandoc's Lua API
        -- while accessing attributes with repeated keys.
        -- Quarto is still going to be broken for the case
        -- where there are shortcodes inside values of attributes with
        -- repeated keys:
        --
        -- []{k='{{< meta k1 >}}' k='{{< meta k2 >}}'}
        --
        -- But I don't know how to work around this.
        if decoded ~= value then
          el.attributes[key] = decoded
        end
      end
    end
    return el
  end

  -- Split a Str containing encoded shortcodes into plain Str pieces and the
  -- inlines obtained by re-parsing each shortcode.
  local function expand_str(str_node)
    local text = str_node.text
    -- Quick check: if UUID not present at all, return as-is
    if not text:find("b58fc729-690b-4000-b19f-365a4093b2ff", 1, true) then
      return nil
    end

    local pieces = pandoc.Inlines{}
    local cursor = 1
    local first, last, hex_content = text:find(uuid_pattern, cursor)
    while first do
      -- Text before the match becomes its own Str node (if non-empty)
      if first > cursor then
        table.insert(pieces, pandoc.Str(text:sub(cursor, first - 1)))
      end

      -- hex -> original shortcode -> markdown span syntax -> pandoc inlines
      local shortcode_text = hex_to_string(hex_content)
      local parsed_md = md_shortcode.parse_md_shortcode(shortcode_text) or ""
      local parsed = pandoc.read(parsed_md, "markdown")
      local first_block = parsed.blocks[1]
      local inlines = first_block and first_block.content or pandoc.Inlines{}
      for _, inline in ipairs(inlines) do
        table.insert(pieces, inline)
      end

      -- Continue right after the match
      cursor = last + 1
      first, last, hex_content = text:find(uuid_pattern, cursor)
    end

    -- No more matches; keep whatever text is left
    if cursor <= #text then
      table.insert(pieces, pandoc.Str(text:sub(cursor)))
    end
    return pieces
  end

  local filter = {
    CodeBlock = function (cb)
      -- placeholders that ended up as classes are mapped back to their tags
      cb.classes = cb.classes:map(function (class)
        return tags[class] or class
      end)
      cb.text = unescape_invalid_tags(decode_shortcodes(cb.text), tags)
      return cb
    end,
    Code = unshortcode_text,
    RawInline = unshortcode_text,
    RawBlock = unshortcode_text,
    Math = unshortcode_text,
    Header = filter_attrs,
    Span = filter_attrs,
    Div = filter_attrs,
    Link = function (l)
      l = filter_attrs(l)
      l.target = decode_shortcodes(l.target)
      return l
    end,
    Image = function (i)
      i = filter_attrs(i)
      -- Replace UUID-encoded shortcodes in i.src
      i.src = decode_shortcodes(i.src)
      return i
    end,
    Str = expand_str
  }

  local doc = pandoc.read(txt or "", flavor, opts):walk(filter)
  return doc
end
259
260
-- Names of the reader option fields copied by options_to_meta and
-- meta_to_options.
local reader_option_keys = {
  "abbreviations", "columns", "default_image_extension",
  "extensions", "indented_code_classes", "standalone",
  "strip_comments", "tab_stops", "track_changes",
}
271
272
--- Copy the fields listed in reader_option_keys from `opts` into a new table.
-- @param opts value indexable by the reader option names
-- @treturn table plain table holding only those fields
local function options_to_meta(opts)
  local meta = {}
  for i = 1, #reader_option_keys do
    local name = reader_option_keys[i]
    meta[name] = opts[name]
  end
  return meta
end
279
280
--- Build reader options from the fields listed in reader_option_keys.
-- @param meta value indexable by the reader option names
-- @return the result of pandoc.ReaderOptions applied to the copied fields
local function meta_to_options(meta)
  local fields = {}
  for i = 1, #reader_option_keys do
    local name = reader_option_keys[i]
    fields[name] = meta[name]
  end
  return pandoc.ReaderOptions(fields)
end
287
288
-- Public interface of this module.
local exports = {}
exports.readqmd = readqmd
exports.options_to_meta = options_to_meta
exports.meta_to_options = meta_to_options
return exports
293
294