Path: blob/main/src/resources/pandoc/datadir/lpegfenceddiv.lua
12926 views
-- LPEG "parsing" and code for fenced div workarounds
-- Copyright (C) 2024 Posit Software, PBC

local lpeg = require('lpeg')

-- Opening fence of a div: three or more colons.
local colons = lpeg.P(':')^3
local maybe_spaces = lpeg.S("\t ")^0
local newline = lpeg.P("\n")

-- Quoted attribute values. A backslash-escaped quote of the same kind does
-- not terminate the string. Each pattern captures the whole quoted text.
local single_quoted_string = lpeg.C(lpeg.P("'") * (lpeg.P("\\'") + (lpeg.P(1) - lpeg.P("'")))^0 * lpeg.P("'"))
local double_quoted_string = lpeg.C(lpeg.P('"') * (lpeg.P('\\"') + (lpeg.P(1) - lpeg.P('"')))^0 * lpeg.P('"'))

-- Unquoted attribute token: starts with an ASCII letter or one of `_ # . =`,
-- followed by any run of ASCII letters, digits, or `. = - _`.
-- Note that a lone `=` is itself a valid token, which is what lets a header
-- such as `{key = value}` match `div_attr_block` below.
local literal = lpeg.C(
  (lpeg.R("az", "AZ") + lpeg.S("_#.=")) *
  (lpeg.R("az", "AZ", "09") + lpeg.S(".=-_"))^0
)
local Cp = lpeg.Cp()

--- Build a pattern that matches `p` at the first position (scanning forward
-- from the match start) where `p` succeeds.
-- @param p an LPeg pattern
-- @return an LPeg grammar; matching yields whatever `p` yields
local function anywhere(p)
  return lpeg.P{ p + 1 * lpeg.V(1) }
end

--- Like `anywhere`, but additionally produces the position where `p` started
-- (first value) and the position just past where it ended (last value).
-- Any captures made by `p` itself appear between those two positions.
-- @param p an LPeg pattern
-- @return an LPeg grammar
local function anywhere_pos(p)
  return lpeg.P{ Cp * p * Cp + 1 * lpeg.V(1) }
end

-- `{ ... }` containing zero or more whitespace-separated attribute tokens.
local div_attr_block = lpeg.P("{") * maybe_spaces * ((single_quoted_string + double_quoted_string + literal) * maybe_spaces)^0 * lpeg.P("}")

-- A div opening line: fence, optional spaces, attribute block, then a newline
-- or the end of the input.
local start_div = colons * maybe_spaces * div_attr_block * (newline + lpeg.P(-1))
local start_div_search = anywhere_pos(start_div)

--- Return the first and the last of the given values.
-- Used to pull the two position captures out of an `anywhere_pos` match while
-- ignoring whatever string captures sit between them.
-- `select("#", ...)` is used rather than `#{...}` so the count does not depend
-- on the table length operator.
-- @return first value, last value (both nil when called with no values or
--         with a single nil, i.e. a failed match)
local function first_and_last(...)
  local n = select("#", ...)
  if n == 0 then
    return nil, nil
  end
  return (select(1, ...)), (select(n, ...))
end

local single_quote_p = anywhere(lpeg.P("'"))
local double_quote_p = anywhere(lpeg.P('"'))

-- An `=` that has a space on at least one side: `= `, ` =` or ` = `.
local bad_equals = anywhere_pos(lpeg.P("= ") + (lpeg.P(" =") * lpeg.P(" ")^-1))

--- Count how many times `p` matches in `subject`, starting at `init`.
-- `p` must return the position just past each match (as the `anywhere`
-- patterns without captures do), which is used as the next starting point.
-- @param subject string to scan
-- @param p LPeg pattern returning an end position
-- @param init optional 1-based start position (defaults to 1)
-- @return number of matches
local function count_matches(subject, p, init)
  local total = 0
  local pos = p:match(subject, init or 1)
  while pos do
    total = total + 1
    pos = p:match(subject, pos)
  end
  return total
end

--- Heuristic: decide whether the text ending at `eq_end` lies outside of any
-- quoted string in `block`.
-- If an `=` is not part of a quoted string, there is an even number of single
-- quotes and an even number of double quotes both to its left and to its
-- right. If the block holds an odd total of either quote kind, it is badly
-- formatted for a _different_ reason and this returns false for every
-- position, so the block is left alone.
-- @param block text of one div opening line
-- @param eq_end position just past the spaced `=` being examined
-- @return true when all four quote counts are even
local function is_outside_quotes(block, eq_end)
  local right_single = count_matches(block, single_quote_p, eq_end)
  local right_double = count_matches(block, double_quote_p, eq_end)
  local left_single = count_matches(block, single_quote_p) - right_single
  local left_double = count_matches(block, double_quote_p) - right_double
  return left_single % 2 == 0 and right_single % 2 == 0
    and left_double % 2 == 0 and right_double % 2 == 0
end

--- Repair the first spaced `=` in `block` that is not inside a quoted string.
-- Candidates that fall inside quotes are skipped and the scan continues with
-- the next one, so `{a='x = y' key = value}` still gets `key = value` fixed.
-- @param block text of one div opening line
-- @return the block with that `=` stripped of its surrounding spaces, or nil
--         when there is nothing to repair
local function fix_first_bad_equals(block)
  local init = 1
  while true do
    local eq_start, eq_end = bad_equals:match(block, init)
    if not eq_start then
      return nil
    end
    if is_outside_quotes(block, eq_end) then
      -- replace the `=` and its neighbouring spaces with a bare `=`
      return block:sub(1, eq_start - 1) .. "=" .. block:sub(eq_end)
    end
    -- this `=` belongs to a quoted value; look further right
    init = eq_end
  end
end

--- Remove spaces around `=` in the attribute block of fenced div opening
-- lines (for example `::: {#id key = value}` becomes `::: {#id key=value}`).
-- An `=` that appears inside a quoted attribute value is left untouched.
--
-- The returned text is shorter than the input by the number of spaces that
-- were removed. (Earlier revisions tried to append
-- `#replacement - #substring` spaces to compensate, but that difference is
-- never positive, so nothing was ever appended; the no-op has been dropped.)
--
-- @param txt string holding the document text
-- @return the text with the repairable div headers fixed
local function attempt_to_fix_fenced_div(txt)
  local b, e = first_and_last(start_div_search:match(txt))
  while b do
    -- `e` is the position just past the header (including its newline)
    local repaired = fix_first_bad_equals(txt:sub(b, e - 1))
    if repaired then
      txt = txt:sub(1, b - 1) .. repaired .. txt:sub(e)
      -- a substitution was made: search from the start of this header again
      -- to find the next bad key-value pair in the same block
      b, e = first_and_last(start_div_search:match(txt, b))
    else
      -- nothing (more) to repair here: move on past this header
      b, e = first_and_last(start_div_search:match(txt, e))
    end
  end
  return txt
end

---------------------------------------------------

local div_attr_block_tests = {
  "{#id .class key='value'}",
  "{#id .class key=value}",
  '{#id .class key="value with spaces"}',
}

local div_block_tests = {
  "::: {#id .class key='value'}",
  "::: {#id .class key=value}",
  '::: {#id .class key="value with spaces"}',
}

local end_to_end_tests = {
  "::: {#id-1 .class key =value}\nfoo\n:::\n\n::: {#id-2 .class key='value'}\nfoo\n:::\n",
  "::: {#id-1 .class key = value}\nfoo\n:::\n\n::: {#id-2 .class key='value'}\nfoo\n:::\n",
  "::: {#id-1 .class key= value}\nfoo\n:::\n\n::: {#id-2 .class key='value'}\nfoo\n:::\n",
  "::: {#id-1 .class key =value}\nfoo\n:::\n\n::: {#id-2 .class key= 'value'}\nfoo\n:::\n",
  "::: {#id-1 .class key = value}\nfoo\n:::\n\n::: {#id-2 .class key = 'value'}\nfoo\n:::\n",
  "::: {#id-1 .class key= value}\nfoo\n:::\n\n::: {#id-2 .class key ='value'}\nfoo\n:::\n",
  "::: {#id-1 .class key= value please='do not touch = this one'}\nfoo\n:::",
  "::: {#id-1 .class key= value key2 =value2}\nfoo\n:::",
  "::: {#id-4 key = value}\nfoo\n:::",
  -- quoted `=` comes first; the later unquoted one must still be repaired
  "::: {#id-5 note='keep = this' key = value}\nfoo\n:::",
}

--- Print the results of running the sample inputs above through the
-- patterns and through `attempt_to_fix_fenced_div`, for manual inspection.
local function tests()
  for _, test in ipairs(div_attr_block_tests) do
    print(div_attr_block:match(test))
  end
  for _, test in ipairs(div_block_tests) do
    print(start_div_search:match(test))
  end
  for _, test in ipairs(end_to_end_tests) do
    print(attempt_to_fix_fenced_div(test))
    print("---")
  end
end

return {
  _tests = tests,
  attempt_to_fix_fenced_div = attempt_to_fix_fenced_div
}