From d53003d59c28784b9c432ec9863d7f64b7521d29 Mon Sep 17 00:00:00 2001
From: Erik Demaine
Date: Wed, 15 Nov 2023 12:43:20 -0500
Subject: [PATCH] Improve tabular parsing, supporting URLs with &

---
 CHANGELOG.md       |  2 ++
 lib/formats.coffee | 31 ++++++++++++++++++++++++++++---
 2 files changed, 30 insertions(+), 3 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 08db000..56d111d 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -16,6 +16,8 @@ instead of version numbers.
   By contrast, optional arguments add parentheses:
   `\begin{theorem 1}` produces "Theorem (1)", and
   `\begin{proof}[sketch]` produces "Proof (sketch)".
+* `\begin{tabular}` improvements: skip over `\&`, HTML tags/character codes,
+  and braced content, which makes it possible to include URLs with `&`s.
 
 ## 2023-11-01
 
diff --git a/lib/formats.coffee b/lib/formats.coffee
index 6675cfc..a8792e1 100644
--- a/lib/formats.coffee
+++ b/lib/formats.coffee
@@ -269,6 +269,31 @@ texAlign =
   raggedright: 'left'
   centering: 'center'
 
+## Split string `text` at all matches of the regular expression `re`, while
+## ignoring matches nested within unescaped braces, HTML tags, numeric
+## character codes such as `&#123;` or `&#x7b;`, and backslash escapes such
+## as `\&`.  Only `re.source` is used, so any flags on `re` are ignored.
+## Returns the array of pieces between matches (always at least one piece).
+splitOutside = (text, re) ->
+  ## Single scanner: group 1 is set only when the caller's separator matched.
+  ## `\\.` is tried last so a separator starting with a backslash still wins.
+  re = ///[{}]|<[^<>]*>|&\#(?:[xX][0-9a-fA-F]+|\d+);|(#{re.source})|\\.///g
+  braces = 0
+  start = 0
+  parts = []
+  while (match = re.exec text)?
+    if match[0] == '{'
+      braces++
+    else if match[0] == '}'
+      ## Ignore unbalanced closing braces; a negative depth would otherwise
+      ## suppress every later split.
+      braces-- if braces > 0
+    else if match[1]? and braces == 0
+      parts.push text[start...match.index]
+      start = match.index + match[0].length
+  parts.push text[start..]
+  parts
+
 ## Process all commands starting with \ followed by a letter a-z.
 ## This is not a valid escape sequence in Markdown, so can be safely supported
 ## in Markdown too.
@@ -277,15 +302,15 @@ latex2htmlCommandsAlpha = (tex, math) ->
   ## Process tabular environments first in order to split cells at &
   ## (so e.g. \bf is local to the cell)
   .replace /\\begin\s*{tabular}\s*{([^{}]*)}([^]*?)\\end\s*{tabular}/g, (m, cols, body) ->
-    cols = cols.replace /|/g, '' # not yet supported
+    cols = cols.replace /\|/g, '' # not yet supported
     body = body.replace /\\hline\s*|\\cline\s*{[^{}]*}/g, '' # not yet supported
     skip = (0 for colnum in [0...cols.length])
     '' +
-      (for row in body.split /(?:\\\\|\[DOUBLEBACKSLASH\])/ #(?:\s*\\(?:hline|cline\s*{[^{}]*}))?/
+      (for row in splitOutside body, /(?:\\\\|\[DOUBLEBACKSLASH\])/ #(?:\s*\\(?:hline|cline\s*{[^{}]*}))?/
         #console.log row
         continue unless row.trim()
         "\n" +
-        (for col, colnum in row.split '&'
+        (for col, colnum in splitOutside row, /&/
           if skip[colnum]
             skip[colnum] -= 1
             continue