From c242d9b16657dfc8313c36388d74561c920e55de Mon Sep 17 00:00:00 2001 From: Rowan Cockett Date: Thu, 16 Nov 2023 13:45:32 -0700 Subject: [PATCH] =?UTF-8?q?=F0=9F=A5=BC=20Improve=20suffix=20citation=20pa?= =?UTF-8?q?rsing=20(#757)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .changeset/spotty-peaches-invent.md | 5 ++++ packages/markdown-it-myst/src/citations.ts | 29 +++++++------------ packages/markdown-it-myst/tests/citations.yml | 25 ++++++++++++++++ 3 files changed, 40 insertions(+), 19 deletions(-) create mode 100644 .changeset/spotty-peaches-invent.md diff --git a/.changeset/spotty-peaches-invent.md b/.changeset/spotty-peaches-invent.md new file mode 100644 index 000000000..965b05c71 --- /dev/null +++ b/.changeset/spotty-peaches-invent.md @@ -0,0 +1,5 @@ +--- +'markdown-it-myst': patch +--- + +Improve the suffix label parsing for citations diff --git a/packages/markdown-it-myst/src/citations.ts b/packages/markdown-it-myst/src/citations.ts index a12b0d90f..92dbdd7a2 100644 --- a/packages/markdown-it-myst/src/citations.ts +++ b/packages/markdown-it-myst/src/citations.ts @@ -25,7 +25,8 @@ export interface Citation { export const citationsPlugin: PluginWithOptions = (md) => { const regexes = { citation: /^([^^-]|[^^].+?)?(-)?@([\w][\w:.#$%&\-+?<>~/]*)(.+)?$/, - inText: /^@((?:[\w|{][\w:.#$%&\-+?<>~/]*[\w|}])|\w)(\s*)(\[)?/, + // Only allow a short [suffix] for in text citations (e.g. 50 characters) + inText: /^@((?:[\w|{][\w:.#$%&\-+?<>~/]*[\w|}])|\w)(\s*)(\[([^\]]{1,50})\])?/, allowedBefore: /^[^a-zA-Z.0-9]$/, }; @@ -50,24 +51,14 @@ export const citationsPlugin: PluginWithOptions = (md) => { (token as any).col = [state.pos]; } if (match[3]) { - // suffix is there - const suffixStart = state.pos + match[0].length; - const suffixEnd = state.md.helpers.parseLinkLabel(state, suffixStart); - const charAfter = state.src.codePointAt(suffixEnd + 1); - if (suffixEnd > 0 && charAfter != 0x28 && charAfter != 0x5b /* ( or [ */) { - const suffix = state.src.slice(suffixStart, suffixEnd); - citation.suffix = state.md.parseInline(suffix, state.env); - state.pos += match[0].length + suffixEnd - suffixStart + 1; - if (token) { - token.content = match[0] + suffix + ']'; - (token as any).col.push(state.pos); - } - } else { - state.pos += match[0].length - match[2].length - match[3].length; - if (token) { - token.content = match[0]; - (token as any).col.push(state.pos); - } + // The in-text citation is followed by [suffix] + // Another way to do this is to use `state.md.helpers.parseLinkLabel(state, suffixStart);` + const suffix = match[4]; + citation.suffix = state.md.parseInline(suffix, state.env); + state.pos += match[0].length; + if (token) { + token.content = match[0]; + (token as any).col.push(state.pos); } } else { state.pos += match[0].length - match[2].length; diff --git a/packages/markdown-it-myst/tests/citations.yml b/packages/markdown-it-myst/tests/citations.yml index ed6f44754..3b486ace6 100644 --- a/packages/markdown-it-myst/tests/citations.yml +++ b/packages/markdown-it-myst/tests/citations.yml @@ -202,3 +202,28 @@ cases: - type: text content: ', the authors...' - type: paragraph_close + - title: Nested labels + md: 'These include experimentally produced alkaline magmas from @iacovino2016 [`alkaline.xlsx`], basaltic melt inclusions from Kilauea [@tucker2019] and Gakkel Ridge [@bennett2019 `basalts.xlsx`], basaltic melt inclusions from Cerro Negro volcano, Nicaragua [@roggensack2001 `cerro_negro.xlsx`], and rhyolite melt inclusions from the Taupo Volcanic Center, New Zealand [@myers2019] and a topaz rhyolite from the Rio Grande Rift @mercer2015 [`rhyolites.xlsx`].' + tokens: + - type: paragraph_open + - type: inline + children: + - type: text + content: 'These include experimentally produced alkaline magmas from ' + - type: cite + content: '@iacovino2016 [`alkaline.xlsx`]' + meta: + label: iacovino2016 + kind: narrative + suffix: + - content: '`alkaline.xlsx`' + - type: text + content: ', basaltic melt inclusions from Kilauea ' + - type: cite + content: '@tucker2019' + meta: + label: tucker2019 + kind: parenthetical + - type: text + content: ' and Gakkel Ridge ' + - type: paragraph_close