diff --git a/src/Lexer.js b/src/Lexer.js index d9be1fef11..87f46b4b06 100644 --- a/src/Lexer.js +++ b/src/Lexer.js @@ -348,8 +348,8 @@ export class Lexer { } // Mask out escaped characters - while ((match = this.tokenizer.rules.inline.escapedPunct.exec(maskedSrc)) != null) { - maskedSrc = maskedSrc.slice(0, match.index) + '++' + maskedSrc.slice(this.tokenizer.rules.inline.escapedPunct.lastIndex); + while ((match = this.tokenizer.rules.inline.anyPunctuation.exec(maskedSrc)) != null) { + maskedSrc = maskedSrc.slice(0, match.index) + '++' + maskedSrc.slice(this.tokenizer.rules.inline.anyPunctuation.lastIndex); } while (src) { diff --git a/src/Tokenizer.js b/src/Tokenizer.js index 30611218e6..b4fc3722ea 100644 --- a/src/Tokenizer.js +++ b/src/Tokenizer.js @@ -613,7 +613,7 @@ export class Tokenizer { const nextChar = match[1] || match[2] || ''; - if (!nextChar || (nextChar && (prevChar === '' || this.rules.inline.punctuation.exec(prevChar)))) { + if (!nextChar || !prevChar || this.rules.inline.punctuation.exec(prevChar)) { const lLength = match[0].length - 1; let rDelim, rLength, delimTotal = lLength, midDelimTotal = 0; diff --git a/src/rules.js b/src/rules.js index a3064089cc..76d1d40fd0 100644 --- a/src/rules.js +++ b/src/rules.js @@ -172,48 +172,49 @@ export const inline = { nolink: /^!?\[(ref)\](?:\[\])?/, reflinkSearch: 'reflink|nolink(?!\\()', emStrong: { - lDelim: /^(?:\*+(?:([punct_])|[^\s*]))|^_+(?:([punct*])|([^\s_]))/, - // (1) and (2) can only be a Right Delimiter. (3) and (4) can only be Left. (5) and (6) can be either Left or Right. - // () Skip orphan inside strong () Consume to delim (1) #*** (2) a***#, a*** (3) #***a, ***a (4) ***# (5) #***# (6) a***a - rDelimAst: /^[^_*]*?\_\_[^_*]*?\*[^_*]*?(?=\_\_)|[^*]+(?=[^*])|[punct_](\*+)(?=[\s]|$)|[^punct*_\s](\*+)(?=[punct_\s]|$)|[punct_\s](\*+)(?=[^punct*_\s])|[\s](\*+)(?=[punct_])|[punct_](\*+)(?=[punct_])|[^punct*_\s](\*+)(?=[^punct*_\s])/, - rDelimUnd: /^[^_*]*?\*\*[^_*]*?\_[^_*]*?(?=\*\*)|[^_]+(?=[^_])|[punct*](\_+)(?=[\s]|$)|[^punct*_\s](\_+)(?=[punct*\s]|$)|[punct*\s](\_+)(?=[^punct*_\s])|[\s](\_+)(?=[punct*])|[punct*](\_+)(?=[punct*])/ // ^- Not allowed for _ + lDelim: /^(?:\*+(?:((?!\*)[punct])|[^\s*]))|^_+(?:((?!_)[punct])|([^\s_]))/, + // (1) and (2) can only be a Right Delimiter. (3) and (4) can only be Left. (5) and (6) can be either Left or Right. + // | Skip orphan inside strong | Consume to delim | (1) #*** | (2) a***#, a*** | (3) #***a, ***a | (4) ***# | (5) #***# | (6) a***a + rDelimAst: /^[^_*]*?__[^_*]*?\*[^_*]*?(?=__)|[^*]+(?=[^*])|(?!\*)[punct](\*+)(?=[\s]|$)|[^punct\s](\*+)(?!\*)(?=[punct\s]|$)|(?!\*)[punct\s](\*+)(?=[^punct\s])|[\s](\*+)(?!\*)(?=[punct])|(?!\*)[punct](\*+)(?!\*)(?=[punct])|[^punct\s](\*+)(?=[^punct\s])/, + rDelimUnd: /^[^_*]*?\*\*[^_*]*?_[^_*]*?(?=\*\*)|[^_]+(?=[^_])|(?!_)[punct](_+)(?=[\s]|$)|[^punct\s](_+)(?!_)(?=[punct\s]|$)|(?!_)[punct\s](_+)(?=[^punct\s])|[\s](_+)(?!_)(?=[punct])|(?!_)[punct](_+)(?!_)(?=[punct])/ // ^- Not allowed for _ }, code: /^(`+)([^`]|[^`][\s\S]*?[^`])\1(?!`)/, br: /^( {2,}|\\)\n(?!\s*$)/, del: noopTest, text: /^(`+|[^`])(?:(?= {2,}\n)|[\s\S]*?(?:(?=[\\?@\\[\\]`^{|}~\\\\' + inline._uc_punctuation; -inline.punctuation = edit(inline.punctuation).replace(/punctuation/g, inline._punctuation).getRegex(); +// list of unicode punctuation marks, plus any missing characters from CommonMark spec +inline._punctuation = '\\p{P}$+<=>`^|~'; +inline.punctuation = edit(inline.punctuation, 'u').replace(/punctuation/g, inline._punctuation).getRegex(); // sequences em should skip over [title](link), `code`, inline.blockSkip = /\[[^[\]]*?\]\([^\(\)]*?\)|`[^`]*?`|<[^<>]*?>/g; -inline.escapedPunct = /\\[punct_*]/g; +inline.anyPunctuation = /\\[punct]/g; +inline._escapes = /\\([punct])/g; inline._comment = edit(block._comment).replace('(?:-->|$)', '-->').getRegex(); -inline.emStrong.lDelim = edit(inline.emStrong.lDelim) +inline.emStrong.lDelim = edit(inline.emStrong.lDelim, 'u') .replace(/punct/g, inline._punctuation) .getRegex(); -inline.emStrong.rDelimAst = edit(inline.emStrong.rDelimAst, 'g') +inline.emStrong.rDelimAst = edit(inline.emStrong.rDelimAst, 'gu') .replace(/punct/g, inline._punctuation) .getRegex(); -inline.emStrong.rDelimUnd = edit(inline.emStrong.rDelimUnd, 'g') +inline.emStrong.rDelimUnd = edit(inline.emStrong.rDelimUnd, 'gu') .replace(/punct/g, inline._punctuation) .getRegex(); -inline.escapedPunct = edit(inline.escapedPunct, 'g') +inline.anyPunctuation = edit(inline.anyPunctuation, 'gu') .replace(/punct/g, inline._punctuation) .getRegex(); -inline._escapes = /\\([!"#$%&'()*+,\-./:;<=>?@\[\]\\^_`{|}~])/g; +inline._escapes = edit(inline._escapes, 'gu') + .replace(/punct/g, inline._punctuation) + .getRegex(); inline._scheme = /[a-zA-Z][a-zA-Z0-9+.-]{1,31}/; inline._email = /[a-zA-Z0-9.!#$%&'*+/=?^_`{|}~-]+(@)[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)+(?![-_])/;