From 63fd8158a109a8858e551657ba525d8ce2001d45 Mon Sep 17 00:00:00 2001 From: Titus Wormer Date: Thu, 26 Nov 2015 17:38:41 +0100 Subject: [PATCH] [WIP] Refactor to remove regular expressions ...in favour of an algorithmic approach. * [x] `yamlFrontMatter` * [x] `newline` * [x] `code` * [x] `horizontalRule` * [x] `escape` * [x] `deletion` * [x] `break` * [x] `inlineText` * [x] `fences` * [x] `heading` * [x] `lineHeading` * [ ] `blockquote` * [ ] `list` * [x] `html` * [x] `definition` * [x] `footnoteDefinition` * [ ] `looseTable` * [ ] `table` * [ ] `paragraph` * [x] `autoLink` * [x] `url` * [x] `tag` * [ ] `link` * [x] `reference` * [x] `shortcutReference` * [ ] `strong` * [ ] `emphasis` * [x] `inlineCode` Regressions (either todo or in the subsequent major release): * The strictness required by CommonMark for HTML comments is loosened; * Malformed HTML block elements are not supported. [ci skip] --- .jscs.json | 2 +- example.js | 20 - lib/expressions.js | 39 +- lib/parse.js | 3340 +++++++++++++++--- lib/parse/block-elements.json | 52 + script/build-expressions.js | 253 +- test/tree/html-attributes.json | 60 +- test/tree/links-reference-style.json | 28 +- test/tree/markdown-documentation-basics.json | 5 +- 9 files changed, 2917 insertions(+), 882 deletions(-) delete mode 100644 example.js create mode 100644 lib/parse/block-elements.json diff --git a/.jscs.json b/.jscs.json index 510b269a1..c7624dcfe 100644 --- a/.jscs.json +++ b/.jscs.json @@ -138,7 +138,7 @@ "else" ], "requireLineFeedAtFileEnd": true, - "maximumLineLength": 78, + "maximumLineLength": 79, "requireCapitalizedConstructors": true, "safeContextKeyword": "self", "requireDotNotation": true, diff --git a/example.js b/example.js deleted file mode 100644 index 71faacef5..000000000 --- a/example.js +++ /dev/null @@ -1,20 +0,0 @@ -// Load dependencies: -var mdast = require('./index.js'); -var html = require('mdast-html'); -var yamlConfig = require('mdast-yaml-config'); - -// Use plugins: -var processor = 
mdast().use(yamlConfig).use(html); - -// Process the document: -var doc = processor.process([ - '---', - 'mdast:', - ' commonmark: true', - '---', - '', - '2) Some *emphasis*, **strongness**, and `code`.' -].join('\n')); - -// Yields: -console.log('html', doc); diff --git a/lib/expressions.js b/lib/expressions.js index 4e219d06f..9724f8cde 100644 --- a/lib/expressions.js +++ b/lib/expressions.js @@ -2,49 +2,24 @@ /* eslint-env commonjs */ module.exports = { 'rules': { - 'newline': /^\n((?:[ \t]*\n)*)/, - 'code': /^((?:(?: {4}|\t)[^\n]*\n?((?:[ \t]*\n)*))+)/, - 'horizontalRule': /^[ \t]*([-*_])( *\1){2,} *(?=\n|$)/, - 'heading': /^([ \t]*)(#{1,6})(?:([ \t]+)([^\n]+?))??(?:[ \t]+#+)?[ \t]*(?=\n|$)/, - 'lineHeading': /^(\ {0,3})([^\n]+?)[ \t]*\n\ {0,3}(=|-){1,}[ \t]*(?=\n|$)/, - 'definition': /^[ \t]*\[((?:[^\\](?:\\|\\(?:\\{2})+)\]|[^\]])+)\]:[ \t\n]*(<[^>\[\]]+>|[^\s\[\]]+)(?:[ \t\n]+['"(]((?:[^\n]|\n(?!\n))*?)['")])?[ \t]*(?=\n|$)/, 'bullet': /(?:[*+-]|\d+\.)/, 'indent': /^([ \t]*)((?:[*+-]|\d+\.))( {1,4}(?! )| |\t)/, 'item': /([ \t]*)((?:[*+-]|\d+\.))( {1,4}(?! 
)| |\t)[^\n]*(?:\n(?!\1(?:[*+-]|\d+\.)[ \t])[^\n]*)*/gm, 'list': /^([ \t]*)((?:[*+-]|\d+\.))[ \t][\s\S]+?(?:(?=\n+\1?(?:[-*_][ \t]*){3,}(?:\n|$))|(?=\n+[ \t]*\[((?:[^\\](?:\\|\\(?:\\{2})+)\]|[^\]])+)\]:[ \t\n]*(<[^>\[\]]+>|[^\s\[\]]+)(?:[ \t\n]+['"(]((?:[^\n]|\n(?!\n))*?)['")])?[ \t]*(?=\n|$))|\n{2,}(?![ \t])(?!\1(?:[*+-]|\d+\.)[ \t])|$)/, 'blockquote': /^(?=[ \t]*>)(?:(?:(?:[ \t]*>[^\n]*\n)*(?:[ \t]*>[^\n]+(?=\n|$))|(?![ \t]*>)(?![ \t]*\[((?:[^\\](?:\\|\\(?:\\{2})+)\]|[^\]])+)\]:[ \t\n]*(<[^>\[\]]+>|[^\s\[\]]+)(?:[ \t\n]+['"(]((?:[^\n]|\n(?!\n))*?)['")])?[ \t]*(?=\n|$))[^\n]+)(?:\n|$))*(?:[ \t]*>[ \t]*(?:\n[ \t]*>[ \t]*)*)?/, - 'html': /^(?:[ \t]*(?:(?:(?:<(?:article|header|aside|hgroup|blockquote|hr|iframe|body|li|map|button|object|canvas|ol|caption|output|col|p|colgroup|pre|dd|progress|div|section|dl|table|td|dt|tbody|embed|textarea|fieldset|tfoot|figcaption|th|figure|thead|footer|tr|form|ul|h1|h2|h3|h4|h5|h6|video|script|style)(?:(?:\s+)(?:[a-zA-Z_:][a-zA-Z0-9_.:-]*)(?:(?:\s+)?=(?:\s+)?(?:[^"'=<>`]+|'[^']*'|"[^"]*"))?)*(?:\s+)?\/?>?)|(?:<\/(?:article|header|aside|hgroup|blockquote|hr|iframe|body|li|map|button|object|canvas|ol|caption|output|col|p|colgroup|pre|dd|progress|div|section|dl|table|td|dt|tbody|embed|textarea|fieldset|tfoot|figcaption|th|figure|thead|footer|tr|form|ul|h1|h2|h3|h4|h5|h6|video|script|style)(?:\s+)?>))||(?:<\?(?:[^\?]|\?(?!>))+\?>)|(?:)|(?:))[\s\S]*?[ \t]*?(?:\n{2,}|\s*$))/i, 'paragraph': /^(?:(?:[^\n]+\n?(?![ \t]*([-*_])( *\1){2,} *(?=\n|$)|([ \t]*)(#{1,6})(?:([ \t]+)([^\n]+?))??(?:[ \t]+#+)?[ \t]*(?=\n|$)|(\ {0,3})([^\n]+?)[ \t]*\n\ {0,3}(=|-){1,}[ \t]*(?=\n|$)|[ \t]*\[((?:[^\\](?:\\|\\(?:\\{2})+)\]|[^\]])+)\]:[ \t\n]*(<[^>\[\]]+>|[^\s\[\]]+)(?:[ \t\n]+['"(]((?:[^\n]|\n(?!\n))*?)['")])?[ \t]*(?=\n|$)|(?=[ \t]*>)(?:(?:(?:[ \t]*>[^\n]*\n)*(?:[ \t]*>[^\n]+(?=\n|$))|(?![ \t]*>)(?![ \t]*\[((?:[^\\](?:\\|\\(?:\\{2})+)\]|[^\]])+)\]:[ \t\n]*(<[^>\[\]]+>|[^\s\[\]]+)(?:[ \t\n]+['"(]((?:[^\n]|\n(?!\n))*?)['")])?[ \t]*(?=\n|$))[^\n]+)(?:\n|$))*(?:[ 
\t]*>[ \t]*(?:\n[ \t]*>[ \t]*)*)?|<(?!(?:a|em|strong|small|s|cite|q|dfn|abbr|data|time|code|var|samp|kbd|sub|sup|i|b|u|mark|ruby|rt|rp|bdi|bdo|span|br|wbr|ins|del|img)\b)(?!mailto:)\w+(?!:\/|[^\w\s@]*@)\b))+)/, 'escape': /^\\([\\`*{}\[\]()#+\-.!_>])/, - 'autoLink': /^<([^ >]+(@|:\/)[^ >]+)>/, - 'tag': /^(?:(?:<(?:[a-zA-Z][a-zA-Z0-9]*)(?:(?:\s+)(?:[a-zA-Z_:][a-zA-Z0-9_.:-]*)(?:(?:\s+)?=(?:\s+)?(?:[^"'=<>`]+|'[^']*'|"[^"]*"))?)*(?:\s+)?\/?>)|(?:<\/(?:[a-zA-Z][a-zA-Z0-9]*)(?:\s+)?>)||(?:<\?(?:[^\?]|\?(?!>))+\?>)|(?:)|(?:))/, 'strong': /^(_)_((?:\\[\s\S]|[^\\])+?)__(?!_)|^(\*)\*((?:\\[\s\S]|[^\\])+?)\*\*(?!\*)/, 'emphasis': /^\b(_)((?:__|\\[\s\S]|[^\\])+?)_\b|^(\*)((?:\*\*|\\[\s\S]|[^\\])+?)\*(?!\*)/, - 'inlineCode': /^(`+)((?!`)[\s\S]*?(?:`\s+|[^`]))?(\1)(?!`)/, - 'break': /^ {2,}\n(?!\s*$)/, - 'inlineText': /^[\s\S]+?(?=[\\)(?:\s+['"]([\s\S]*?)['"])?\s*\)/, - 'shortcutReference': /^(!?\[)((?:\\[\s\S]|[^\[\]])+?)\]/, - 'reference': /^(!?\[)((?:\[[^\]]*\]|[^\[\]]|\](?=[^\[]*\]))*)\]\s*\[((?:\\[\s\S]|[^\[\]])*)\]/ + 'link': /^(!?\[)((?:\[[^\]]*\]|[^\[\]]|\](?=[^\[]*\]))*)\]\(\s*(?:(?!<)((?:\((?:\\[\s\S]|[^\)])*?\)|\\[\s\S]|[\s\S])*?)|<([\s\S]*?)>)(?:\s+['"]([\s\S]*?)['"])?\s*\)/ }, 'gfm': { - 'fences': /^( *)(([`~])\3{2,})[ \t]*([^\n`~]+)?[ \t]*(?:\n([\s\S]*?))??(?:\n\ {0,3}\2\3*[ \t]*(?=\n|$)|$)/, 'paragraph': /^(?:(?:[^\n]+\n?(?![ \t]*([-*_])( *\1){2,} *(?=\n|$)|( *)(([`~])\5{2,})[ \t]*([^\n`~]+)?[ \t]*(?:\n([\s\S]*?))??(?:\n\ {0,3}\4\5*[ \t]*(?=\n|$)|$)|([ \t]*)((?:[*+-]|\d+\.))[ \t][\s\S]+?(?:(?=\n+\8?(?:[-*_][ \t]*){3,}(?:\n|$))|(?=\n+[ \t]*\[((?:[^\\](?:\\|\\(?:\\{2})+)\]|[^\]])+)\]:[ \t\n]*(<[^>\[\]]+>|[^\s\[\]]+)(?:[ \t\n]+['"(]((?:[^\n]|\n(?!\n))*?)['")])?[ \t]*(?=\n|$))|\n{2,}(?![ \t])(?!\8(?:[*+-]|\d+\.)[ \t])|$)|([ \t]*)(#{1,6})(?:([ \t]+)([^\n]+?))??(?:[ \t]+#+)?[ \t]*(?=\n|$)|(\ {0,3})([^\n]+?)[ \t]*\n\ {0,3}(=|-){1,}[ \t]*(?=\n|$)|[ \t]*\[((?:[^\\](?:\\|\\(?:\\{2})+)\]|[^\]])+)\]:[ \t\n]*(<[^>\[\]]+>|[^\s\[\]]+)(?:[ 
\t\n]+['"(]((?:[^\n]|\n(?!\n))*?)['")])?[ \t]*(?=\n|$)|(?=[ \t]*>)(?:(?:(?:[ \t]*>[^\n]*\n)*(?:[ \t]*>[^\n]+(?=\n|$))|(?![ \t]*>)(?![ \t]*\[((?:[^\\](?:\\|\\(?:\\{2})+)\]|[^\]])+)\]:[ \t\n]*(<[^>\[\]]+>|[^\s\[\]]+)(?:[ \t\n]+['"(]((?:[^\n]|\n(?!\n))*?)['")])?[ \t]*(?=\n|$))[^\n]+)(?:\n|$))*(?:[ \t]*>[ \t]*(?:\n[ \t]*>[ \t]*)*)?|<(?!(?:a|em|strong|small|s|cite|q|dfn|abbr|data|time|code|var|samp|kbd|sub|sup|i|b|u|mark|ruby|rt|rp|bdi|bdo|span|br|wbr|ins|del|img)\b)(?!mailto:)\w+(?!:\/|[^\w\s@]*@)\b))+)/, 'table': /^( *\|(.+))\n( *\|( *[-:]+[-| :]*)\n)((?: *\|.*(?:\n|$))*)/, 'looseTable': /^( *(\S.*\|.*))\n( *([-:]+ *\|[-| :]*)\n)((?:.*\|.*(?:\n|$))*)/, - 'escape': /^\\([\\`*{}\[\]()#+\-.!_>~|])/, - 'url': /^https?:\/\/[^\s<]+[^<.,:;"')\]\s]/, - 'deletion': /^~~(?=\S)([\s\S]*?\S)~~/, - 'inlineText': /^[\s\S]+?(?=[\\~|])/ }, 'pedantic': { - 'heading': /^([ \t]*)(#{1,6})([ \t]*)([^\n]*?)[ \t]*#*[ \t]*(?=\n|$)/, 'strong': /^(_)_(?=\S)([\s\S]*?\S)__(?!_)|^(\*)\*(?=\S)([\s\S]*?\S)\*\*(?!\*)/, 'emphasis': /^(_)(?=\S)([\s\S]*?\S)_(?!_)|^(\*)(?=\S)([\s\S]*?\S)\*(?!\*)/ }, @@ -53,22 +28,12 @@ module.exports = { 'item': /([ \t]*)((?:[*+-]|\d+[\.\)]))( {1,4}(?! )| |\t)[^\n]*(?:\n(?!\1(?:[*+-]|\d+[\.\)])[ \t])[^\n]*)*/gm, 'bullet': /(?:[*+-]|\d+[\.\)])/, 'indent': /^([ \t]*)((?:[*+-]|\d+[\.\)]))( {1,4}(?! 
)| |\t)/, - 'html': /^(?:[ \t]*(?:(?:(?:<(?:article|header|aside|hgroup|blockquote|hr|iframe|body|li|map|button|object|canvas|ol|caption|output|col|p|colgroup|pre|dd|progress|div|section|dl|table|td|dt|tbody|embed|textarea|fieldset|tfoot|figcaption|th|figure|thead|footer|tr|form|ul|h1|h2|h3|h4|h5|h6|video|script|style)(?:(?:\s+)(?:[a-zA-Z_:][a-zA-Z0-9_.:-]*)(?:(?:\s+)?=(?:\s+)?(?:[^"'=<>`]+|'[^']*'|"[^"]*"))?)*(?:\s+)?\/?>?)|(?:<\/(?:article|header|aside|hgroup|blockquote|hr|iframe|body|li|map|button|object|canvas|ol|caption|output|col|p|colgroup|pre|dd|progress|div|section|dl|table|td|dt|tbody|embed|textarea|fieldset|tfoot|figcaption|th|figure|thead|footer|tr|form|ul|h1|h2|h3|h4|h5|h6|video|script|style)(?:\s+)?>))|(?:)|(?:<\?(?:[^\?]|\?(?!>))+\?>)|(?:)|(?:))[\s\S]*?[ \t]*?(?:\n{2,}|\s*$))/i, - 'tag': /^(?:(?:<(?:[a-zA-Z][a-zA-Z0-9]*)(?:(?:\s+)(?:[a-zA-Z_:][a-zA-Z0-9_.:-]*)(?:(?:\s+)?=(?:\s+)?(?:[^"'=<>`]+|'[^']*'|"[^"]*"))?)*(?:\s+)?\/?>)|(?:<\/(?:[a-zA-Z][a-zA-Z0-9]*)(?:\s+)?>)|(?:)|(?:<\?(?:[^\?]|\?(?!>))+\?>)|(?:)|(?:))/, 'link': /^(!?\[)((?:(?:\[(?:\[(?:\\[\s\S]|[^\[\]])*?\]|\\[\s\S]|[^\[\]])*?\])|\\[\s\S]|[^\[\]])*?)\]\(\s*(?:(?!<)((?:(?:\((?:\\[\s\S]|[^\(\)\s])*?\)|\\[\s\S]|[^\(\)\s])*?))|<([^\n]*?)>)(?:\s+(?:\'((?:\\[\s\S]|[^\'])*?)\'|"((?:\\[\s\S]|[^"])*?)"|\(((?:\\[\s\S]|[^\)])*?)\)))?\s*\)/, - 'reference': /^(!?\[)((?:(?:\[(?:\[(?:\\[\s\S]|[^\[\]])*?\]|\\[\s\S]|[^\[\]])*?\])|\\[\s\S]|[^\[\]])*?)\]\s*\[((?:\\[\s\S]|[^\[\]])*)\]/, 'paragraph': /^(?:(?:[^\n]+\n?(?!\ {0,3}([-*_])( *\1){2,} *(?=\n|$)|(\ {0,3})(#{1,6})(?:([ \t]+)([^\n]+?))??(?:[ \t]+#+)?\ {0,3}(?=\n|$)|(?=\ {0,3}>)(?:(?:(?:\ {0,3}>[^\n]*\n)*(?:\ {0,3}>[^\n]+(?=\n|$))|(?!\ {0,3}>)(?!\ {0,3}\[((?:[^\\](?:\\|\\(?:\\{2})+)\]|[^\]])+)\]:[ \t\n]*(<[^>\[\]]+>|[^\s\[\]]+)(?:[ \t\n]+['"(]((?:[^\n]|\n(?!\n))*?)['")])?\ {0,3}(?=\n|$))[^\n]+)(?:\n|$))*(?:\ {0,3}>\ {0,3}(?:\n\ {0,3}>\ 
{0,3})*)?|<(?!(?:a|em|strong|small|s|cite|q|dfn|abbr|data|time|code|var|samp|kbd|sub|sup|i|b|u|mark|ruby|rt|rp|bdi|bdo|span|br|wbr|ins|del|img)\b)(?!mailto:)\w+(?!:\/|[^\w\s@]*@)\b))+)/, 'blockquote': /^(?=[ \t]*>)(?:(?:(?:[ \t]*>[^\n]*\n)*(?:[ \t]*>[^\n]+(?=\n|$))|(?![ \t]*>)(?![ \t]*([-*_])( *\1){2,} *(?=\n|$)|([ \t]*)((?:[*+-]|\d+\.))[ \t][\s\S]+?(?:(?=\n+\3?(?:[-*_][ \t]*){3,}(?:\n|$))|(?=\n+[ \t]*\[((?:[^\\](?:\\|\\(?:\\{2})+)\]|[^\]])+)\]:[ \t\n]*(<[^>\[\]]+>|[^\s\[\]]+)(?:[ \t\n]+['"(]((?:[^\n]|\n(?!\n))*?)['")])?[ \t]*(?=\n|$))|\n{2,}(?![ \t])(?!\3(?:[*+-]|\d+\.)[ \t])|$)|( *)(([`~])\10{2,})[ \t]*([^\n`~]+)?[ \t]*(?:\n([\s\S]*?))??(?:\n\ {0,3}\9\10*[ \t]*(?=\n|$)|$)|((?:(?: {4}|\t)[^\n]*\n?((?:[ \t]*\n)*))+)|[ \t]*\[((?:[^\\](?:\\|\\(?:\\{2})+)\]|[^\]])+)\]:[ \t\n]*(<[^>\[\]]+>|[^\s\[\]]+)(?:[ \t\n]+['"(]((?:[^\n]|\n(?!\n))*?)['")])?[ \t]*(?=\n|$))[^\n]+)(?:\n|$))*(?:[ \t]*>[ \t]*(?:\n[ \t]*>[ \t]*)*)?/, 'escape': /^\\(\n|[\\`*{}\[\]()#+\-.!_>"$%&',\/:;<=?@^~|])/ }, 'commonmarkGFM': { 'paragraph': /^(?:(?:[^\n]+\n?(?!\ {0,3}([-*_])( *\1){2,} *(?=\n|$)|( *)(([`~])\5{2,})\ {0,3}([^\n`~]+)?\ {0,3}(?:\n([\s\S]*?))??(?:\n\ {0,3}\4\5*\ {0,3}(?=\n|$)|$)|(\ {0,3})((?:[*+-]|\d+\.))[ \t][\s\S]+?(?:(?=\n+\8?(?:[-*_]\ {0,3}){3,}(?:\n|$))|(?=\n+\ {0,3}\[((?:[^\\](?:\\|\\(?:\\{2})+)\]|[^\]])+)\]:[ \t\n]*(<[^>\[\]]+>|[^\s\[\]]+)(?:[ \t\n]+['"(]((?:[^\n]|\n(?!\n))*?)['")])?\ {0,3}(?=\n|$))|\n{2,}(?![ \t])(?!\8(?:[*+-]|\d+\.)[ \t])|$)|(\ {0,3})(#{1,6})(?:([ \t]+)([^\n]+?))??(?:[ \t]+#+)?\ {0,3}(?=\n|$)|(?=\ {0,3}>)(?:(?:(?:\ {0,3}>[^\n]*\n)*(?:\ {0,3}>[^\n]+(?=\n|$))|(?!\ {0,3}>)(?!\ {0,3}\[((?:[^\\](?:\\|\\(?:\\{2})+)\]|[^\]])+)\]:[ \t\n]*(<[^>\[\]]+>|[^\s\[\]]+)(?:[ \t\n]+['"(]((?:[^\n]|\n(?!\n))*?)['")])?\ {0,3}(?=\n|$))[^\n]+)(?:\n|$))*(?:\ {0,3}>\ {0,3}(?:\n\ {0,3}>\ {0,3})*)?|<(?!(?:a|em|strong|small|s|cite|q|dfn|abbr|data|time|code|var|samp|kbd|sub|sup|i|b|u|mark|ruby|rt|rp|bdi|bdo|span|br|wbr|ins|del|img)\b)(?!mailto:)\w+(?!:\/|[^\w\s@]*@)\b))+)/ - }, - 'breaks': { 
- 'break': /^ *\n(?!\s*$)/, - 'inlineText': /^[\s\S]+?(?=[\\[ \t]?/gm; +var EXPRESSION_BULLET = /^([ \t]*)([*+-]|\d+[.)])( {1,4}(?! )| |\t)([^\n]*)/; +var EXPRESSION_PEDANTIC_BULLET = /^([ \t]*)([*+-]|\d+[.)])([ \t]+)/; +var EXPRESSION_INITIAL_INDENT = /^( {1,4}|\t)?/gm; +var EXPRESSION_INITIAL_TAB = /^( {4}|\t)?/gm; +var EXPRESSION_HTML_LINK_OPEN = /^/i; +var EXPRESSION_LOOSE_LIST_ITEM = /\n\n(?!\s*$)/; +var EXPRESSION_TASK_ITEM = /^\[([\ \t]|x|X)\][\ \t]/; + /* * Characters. */ -var AT_SIGN = '@'; -var CARET = '^'; -var EQUALS = '='; -var EXCLAMATION_MARK = '!'; +var C_SLASH = '\\'; +var C_UNDERSCORE = '_'; +var C_ASTERISK = '*'; +var C_TICK = '`'; +var C_AT_SIGN = '@'; +var C_HASH = '#'; +var C_PLUS = '+'; +var C_DASH = '-'; +var C_DOT = '.'; +var C_PIPE = '|'; +var C_PERCENTAGE = '%'; +var C_DOUBLE_QUOTE = '"'; +var C_SINGLE_QUOTE = '\''; +var C_DOLLAR = '$'; +var C_AMPERSAND = '&'; +var C_COMMA = ','; +var C_BACKSLASH = '/'; +var C_COLON = ':'; +var C_SEMI_COLON = ';'; +var C_QUESTION_MARK = '?'; +var C_CARET = '^'; +var C_EQUALS = '='; +var C_EXCLAMATION_MARK = '!'; +var C_TILDE = '~'; +var C_LT = '<'; +var C_GT = '>'; +var C_BRACKET_OPEN = '['; +var C_BRACKET_CLOSE = ']'; +var C_BRACE_OPEN = '{'; +var C_BRACE_CLOSE = '}'; +var C_PAREN_OPEN = '('; +var C_PAREN_CLOSE = ')'; +var C_SPACE = ' '; +var C_FORM_FEED = '\f'; +var C_NEWLINE = '\n'; +var C_CARRIAGE_RETURN = '\r'; +var C_TAB = '\t'; +var C_VERTICAL_TAB = '\v'; +var C_NO_BREAK_SPACE = '\u00a0'; +var C_OGHAM_SPACE = '\u1680'; +var C_MONGOLIAN_VOWEL_SEPARATOR = '\u180e'; +var C_EN_QUAD = '\u2000'; +var C_EM_QUAD = '\u2001'; +var C_EN_SPACE = '\u2002'; +var C_EM_SPACE = '\u2003'; +var C_THREE_PER_EM_SPACE = '\u2004'; +var C_FOUR_PER_EM_SPACE = '\u2005'; +var C_SIX_PER_EM_SPACE = '\u2006'; +var C_FIGURE_SPACE = '\u2007'; +var C_PUNCTUATION_SPACE = '\u2008'; +var C_THIN_SPACE = '\u2009'; +var C_HAIR_SPACE = '\u200a'; +var C_LINE_SEPARATOR = '​\u2028'; +var C_PARAGRAPH_SEPARATOR = '​\u2029'; +var 
C_NARROW_NO_BREAK_SPACE = '\u202f'; +var C_IDEOGRAPHIC_SPACE = '\u3000'; +var C_ZERO_WIDTH_NO_BREAK_SPACE = '\ufeff'; +var C_X_LOWER = 'x'; + +/* + * Character codes. + */ + +var CC_A_LOWER = 'a'.charCodeAt(0); +var CC_A_UPPER = 'A'.charCodeAt(0); +var CC_Z_LOWER = 'z'.charCodeAt(0); +var CC_Z_UPPER = 'Z'.charCodeAt(0); +var CC_0 = '0'.charCodeAt(0); +var CC_9 = '9'.charCodeAt(0); + +/* + * Protocols. + */ + +var HTTP_PROTOCOL = 'http://'; +var HTTPS_PROTOCOL = 'https://'; var MAILTO_PROTOCOL = 'mailto:'; -var NEW_LINE = '\n'; -var SPACE = ' '; -var TAB = '\t'; -var EMPTY = ''; -var LT = '<'; -var GT = '>'; -var BRACKET_OPEN = '['; + +var PROTOCOLS = [ + HTTP_PROTOCOL, + HTTPS_PROTOCOL, + MAILTO_PROTOCOL +]; + +var PROTOCOLS_LENGTH = PROTOCOLS.length; /* - * Types. + * Textual constants. */ +var YAML_FENCE = repeat(C_DASH, 3); +var CODE_INDENT = repeat(C_SPACE, CODE_INDENT_LENGTH); +var EMPTY = ''; var BLOCK = 'block'; var INLINE = 'inline'; -var HORIZONTAL_RULE = 'horizontalRule'; -var HTML = 'html'; -var YAML = 'yaml'; -var TABLE = 'table'; -var TABLE_CELL = 'tableCell'; -var TABLE_HEADER = 'tableHeader'; -var TABLE_ROW = 'tableRow'; -var PARAGRAPH = 'paragraph'; -var TEXT = 'text'; -var CODE = 'code'; -var LIST = 'list'; -var LIST_ITEM = 'listItem'; -var FOOTNOTE_DEFINITION = 'footnoteDefinition'; -var HEADING = 'heading'; -var BLOCKQUOTE = 'blockquote'; -var LINK = 'link'; -var IMAGE = 'image'; -var FOOTNOTE = 'footnote'; -var ESCAPE = 'escape'; -var STRONG = 'strong'; -var EMPHASIS = 'emphasis'; -var DELETE = 'delete'; -var INLINE_CODE = 'inlineCode'; -var BREAK = 'break'; -var ROOT = 'root'; - -/** - * Wrapper around he's `decode` function. 
+var COMMENT_START = ''; +var CDATA_START = ''; +var COMMENT_END_CHAR = COMMENT_END.charAt(0); +var CDATA_END_CHAR = CDATA_END.charAt(0); +var COMMENT_START_LENGTH = COMMENT_START.length; +var COMMENT_END_LENGTH = COMMENT_END.length; +var CDATA_START_LENGTH = CDATA_START.length; +var CDATA_END_LENGTH = CDATA_END.length; + +/* + * Node types. + */ + +var T_HORIZONTAL_RULE = 'horizontalRule'; +var T_HTML = 'html'; +var T_YAML = 'yaml'; +var T_TABLE = 'table'; +var T_TABLE_CELL = 'tableCell'; +var T_TABLE_HEADER = 'tableHeader'; +var T_TABLE_ROW = 'tableRow'; +var T_PARAGRAPH = 'paragraph'; +var T_TEXT = 'text'; +var T_CODE = 'code'; +var T_LIST = 'list'; +var T_LIST_ITEM = 'listItem'; +var T_DEFINITION = 'definition' +var T_FOOTNOTE_DEFINITION = 'footnoteDefinition'; +var T_HEADING = 'heading'; +var T_BLOCKQUOTE = 'blockquote'; +var T_LINK = 'link'; +var T_IMAGE = 'image'; +var T_FOOTNOTE = 'footnote'; +var T_ESCAPE = 'escape'; +var T_STRONG = 'strong'; +var T_EMPHASIS = 'emphasis'; +var T_DELETE = 'delete'; +var T_INLINE_CODE = 'inlineCode'; +var T_BREAK = 'break'; +var T_ROOT = 'root'; + +/* + * Available table alignments. + */ + +var TABLE_ALIGN_LEFT = 'left'; +var TABLE_ALIGN_CENTER = 'center'; +var TABLE_ALIGN_RIGHT = 'right'; +var TABLE_ALIGN_NONE = null; + +/* + * Available reference types. + */ + +var REFERENCE_TYPE_SHORTCUT = 'shortcut'; +var REFERENCE_TYPE_COLLAPSED = 'collapsed'; +var REFERENCE_TYPE_FULL = 'full'; + +/* + * A map of characters, and their column length, + * which can be used as indentation. + */ + +var INDENTATION_CHARACTERS = {}; + +INDENTATION_CHARACTERS[C_SPACE] = SPACE_SIZE; +INDENTATION_CHARACTERS[C_TAB] = TAB_SIZE; + +/* + * A map of two functions which can create list items. + */ + +var LIST_ITEM_MAP = {}; + +LIST_ITEM_MAP.true = renderPedanticListItem; +LIST_ITEM_MAP.false = renderNormalListItem; + +/* + * Define nodes of a type which can be merged. 
+ */ + +var MERGEABLE_NODES = {}; + +/** TODO */ +function isWhiteSpace(character) { + return character === C_SPACE || + character === C_FORM_FEED || + character === C_NEWLINE || + character === C_CARRIAGE_RETURN || + character === C_TAB || + character === C_VERTICAL_TAB || + character === C_NO_BREAK_SPACE || + character === C_OGHAM_SPACE || + character === C_MONGOLIAN_VOWEL_SEPARATOR || + character === C_EN_QUAD || + character === C_EM_QUAD || + character === C_EN_SPACE || + character === C_EM_SPACE || + character === C_THREE_PER_EM_SPACE || + character === C_FOUR_PER_EM_SPACE || + character === C_SIX_PER_EM_SPACE || + character === C_FIGURE_SPACE || + character === C_PUNCTUATION_SPACE || + character === C_THIN_SPACE || + character === C_HAIR_SPACE || + character === C_LINE_SEPARATOR || + character === C_PARAGRAPH_SEPARATOR || + character === C_NARROW_NO_BREAK_SPACE || + character === C_IDEOGRAPHIC_SPACE || + character === C_ZERO_WIDTH_NO_BREAK_SPACE; +} + +/** TODO */ +function isAlphabetic(character) { + var code = character.charCodeAt(0); + + return (code >= CC_A_LOWER && code <= CC_Z_LOWER) || + (code >= CC_A_UPPER && code <= CC_Z_UPPER); +} + +/** TODO */ +function isNumeric(character) { + var code = character.charCodeAt(0); + + return code >= CC_0 && code <= CC_9; +} + +/** + * Wrapper around he’s `decode` function. * * @example * decode('&'); // '&' @@ -139,7 +356,7 @@ function descapeFactory(scope, key) { if (scope[key] !== globalExpression) { globalExpression = scope[key]; expression = new RegExp( - scope[key].source.replace(CARET, EMPTY), 'g' + scope[key].source.replace(C_CARET, EMPTY), 'g' ); } @@ -165,41 +382,6 @@ function descapeFactory(scope, key) { return descape; } -/* - * Tab size. - */ - -var TAB_SIZE = 4; - -/* - * Expressions. 
- */ - -var EXPRESSION_RIGHT_ALIGNMENT = /^[ \t]*-+:[ \t]*$/; -var EXPRESSION_CENTER_ALIGNMENT = /^[ \t]*:-+:[ \t]*$/; -var EXPRESSION_LEFT_ALIGNMENT = /^[ \t]*:-+[ \t]*$/; -var EXPRESSION_TABLE_FENCE = /^[ \t]*|\|[ \t]*$/g; -var EXPRESSION_TABLE_BORDER = /[ \t]*\|[ \t]*/; -var EXPRESSION_BLOCK_QUOTE = /^[ \t]*>[ \t]?/gm; -var EXPRESSION_BULLET = /^([ \t]*)([*+-]|\d+[.)])( {1,4}(?! )| |\t)([^\n]*)/; -var EXPRESSION_PEDANTIC_BULLET = /^([ \t]*)([*+-]|\d+[.)])([ \t]+)/; -var EXPRESSION_INITIAL_INDENT = /^( {1,4}|\t)?/gm; -var EXPRESSION_INITIAL_TAB = /^( {4}|\t)?/gm; -var EXPRESSION_HTML_LINK_OPEN = /^/i; -var EXPRESSION_LOOSE_LIST_ITEM = /\n\n(?!\s*$)/; -var EXPRESSION_TASK_ITEM = /^\[([\ \t]|x|X)\][\ \t]/; - -/* - * A map of characters, and their column length, - * which can be used as indentation. - */ - -var INDENTATION_CHARACTERS = {}; - -INDENTATION_CHARACTERS[SPACE] = SPACE.length; -INDENTATION_CHARACTERS[TAB] = TAB_SIZE; - /** * Gets indentation information for a line. * @@ -263,7 +445,7 @@ function getIndent(value) { * @return {string} - Unindented `value`. */ function removeIndentation(value, maximum) { - var values = value.split(NEW_LINE); + var values = value.split(C_NEWLINE); var position = values.length + 1; var minIndent = Infinity; var matrix = []; @@ -272,7 +454,7 @@ function removeIndentation(value, maximum) { var stops; var padding; - values.unshift(repeat(SPACE, maximum) + EXCLAMATION_MARK); + values.unshift(repeat(C_SPACE, maximum) + C_EXCLAMATION_MARK); while (position--) { indentation = getIndent(values[position]); @@ -310,7 +492,7 @@ function removeIndentation(value, maximum) { minIndent && index !== minIndent ) { - padding = TAB; + padding = C_TAB; } else { padding = EMPTY; } @@ -323,74 +505,60 @@ function removeIndentation(value, maximum) { values.shift(); - return values.join(NEW_LINE); + return values.join(C_NEWLINE); } /** - * Ensure that `value` is at least indented with - * `indent` spaces. Does not support tabs. 
Does support - * multiple lines. + * Get the alignment from a table rule cell. * * @example - * ensureIndentation('foo', 2); // ' foo' - * ensureIndentation(' foo', 4); // ' foo' + * getAlignment(':-'); + * // 'left'; * * @param {string} value - * @param {number} indent - The maximum amount of - * spacing to insert. - * @return {string} - indented `value`. + * @return {string?} */ -function ensureIndentation(value, indent) { - var values = value.split(NEW_LINE); - var length = values.length; +function getAlignment(value) { var index = -1; - var line; - var position; + var length = value.length; + var character; + var hasDash; + var left; while (++index < length) { - line = values[index]; - - position = -1; + character = value.charAt(index); - while (++position < indent) { - if (line.charAt(position) !== SPACE) { - values[index] = repeat(SPACE, indent - position) + line; - break; + if (character === C_DASH) { + hasDash = true; + } else if (character === C_COLON) { + if (hasDash) { + return left ? TABLE_ALIGN_CENTER : TABLE_ALIGN_RIGHT; } + + left = true; } } - return values.join(NEW_LINE); + return left ? TABLE_ALIGN_LEFT : TABLE_ALIGN_NONE; } /** * Get the alignment from a table rule. 
* * @example - * getAlignment([':-', ':-:', '-:', '--']); + * getAlignments([':-', ':-:', '-:', '--']); * // ['left', 'center', 'right', null]; * * @param {Array.<string>} cells * @return {Array.<string?>} */ -function getAlignment(cells) { +function getAlignments(cells) { var results = []; var index = -1; var length = cells.length; - var alignment; while (++index < length) { - alignment = cells[index]; - - if (EXPRESSION_RIGHT_ALIGNMENT.test(alignment)) { - results[index] = 'right'; - } else if (EXPRESSION_CENTER_ALIGNMENT.test(alignment)) { - results[index] = 'center'; - } else if (EXPRESSION_LEFT_ALIGNMENT.test(alignment)) { - results[index] = 'left'; - } else { - results[index] = null; - } + results[index] = getAlignment(cells[index]); } return results; @@ -447,7 +615,7 @@ function stateToggler(key, state) { } /** - * Construct a state toggler which doesn't toggle. + * Construct a state toggler which doesn’t toggle. * * @example * var context = {}; @@ -479,12 +647,6 @@ function noopToggler() { return enter; } -/* - * Define nodes of a type which can be merged. - */ - -var MERGEABLE_NODES = {}; - /** * Merge two text nodes: `node` into `prev`. * @@ -549,328 +711,1657 @@ MERGEABLE_NODES.list = function (prev, node) { * tokenizeNewline(eat, '\n\n'); * * @param {function(string)} eat - * @param {string} $0 - Lines. + * @param {string} value - Rest of content. */ -function tokenizeNewline(eat, $0) { - if ($0.length > 1) { - this.currentBullet = null; - this.previousBullet = null; +function tokenizeNewline(eat, value) { + var self = this; + var length; + var subvalue; + var queue; + var index; + var character = value.charAt(0); + + if (character !== C_NEWLINE) { + return; } - eat($0); -} + index = 0; + length = value.length; + subvalue = C_NEWLINE; + queue = EMPTY; -/** - * Tokenise an indented code block. - * - * @example - * tokenizeCode(eat, '\tfoo'); - * - * @param {function(string)} eat - * @param {string} $0 - Whole code. - * @return {Node} - `code` node. 
- */ -function tokenizeCode(eat, $0) { - $0 = trimTrailingLines($0); + while (++index < length) { + character = value.charAt(index); - return eat($0)(this.renderCodeBlock( - removeIndentation($0, TAB_SIZE), null, eat) - ); -} + if (!isWhiteSpace(character)) { + break; + } -/** - * Tokenise a fenced code block. - * - * @example - * var $0 = '```js\nfoo()\n```'; - * tokenizeFences(eat, $0, '', '```', '`', 'js', 'foo()\n'); - * - * @param {function(string)} eat - * @param {string} $0 - Whole code. - * @param {string} $1 - Initial spacing. - * @param {string} $2 - Initial fence. - * @param {string} $3 - Fence marker. - * @param {string} $4 - Programming language flag. - * @param {string} $5 - Content. - * @return {Node} - `code` node. - */ -function tokenizeFences(eat, $0, $1, $2, $3, $4, $5) { - $0 = trimTrailingLines($0); + queue += character; - /* - * If the initial fence was preceded by spaces, - * exdent that amount of white space from the code - * block. Because it's possible that the code block - * is exdented, we first have to ensure at least - * those spaces are available. - */ + if (character === C_NEWLINE) { + subvalue += queue; + queue = EMPTY; + } + } - if ($1) { - $5 = removeIndentation(ensureIndentation($5, $1.length), $1.length); + if (subvalue.length > 1) { + self.currentBullet = null; + self.previousBullet = null; } - return eat($0)(this.renderCodeBlock($5, $4, eat)); + eat(subvalue); } +tokenizeNewline.algorithmic = true; + /** - * Tokenise an ATX-style heading. + * Tokenise an indented code block. * * @example - * tokenizeHeading(eat, ' # foo', ' ', '#', ' ', 'foo'); + * tokenizeCode(eat, '\tfoo'); * * @param {function(string)} eat - * @param {string} $0 - Whole heading. - * @param {string} $1 - Initial spacing. - * @param {string} $2 - Hashes. - * @param {string} $3 - Internal spacing. - * @param {string} $4 - Content. - * @return {Node} - `heading` node. 
- */ -function tokenizeHeading(eat, $0, $1, $2, $3, $4) { - var now = eat.now(); + * @param {string} value - Rest of content. + * @return {Node?} - `code` node. + */ +function tokenizeCode(eat, value) { + var index = -1; + var length = value.length; + var character; + var subvalue = EMPTY; + var content = EMPTY; + var subvalueQueue = EMPTY; + var contentQueue = EMPTY; + var blankQueue; + var indent; - now.column += ($1 + $2 + ($3 || '')).length; + while (++index < length) { + character = value.charAt(index); - return eat($0)(this.renderHeading($4, $2.length, now)); -} + if (indent) { + indent = false; -/** - * Tokenise a Setext-style heading. - * - * @example - * tokenizeLineHeading(eat, 'foo\n===', '', 'foo', '='); - * - * @param {function(string)} eat - * @param {string} $0 - Whole heading. - * @param {string} $1 - Initial spacing. - * @param {string} $2 - Content. - * @param {string} $3 - Underline marker. - * @return {Node} - `heading` node. - */ -function tokenizeLineHeading(eat, $0, $1, $2, $3) { - var now = eat.now(); + subvalue += subvalueQueue; + content += contentQueue; + subvalueQueue = contentQueue = EMPTY; - now.column += $1.length; + if (character === C_NEWLINE) { + subvalueQueue = contentQueue = character; + } else { + subvalue += character; + content += character; - return eat($0)(this.renderHeading($2, $3 === EQUALS ? 1 : 2, now)); -} + while (++index < length) { + character = value.charAt(index); -/** - * Tokenise a horizontal rule. - * - * @example - * tokenizeHorizontalRule(eat, '***'); - * - * @param {function(string)} eat - * @param {string} $0 - Whole rule. - * @return {Node} - `horizontalRule` node. - */ -function tokenizeHorizontalRule(eat, $0) { - return eat($0)(this.renderVoid(HORIZONTAL_RULE)); -} + if (!character || character === C_NEWLINE) { + contentQueue = subvalueQueue = character; + break; + } -/** - * Tokenise a blockquote. 
- * - * @example - * tokenizeBlockquote(eat, '> Foo'); - * - * @param {function(string)} eat - * @param {string} $0 - Whole blockquote. - * @return {Node} - `blockquote` node. - */ -function tokenizeBlockquote(eat, $0) { - var now = eat.now(); - var indent = this.indent(now.line); - var value = trimTrailingLines($0); - var add = eat(value); + subvalue += character; + content += character; + } + } + } else if ( + character === C_SPACE && + value.charAt(index + 1) === C_SPACE && + value.charAt(index + 2) === C_SPACE && + value.charAt(index + 3) === C_SPACE + ) { + subvalueQueue += CODE_INDENT; + index += 3; + indent = true; + } else if (character === C_TAB) { + subvalueQueue += character; + indent = true; + } else { + blankQueue = EMPTY; - value = value.replace(EXPRESSION_BLOCK_QUOTE, function (prefix) { - indent(prefix.length); + while (character === C_TAB || character === C_SPACE) { + blankQueue += character; + character = value.charAt(++index); + } - return ''; - }); + if (character !== C_NEWLINE) { + break; + } - return add(this.renderBlockquote(value, now)); + subvalueQueue += blankQueue + character; + contentQueue += character; + } + } + + if (content) { + return eat(subvalue)(this.renderCodeBlock(content, null, eat)); + } } +tokenizeCode.algorithmic = true; + /** - * Tokenise a list. + * Tokenise a fenced code block. * * @example - * tokenizeList(eat, '- Foo', '', '-'); + * tokenizeFences(eat, '```js\nfoo()\n```'); * * @param {function(string)} eat - * @param {string} $0 - Whole list. - * @param {string} $1 - Indent. - * @param {string} $2 - Bullet. - * @return {Node} - `list` node. + * @param {string} value - Rest of content. + * @return {Node?} - `code` node. 
*/ -function tokenizeList(eat, $0, $1, $2) { +function tokenizeFences(eat, value) { var self = this; - var firstBullet = $2; - var value = trimTrailingLines($0); - var matches = value.match(self.rules.item); - var length = matches.length; - var index = 0; - var isLoose = false; - var now; - var bullet; - var item; - var enterTop; - var exitBlockquote; - var node; + var settings = self.options; + var length = value.length + 1; + var index = -1; + var subvalue = EMPTY; + var fenceCount; + var marker; + var character; + var flag; + var queue; + var content; + var exdentedContent; + var closing; + var exdentedClosing; var indent; - var size; - var position; - var end; - /* - * Determine if all list-items belong to the - * same list. - */ + if (!settings.gfm) { + return; + } - if (!self.options.pedantic) { + /** Eat zero or more space / tab characters. */ + function eatOptionalSpacing() { while (++index < length) { - bullet = self.rules.bullet.exec(matches[index])[0]; + character = value.charAt(index); - if ( - firstBullet !== bullet && - ( - firstBullet.length === 1 && bullet.length === 1 || - bullet.charAt(bullet.length - 1) !== - firstBullet.charAt(firstBullet.length - 1) - ) - ) { + if (character !== C_SPACE && character !== C_TAB) { + index--; + break; + } + + subvalue += character; + } + } + + /* + * Eat initial spacing. + */ + + eatOptionalSpacing(); + indent = index + 1; + + /* + * Eat the fence. + */ + + if (character !== C_TILDE && character !== C_TICK) { + return; + } + + index++; + marker = character; + fenceCount = 1; + subvalue += character; + + while (++index < length) { + character = value.charAt(index); + + if (character !== marker) { + index--; + break; + } + + subvalue += character; + fenceCount++; + } + + if (fenceCount < MIN_FENCE_COUNT) { + return; + } + + /* + * Eat spacing before flag. + */ + + eatOptionalSpacing(); + + /* + * Eat flag. 
+ */ + + flag = queue = EMPTY; + + while (++index < length) { + character = value.charAt(index); + + if ( + character === C_NEWLINE || + character === C_TILDE || + character === C_TICK + ) { + index--; + break; + } + + if (character === C_SPACE || character === C_TAB) { + queue += character; + } else { + if (queue) { + flag += queue; + queue = EMPTY; + } + + flag += character; + } + } + + if (character && character !== C_NEWLINE) { + return; + } + + subvalue += flag; + + if (queue) { + subvalue += queue; + } + + queue = closing = exdentedClosing = content = exdentedContent = EMPTY; + + /* + * Eat content. + */ + + while (++index < length) { + character = value.charAt(index); + content += closing; + exdentedContent += exdentedClosing; + closing = exdentedClosing = EMPTY; + + if (character !== C_NEWLINE) { + content += character; + exdentedClosing += character; + continue; + } + + /* + * Add the newline to `subvalue` if its the first + * character. Otherwise, add it to the `closing` + * queue. 
+ */ + + if (!content) { + subvalue += character; + } else { + closing += character; + exdentedClosing += character; + } + + queue = EMPTY; + + while (++index < length) { + character = value.charAt(index); + + if (character !== C_SPACE) { + index--; + break; + } + + queue += character; + } + + closing += queue; + exdentedClosing += queue.slice(indent); + + if (queue.length >= CODE_INDENT_LENGTH) { + continue; + } + + queue = EMPTY; + + while (++index < length) { + character = value.charAt(index); + + if (character !== marker) { + index--; + break; + } + + queue += character; + } + + closing += queue; + exdentedClosing += queue; + + if (queue.length < fenceCount) { + continue; + } + + eatOptionalSpacing(); + + character = value.charAt(++index); + + if (!character || character === C_NEWLINE) { + break; + } + } + + subvalue += content + closing; + + return eat(subvalue)(self.renderCodeBlock(exdentedContent, flag, eat)); +} + +tokenizeFences.algorithmic = true; + +/** + * Tokenise an ATX-style heading. + * + * @example + * tokenizeHeading(eat, ' # foo'); + * + * @param {function(string)} eat + * @param {string} value - Rest of content. + * @return {Node?} - `heading` node. + */ +function tokenizeHeading(eat, value) { + var self = this; + var settings = self.options; + var length = value.length + 1; + var index = -1; + var now = eat.now(); + var subvalue = EMPTY; + var content = EMPTY; + var character; + var queue; + var depth; + + /* + * Eat initial spacing. + */ + + while (++index < length) { + character = value.charAt(index); + + if (character !== C_SPACE && character !== C_TAB) { + index--; + break; + } + + subvalue += character; + } + + /* + * Eat hashes. 
+ */ + + depth = 0; + length = index + MAX_ATX_COUNT + 1; + + while (++index <= length) { + character = value.charAt(index); + + if (character !== C_HASH) { + index--; + break; + } + + subvalue += character; + depth++; + } + + if ( + !depth || + (!settings.pedantic && value.charAt(index + 1) === C_HASH) + ) { + return; + } + + length = value.length + 1; + + /* + * Eat intermediate white-space. + */ + + queue = EMPTY; + + while (++index < length) { + character = value.charAt(index); + + if (character !== C_SPACE && character !== C_TAB) { + index--; + break; + } + + queue += character; + } + + /* + * Exit when not in pedantic mode without spacing. + */ + + if ( + !settings.pedantic && + !queue.length && + character && + character !== C_NEWLINE + ) { + return; + } + + /* + * Eat content. + */ + + subvalue += queue; + queue = content = EMPTY; + + while (++index < length) { + character = value.charAt(index); + + if (!character || character === C_NEWLINE) { + break; + } + + if ( + character !== C_SPACE && + character !== C_TAB && + character !== C_HASH + ) { + content += queue + character; + queue = EMPTY; + continue; + } + + while (character === C_SPACE || character === C_TAB) { + queue += character; + character = value.charAt(++index); + } + + while (character === C_HASH) { + queue += character; + character = value.charAt(++index); + } + + while (character === C_SPACE || character === C_TAB) { + queue += character; + character = value.charAt(++index); + } + + if (queue) { + index--; + } + } + + now.column += subvalue.length; + subvalue += content + queue; + + return eat(subvalue)(self.renderHeading(content, depth, now)); +} + +tokenizeHeading.algorithmic = true; + +/** + * Tokenise a Setext-style heading. + * + * @example + * tokenizeLineHeading(eat, 'foo\n==='); + * + * @param {function(string)} eat + * @param {string} value - Rest of content. + * @return {Node?} - `heading` node. 
+ */ +function tokenizeLineHeading(eat, value) { + var self = this; + var now = eat.now(); + var length = value.length; + var index = -1; + var subvalue = EMPTY; + var content; + var queue; + var character; + var marker; + var depth; + + /* + * Eat initial indentation. + */ + + while (++index < length) { + character = value.charAt(index); + + if (character !== C_SPACE || index >= MAX_LINE_HEADING_INDENT) { + index--; + break; + } + + subvalue += character; + } + + /* + * Eat content. + */ + + content = queue = EMPTY; + + while (++index < length) { + character = value.charAt(index); + + if (character === C_NEWLINE) { + index--; + break; + } + + if (character === C_SPACE || character === C_TAB) { + queue += character; + } else { + content += queue + character; + queue = EMPTY; + } + } + + now.column += subvalue.length; + subvalue += content + queue; + + /* + * Ensure the content is followed by a newline and a + * valid marker. + */ + + character = value.charAt(++index); + marker = value.charAt(++index); + + if ( + character !== C_NEWLINE || + (marker !== C_EQUALS && marker !== C_DASH) + ) { + return; + } + + subvalue += character; + + /* + * Eat Setext-line. + */ + + queue = marker; + depth = marker === C_EQUALS ? 1 : 2; + + while (++index < length) { + character = value.charAt(index); + + if (character !== marker) { + if (character !== C_NEWLINE) { + return; + } + + index--; + break; + } + + queue += character; + } + + return eat(subvalue + queue)(self.renderHeading(content, depth, now)); +} + +tokenizeLineHeading.algorithmic = true; + +/** + * Tokenise a horizontal rule. + * + * @example + * tokenizeHorizontalRule(eat, '***'); + * + * @param {function(string)} eat + * @param {string} value - Rest of content. + * @return {Node?} - `horizontalRule` node. 
+ */ +function tokenizeHorizontalRule(eat, value) { + var index = -1; + var length = value.length + 1; + var subvalue = EMPTY; + var character; + var marker; + var markerCount; + var queue; + + while (++index < length) { + character = value.charAt(index); + + if (character !== C_TAB && character !== C_SPACE) { + break; + } + + subvalue += character; + } + + if ( + character !== C_DASH && + character !== C_ASTERISK && + character !== C_UNDERSCORE + ) { + return; + } + + marker = character; + subvalue += character; + markerCount = 1; + queue = EMPTY; + + while (++index < length) { + character = value.charAt(index); + + if (character === marker) { + markerCount++; + subvalue += queue + marker; + queue = EMPTY; + } else if (character === C_SPACE) { + queue += character; + } else if ( + markerCount >= HORIZONTAL_RULE_MARKER_COUNT && + (!character || character === C_NEWLINE) + ) { + subvalue += queue; + + return eat(subvalue)(this.renderVoid(T_HORIZONTAL_RULE)); + } else { + return; + } + } +} + +tokenizeHorizontalRule.algorithmic = true; + +/** + * Tokenise a blockquote. + * + * @example + * tokenizeBlockquote(eat, '> Foo'); + * + * @param {function(string)} eat + * @param {string} $0 - Whole blockquote. + * @return {Node} - `blockquote` node. + */ +function tokenizeBlockquote(eat, $0) { + var now = eat.now(); + var indent = this.indent(now.line); + var value = trimTrailingLines($0); + var add = eat(value); + + value = value.replace(EXPRESSION_BLOCK_QUOTE, function (prefix) { + indent(prefix.length); + + return EMPTY; + }); + + return add(this.renderBlockquote(value, now)); +} + +/** + * Tokenise a list. + * + * @example + * tokenizeList(eat, '- Foo', '', '-'); + * + * @param {function(string)} eat + * @param {string} $0 - Whole list. + * @param {string} $1 - Indent. + * @param {string} $2 - Bullet. + * @return {Node} - `list` node. 
+ */ +function tokenizeList(eat, $0, $1, $2) { + var self = this; + var firstBullet = $2; + var value = trimTrailingLines($0); + var matches = value.match(self.rules.item); + var length = matches.length; + var index = 0; + var isLoose = false; + var now; + var bullet; + var item; + var enterTop; + var exitBlockquote; + var node; + var indent; + var size; + var position; + var end; + + /* + * Determine if all list-items belong to the + * same list. + */ + + if (!self.options.pedantic) { + while (++index < length) { + bullet = self.rules.bullet.exec(matches[index])[0]; + + if ( + firstBullet !== bullet && + ( + firstBullet.length === 1 && bullet.length === 1 || + bullet.charAt(bullet.length - 1) !== + firstBullet.charAt(firstBullet.length - 1) + ) + ) { matches = matches.slice(0, index); matches[index - 1] = trimTrailingLines(matches[index - 1]); - length = matches.length; + length = matches.length; + + break; + } + } + } + + if (self.options.commonmark) { + index = -1; + + while (++index < length) { + item = matches[index]; + indent = self.rules.indent.exec(item); + indent = indent[1] + repeat(C_SPACE, indent[2].length) + + indent[3]; + size = getIndent(indent).indent; + position = indent.length; + end = item.length; + + while (++position < end) { + if ( + item.charAt(position) === C_NEWLINE && + item.charAt(position - 1) === C_NEWLINE && + getIndent(item.slice(position + 1)).indent < size + ) { + matches[index] = item.slice(0, position - 1); + + matches = matches.slice(0, index + 1); + length = matches.length; + + break; + } + } + } + } + + self.previousBullet = self.currentBullet; + self.currentBullet = firstBullet; + + index = -1; + + node = eat(matches.join(C_NEWLINE)).reset( + self.renderList([], firstBullet) + ); + + enterTop = self.exitTop(); + exitBlockquote = self.enterBlockquote(); + + while (++index < length) { + item = matches[index]; + now = eat.now(); + + item = eat(item)(self.renderListItem(item, now), node); + + if (item.loose) { + isLoose = true; + 
} + + if (index !== length - 1) { + eat(C_NEWLINE); + } + } + + node.loose = isLoose; + + enterTop(); + exitBlockquote(); + + return node; +} + +/** + * Check whether `character` can be inside an unquoted + * attribute value. + * + * @param {string} character - Single character to check. + * @return {boolean} - Whether or not `character` can be + * inside an unquoted attribute value. + */ +function isUnquotedAttributeValue(character) { + return character !== C_DOUBLE_QUOTE && + character !== C_SINGLE_QUOTE && + character !== C_EQUALS && + character !== C_LT && + character !== C_GT && + character !== C_TICK; +} + +/** + * Check whether `character` can be inside a double-quoted + * attribute value. + * + * @param {string} character - Single character to check. + * @return {boolean} - Whether or not `character` can be + * inside a double-quoted attribute value. + */ +function isDoubleQuotedAttributeValue(character) { + return character !== C_DOUBLE_QUOTE; +} + +/** + * Check whether `character` can be inside a single-quoted + * attribute value. + * + * @param {string} character - Single character to check. + * @return {boolean} - Whether or not `character` can be + * inside a single-quoted attribute value. + */ +function isSingleQuotedAttributeValue(character) { + return character !== C_SINGLE_QUOTE; +} + +isSingleQuotedAttributeValue.delimiter = C_SINGLE_QUOTE; +isDoubleQuotedAttributeValue.delimiter = C_DOUBLE_QUOTE; + +/** + * Try to match comment. + * + * @param {string} value - Value to parse. + * @return {string?} - When applicable, the comment at the + * start of `value`. 
+ */ +function eatHTMLComment(value, settings) { + var index = COMMENT_START_LENGTH; + var queue = COMMENT_START; + var length = value.length; + var commonmark = settings.commonmark; + var character; + var hasNonDash; + + // ''; + // '(?:)'; + + if (value.slice(0, index) === queue) { + while (index < length) { + character = value.charAt(index); + + if ( + character === COMMENT_END_CHAR && + value.slice(index, index + COMMENT_END_LENGTH) === COMMENT_END + ) { + return queue + COMMENT_END; + } + + if (commonmark) { + if (character === C_GT && !hasNonDash) { + return; + } + + if (character === C_DASH) { + if (value.charAt(index + 1) === C_DASH) { + return; + } + } else { + hasNonDash = true; + } + } + + queue += character; + index++; + } + } +} + +/** + * Try to match CDATA. + * + * @param {string} value - Value to parse. + * @return {string?} - When applicable, the CDATA at the + * start of `value`. + */ +function eatHTMLCDATA(value) { + var index = CDATA_START_LENGTH; + var queue = value.slice(0, index); + var length = value.length; + var character; + + if (queue.toUpperCase() === CDATA_START) { + while (index < length) { + character = value.charAt(index); + + if ( + character === CDATA_END_CHAR && + value.slice(index, index + CDATA_END_LENGTH) === CDATA_END + ) { + return queue + CDATA_END; + } + + queue += character; + index++; + } + } +} + +/** + * Try to match a processing instruction. + * + * @param {string} value - Value to parse. + * @return {string?} - When applicable, the processing + * instruction at the start of `value`. 
+ */ +function eatHTMLProcessingInstruction(value) { + var index = 0; + var queue = EMPTY; + var length = value.length; + var character; + + if ( + value.charAt(index) === C_LT && + value.charAt(++index) === C_QUESTION_MARK + ) { + queue = C_LT + C_QUESTION_MARK; + index++; + + while (index < length) { + character = value.charAt(index); + + if ( + character === C_QUESTION_MARK && + value.charAt(index + 1) === C_GT + ) { + return queue + character + C_GT; + } + + queue += character; + index++; + } + } +} + +/** + * Try to match a declaration. + * + * @param {string} value - Value to parse. + * @return {string?} - When applicable, the declaration at + * the start of `value`. + */ +function eatHTMLDeclaration(value) { + var index = 0; + var length = value.length; + var queue = EMPTY; + var subqueue = EMPTY; + var character; + + if ( + value.charAt(index) === C_LT && + value.charAt(++index) === C_EXCLAMATION_MARK + ) { + queue = C_LT + C_EXCLAMATION_MARK; + index++; + + /* + * Eat as many alphabetic characters as + * possible. + */ + while (index < length) { + character = value.charAt(index); + + if (!isAlphabetic(character)) { break; } + + subqueue += character; + index++; + } + + character = value.charAt(index); + + if (!subqueue || !isWhiteSpace(character)) { + return; + } + + queue += subqueue + character; + index++; + + while (index < length) { + character = value.charAt(index); + + if (character === C_GT) { + return queue ? queue + character : null; + } + + queue += character; + index++; } } +} - if (self.options.commonmark) { - index = -1; +/** + * Try to match a closing tag. + * + * @param {string} value - Value to parse. + * @param {boolean?} [isBlock] - Whether the tag-name + * must be a known block-level node to match. + * @return {string?} - When applicable, the closing tag at + * the start of `value`. 
+ */ +function eatHTMLClosingTag(value, isBlock) { + var index = 0; + var length = value.length; + var queue = EMPTY; + var subqueue = EMPTY; + var character; - while (++index < length) { - item = matches[index]; - indent = self.rules.indent.exec(item); - indent = indent[1] + repeat(SPACE, indent[2].length) + indent[3]; - size = getIndent(indent).indent; - position = indent.length; - end = item.length; + if ( + value.charAt(index) === C_LT && + value.charAt(++index) === C_BACKSLASH + ) { + queue = C_LT + C_BACKSLASH; + subqueue = character = value.charAt(++index); - while (++position < end) { - if ( - item.charAt(position) === NEW_LINE && - item.charAt(position - 1) === NEW_LINE && - getIndent(item.slice(position + 1)).indent < size - ) { - matches[index] = item.slice(0, position - 1); + if (!isAlphabetic(character)) { + return; + } + + index++; + + /* + * Eat as many alphabetic characters as + * possible. + */ + + while (index < length) { + character = value.charAt(index); + + if (!isAlphabetic(character) && !isNumeric(character)) { + break; + } + + subqueue += character; + index++; + } + + if (isBlock && blockElements.indexOf(subqueue.toLowerCase()) === -1) { + return; + } + + queue += subqueue; + + /* + * Eat white-space. + */ + + while (index < length) { + character = value.charAt(index); + + if (!isWhiteSpace(character)) { + break; + } + + queue += character; + index++ + } + + if (value.charAt(index) === C_GT) { + return queue + C_GT; + } + } +} + +/** + * Try to match an opening tag. + * + * @param {string} value - Value to parse. + * @param {boolean?} [isBlock] - Whether the tag-name + * must be a known block-level node to match. + * @return {string?} - When applicable, the opening tag at + * the start of `value`. 
+ */ +function eatHTMLOpeningTag(value, isBlock) { + var index = 0; + var length = value.length; + var queue = EMPTY; + var subqueue = EMPTY; + var character = value.charAt(index); + var hasEquals; + var test; + + if (character === C_LT) { + queue = character; + subqueue = character = value.charAt(++index); + + if (!isAlphabetic(character)) { + return; + } + + index++; + + /* + * Eat as many alphabetic characters as + * possible. + */ + + while (index < length) { + character = value.charAt(index); + + if (!isAlphabetic(character) && !isNumeric(character)) { + break; + } + + subqueue += character; + index++; + } + + if (isBlock && blockElements.indexOf(subqueue.toLowerCase()) === -1) { + return; + } + + queue += subqueue; + subqueue = EMPTY; + + /* + * Find attributes. + */ + + while (index < length) { + /* + * Eat white-space. + */ + + while (index < length) { + character = value.charAt(index); + + if (!isWhiteSpace(character)) { + break; + } + + subqueue += character; + index++ + } + + if (!subqueue) { + break; + } + + /* + * Eat an attribute name. + */ + + queue += subqueue; + subqueue = EMPTY; + character = value.charAt(index); + + if ( + isAlphabetic(character) || + character === C_UNDERSCORE || + character === C_COLON + ) { + subqueue = character; + index++; + + while (index < length) { + character = value.charAt(index); + + if ( + !isAlphabetic(character) && + !isNumeric(character) && + character !== C_UNDERSCORE && + character !== C_COLON && + character !== C_DOT && + character !== C_DASH + ) { + break; + } + + subqueue += character; + index++ + } + } + + if (!subqueue) { + break; + } + + queue += subqueue; + subqueue = EMPTY; + hasEquals = false; + + /* + * Eat zero or more white-space and one + * equals sign. 
+ */ + + while (index < length) { + character = value.charAt(index); + + if (!isWhiteSpace(character)) { + if (!hasEquals && character === C_EQUALS) { + hasEquals = true; + } else { + break; + } + } + + subqueue += character; + index++ + } + + queue += subqueue; + subqueue = EMPTY; + + if (!hasEquals) { + queue += subqueue; + } else { + character = value.charAt(index); + queue += subqueue; + + if (character === C_DOUBLE_QUOTE) { + test = isDoubleQuotedAttributeValue; + subqueue = character; + index++; + } else if (character === C_SINGLE_QUOTE) { + test = isSingleQuotedAttributeValue; + subqueue = character; + index++; + } else { + test = isUnquotedAttributeValue; + subqueue = EMPTY; + } + + while (index < length) { + character = value.charAt(index); + + if (!test(character)) { + break; + } + + subqueue += character; + index++; + } + + character = value.charAt(index); + index++; + + if (!test.delimiter) { + if (!subqueue.length) { + return; + } + + index--; + } else if (character === test.delimiter) { + subqueue += character; + } else { + return; + } + + queue += subqueue; + subqueue = EMPTY; + } + } + + /* + * Eat more white-space. + */ + + while (index < length) { + character = value.charAt(index); + + if (!isWhiteSpace(character)) { + break; + } + + queue += character; + index++ + } + + character = value.charAt(index); + + /* + * Eat an optional backslash (for self-closing + * tags). + */ + + if (character === C_BACKSLASH) { + queue += character; + character = value.charAt(++index); + } + + return character === C_GT ? queue + character : null; + } +} + +/** + * Tokenise HTML. + * + * @example + * tokenizeHTML(eat, 'foo'); + * + * @param {function(string)} eat + * @param {string} value - Rest of content. + * @return {Node?} - `html` node. + */ +function tokenizeHTML(eat, value) { + var self = this; + var index = 0; + var length = value.length; + var subvalue = EMPTY; + var lineCount; + var character; + var queue; + + /* + * Eat initial spacing. 
+ */ - matches = matches.slice(0, index + 1); - length = matches.length; + while (index < length) { + character = value.charAt(index); - break; - } - } + if (character !== C_TAB && character !== C_SPACE) { + break; } + + subvalue += character; + index++; } - self.previousBullet = self.currentBullet; - self.currentBullet = firstBullet; + value = value.slice(index); - index = -1; + /* + * Try to eat an HTML thing. + */ - node = eat(matches.join(NEW_LINE)).reset( - self.renderList([], firstBullet) - ); + queue = eatHTMLComment(value, self.options) || + eatHTMLCDATA(value) || + eatHTMLProcessingInstruction(value) || + eatHTMLDeclaration(value) || + eatHTMLClosingTag(value, true) || + eatHTMLOpeningTag(value, true); - enterTop = self.exitTop(); - exitBlockquote = self.enterBlockquote(); + if (!queue) { + return; + } - while (++index < length) { - item = matches[index]; - now = eat.now(); + subvalue += queue; + index = subvalue.length; + queue = EMPTY; - item = eat(item)(self.renderListItem(item, now), node); + while (index < length) { + character = value.charAt(index); - if (item.loose) { - isLoose = true; + if (character === C_NEWLINE) { + queue += character + lineCount++; + } else if (queue.length < MIN_CLOSING_HTML_NEWLINE_COUNT) { + subvalue += queue + character; + queue = EMPTY; + } else { + break; } - if (index !== length - 1) { - eat(NEW_LINE); - } + index++; } - node.loose = isLoose; + return eat(subvalue)(self.renderRaw(T_HTML, subvalue)); +} - enterTop(); - exitBlockquote(); +tokenizeHTML.algorithmic = true; - return node; +/** + * Check whether `character` can be inside an enclosed + * URI. + * + * @param {string} character - Character to test. + * @return {boolean} - Whether or not `character` can be + * inside an enclosed URI. + */ +function isEnclosedURLCharacter(character) { + return character !== C_GT && + character !== C_BRACKET_OPEN && + character !== C_BRACKET_CLOSE; } +isEnclosedURLCharacter.delimiter = C_GT; + /** - * Tokenise HTML. 
- * - * @example - * tokenizeHtml(eat, 'foo'); + * Check whether `character` can be inside an unclosed + * URI. * - * @param {function(string)} eat - * @param {string} $0 - Whole HTML. - * @return {Node} - `html` node. + * @param {string} character - Character to test. + * @return {boolean} - Whether or not `character` can be + * inside an unclosed URI. */ -function tokenizeHtml(eat, $0) { - $0 = trimTrailingLines($0); - - return eat($0)(this.renderRaw(HTML, $0)); +function isUnclosedURLCharacter(character) { + return character !== C_BRACKET_OPEN && + character !== C_BRACKET_CLOSE && + !isWhiteSpace(character); } /** * Tokenise a definition. * * @example - * var $0 = '[foo]: http://example.com "Example Domain"'; - * var $1 = 'foo'; - * var $2 = 'http://example.com'; - * var $3 = 'Example Domain'; - * tokenizeDefinition(eat, $0, $1, $2, $3); + * var value = '[foo]: http://example.com "Example Domain"'; + * tokenizeDefinition(eat, value); * * @property {boolean} onlyAtTop * @property {boolean} notInBlockquote * @param {function(string)} eat - * @param {string} $0 - Whole definition. - * @param {string} $1 - Key. - * @param {string} $2 - URL. - * @param {string} $3 - Title. - * @return {Node} - `definition` node. + * @param {string} value - Rest of content. + * @return {Node?} - `definition` node. */ -function tokenizeDefinition(eat, $0, $1, $2, $3) { - var link = $2; +function tokenizeDefinition(eat, value) { + var index = 0; + var length = value.length; + var subvalue = EMPTY; + var queue; + var character; + var test; + var identifier; + var url; + var title; + + while (index < length) { + character = value.charAt(index); + + if (!isWhiteSpace(character)) { + break; + } - /* - * Remove angle-brackets from `link`. 
- */ + subvalue += character; + index++; + } + + character = value.charAt(index); - if (link.charAt(0) === LT && link.charAt(link.length - 1) === GT) { - link = link.slice(1, -1); + if (character !== C_BRACKET_OPEN) { + return; } - return eat($0)({ - 'type': 'definition', - 'identifier': normalize($1), - 'title': $3 ? decode(this.descape($3), eat) : null, - 'link': decode(this.descape(link), eat) - }); + index++; + subvalue += character; + queue = EMPTY; + + while (index < length) { + character = value.charAt(index); + + if (character === C_BRACKET_CLOSE) { + break; + } else if (character === C_SLASH) { + queue += character; + index++; + character = value.charAt(index); + } + + queue += character; + index++; + } + + if ( + !queue || + value.charAt(index) !== C_BRACKET_CLOSE || + value.charAt(index + 1) !== C_COLON + ) { + return; + } + + identifier = queue; + subvalue += queue + C_BRACKET_CLOSE + C_COLON; + index = subvalue.length; + queue = EMPTY; + + while (index < length) { + character = value.charAt(index); + + if ( + character !== C_TAB && + character !== C_SPACE && + character !== C_NEWLINE + ) { + break; + } + + subvalue += character; + index++; + } + + character = value.charAt(index); + index++; + + if (character === C_LT) { + test = isEnclosedURLCharacter; + subvalue += character; + queue = EMPTY; + } else { + test = isUnclosedURLCharacter; + queue = character; + } + + while (index < length) { + character = value.charAt(index); + + if (!test(character)) { + break; + } + + queue += character; + index++; + } + + if (test.delimiter) { + character = value.charAt(index); + + if (character !== test.delimiter) { + return; + } + + subvalue += queue + character; + } else if (queue) { + subvalue += queue; + } else { + return; + } + + url = queue; + queue = EMPTY; + + while (index < length) { + character = value.charAt(index); + + if ( + character !== C_TAB && + character !== C_SPACE && + character !== C_NEWLINE + ) { + break; + } + + queue += character; + index++; + 
} + + character = value.charAt(index); + test = null; + + if (character === C_DOUBLE_QUOTE) { + test = C_DOUBLE_QUOTE; + } else if (character === C_SINGLE_QUOTE) { + test = C_SINGLE_QUOTE; + } if (character === C_PAREN_OPEN) { + test = C_PAREN_CLOSE; + } + + if (!test) { + queue = EMPTY; + index = subvalue.length; + } else if (!queue) { + return; + } else { + subvalue += queue + character; + index = subvalue.length; + queue = EMPTY; + + while (index < length) { + character = value.charAt(index); + + if (character === test) { + break; + } + + if (character === C_NEWLINE) { + index++; + character = value.charAt(index); + + if (character === C_NEWLINE || character === test) { + return; + } + + queue += C_NEWLINE; + } + + queue += character; + index++; + } + + character = value.charAt(index); + + if (character !== test) { + return; + } + + subvalue += queue + character; + index++; + title = queue; + queue = EMPTY; + } + + while (index < length) { + character = value.charAt(index); + + if (character !== C_TAB && character !== C_SPACE) { + break; + } + + subvalue += character; + index++; + } + + character = value.charAt(index); + + if (!character || character === C_NEWLINE) { + return eat(subvalue)({ + 'type': T_DEFINITION, + 'identifier': normalize(identifier), + 'title': title ? decode(this.descape(title), eat) : null, + 'link': decode(this.descape(url), eat) + }); + } } tokenizeDefinition.onlyAtTop = true; tokenizeDefinition.notInBlockquote = true; +tokenizeDefinition.algorithmic = true; /** * Tokenise YAML front matter. @@ -882,53 +2373,224 @@ tokenizeDefinition.notInBlockquote = true; * * @property {boolean} onlyAtStart * @param {function(string)} eat - * @param {string} $0 - Whole front matter. - * @param {string} $1 - Content. - * @return {Node} - `yaml` node. + * @param {string} value - Rest of content. + * @return {Node?} - `yaml` node. */ -function tokenizeYAMLFrontMatter(eat, $0, $1) { - return eat($0)(this.renderRaw(YAML, $1 ? 
trimTrailingLines($1) : EMPTY)); +function tokenizeYAMLFrontMatter(eat, value) { + var subvalue; + var content; + var index; + var length; + var character; + var queue; + + if ( + !this.options.yaml || + value.charAt(0) !== C_DASH || + value.charAt(1) !== C_DASH || + value.charAt(2) !== C_DASH || + value.charAt(3) !== C_NEWLINE + ) { + return; + } + + subvalue = YAML_FENCE + C_NEWLINE; + content = queue = EMPTY; + index = 3; + length = value.length; + + while (++index < length) { + character = value.charAt(index); + + if ( + character === C_DASH && + (queue || !content) && + value.charAt(index + 1) === C_DASH && + value.charAt(index + 2) === C_DASH + ) { + subvalue += queue + YAML_FENCE; + + return eat(subvalue)(this.renderRaw(T_YAML, content)); + } + + if (character === C_NEWLINE) { + queue += character; + } else { + subvalue += queue + character; + content += queue + character; + queue = EMPTY; + } + } } tokenizeYAMLFrontMatter.onlyAtStart = true; +tokenizeYAMLFrontMatter.algorithmic = true; /** * Tokenise a footnote definition. * * @example - * var $0 = '[foo]: Bar.'; - * var $1 = '[foo]'; - * var $2 = 'foo'; - * var $3 = 'Bar.'; - * tokenizeFootnoteDefinition(eat, $0, $1, $2, $3); + * tokenizeFootnoteDefinition(eat, '[^foo]: Bar.'); * * @property {boolean} onlyAtTop * @property {boolean} notInBlockquote * @param {function(string)} eat - * @param {string} $0 - Whole definition. - * @param {string} $1 - Whole key. - * @param {string} $2 - Key. - * @param {string} $3 - Whole value. - * @return {Node} - `footnoteDefinition` node. + * @param {string} value - Rest of content. + * @return {Node?} - `footnoteDefinition` node. 
*/ -function tokenizeFootnoteDefinition(eat, $0, $1, $2, $3) { +function tokenizeFootnoteDefinition(eat, value) { var self = this; - var now = eat.now(); - var indent = self.indent(now.line); + var index; + var length; + var subvalue; + var now; + var indent; + var content; + var queue; + var subqueue; + var character; + var identifier; + + if (!self.options.footnotes) { + return; + } + + index = 0; + length = value.length; + subvalue = EMPTY; + now = eat.now(); + indent = self.indent(now.line); + + while (index < length) { + character = value.charAt(index); + + if (!isWhiteSpace(character)) { + break; + } + + subvalue += character; + index++; + } + + if ( + value.charAt(index) !== C_BRACKET_OPEN || + value.charAt(index + 1) !== C_CARET + ) { + return; + } + + subvalue += C_BRACKET_OPEN + C_CARET; + index = subvalue.length; + queue = EMPTY; + + while (index < length) { + character = value.charAt(index); + + if (character === C_BRACKET_CLOSE) { + break; + } else if (character === C_SLASH) { + queue += character; + index++; + character = value.charAt(index); + } + + queue += character; + index++; + } + + if ( + !queue || + value.charAt(index) !== C_BRACKET_CLOSE || + value.charAt(index + 1) !== C_COLON + ) { + return; + } + + identifier = normalize(queue); + subvalue += queue + C_BRACKET_CLOSE + C_COLON; + index = subvalue.length; + + while (index < length) { + character = value.charAt(index); + + if ( + character !== C_TAB && + character !== C_SPACE + ) { + break; + } + + subvalue += character; + index++; + } + + now.column += subvalue.length; + queue = content = subqueue = EMPTY; + + while (index < length) { + character = value.charAt(index); + + if (character === C_NEWLINE) { + subqueue = character; + index++; + + while (index < length) { + character = value.charAt(index); + + if (character !== C_NEWLINE) { + break; + } + + subqueue += character; + index++; + } + + queue += subqueue; + subqueue = EMPTY; + + while (index < length) { + character = 
value.charAt(index); + + if (character !== C_SPACE) { + break; + } + + subqueue += character; + index++; + } + + if (!subqueue.length) { + break; + } + + queue += subqueue; + } + + if (queue) { + content += queue; + queue = EMPTY; + } + + content += character; + index++; + } + + subvalue += content; - $3 = $3.replace(EXPRESSION_INITIAL_TAB, function (value) { - indent(value.length); + content = content.replace(EXPRESSION_INITIAL_TAB, function (line) { + indent(line.length); return EMPTY; }); - now.column += $1.length; - - return eat($0)(self.renderFootnoteDefinition(normalize($2), $3, now)); + return eat(subvalue)( + self.renderFootnoteDefinition(identifier, content, now) + ); } tokenizeFootnoteDefinition.onlyAtTop = true; tokenizeFootnoteDefinition.notInBlockquote = true; +tokenizeFootnoteDefinition.algorithmic = true; /** * Tokenise a table. @@ -961,7 +2623,7 @@ function tokenizeTable(eat, $0, $1, $2, $3, $4, $5) { $0 = trimTrailingLines($0); node = eat($0).reset({ - 'type': TABLE, + 'type': T_TABLE, 'align': [], 'children': [] }); @@ -978,8 +2640,8 @@ function tokenizeTable(eat, $0, $1, $2, $3, $4, $5) { var row = eat(value).reset(self.renderParent(type, []), node); var length = value.length + 1; var index = -1; - var queue = ''; - var cell = ''; + var queue = EMPTY; + var cell = EMPTY; var preamble = true; var count; var opening; @@ -990,7 +2652,7 @@ function tokenizeTable(eat, $0, $1, $2, $3, $4, $5) { while (++index < length) { character = value.charAt(index); - if (character === '\t' || character === ' ') { + if (character === C_TAB || character === C_SPACE) { if (cell) { queue += character; } else { @@ -1000,12 +2662,11 @@ function tokenizeTable(eat, $0, $1, $2, $3, $4, $5) { continue; } - if (character === '|' || character === '') { + if (character === EMPTY || character === C_PIPE) { if (preamble) { eat(character); } else { if (character && opening) { - // cell += queue + character; queue += character; continue; } @@ -1019,36 +2680,36 @@ function 
tokenizeTable(eat, $0, $1, $2, $3, $4, $5) { queue = queue.charAt(queue.length - 1); } else { subvalue += queue; - queue = ''; + queue = EMPTY; } } now = eat.now(); eat(subvalue)( - self.renderInline(TABLE_CELL, cell, now), row + self.renderInline(T_TABLE_CELL, cell, now), row ); } eat(queue + character); - queue = ''; - cell = ''; + queue = EMPTY; + cell = EMPTY; } } else { if (queue) { cell += queue; - queue = ''; + queue = EMPTY; } cell += character; - if (character === '\\' && index !== length - 2) { + if (character === C_SLASH && index !== length - 2) { cell += value.charAt(index + 1); index++; } - if (character === '`') { + if (character === C_TICK) { count = 1; while (value.charAt(index + 1) === character) { @@ -1070,15 +2731,15 @@ function tokenizeTable(eat, $0, $1, $2, $3, $4, $5) { } /* - * Add the table's header. + * Add the table’s header. */ - renderRow(TABLE_HEADER, $1); + renderRow(T_TABLE_HEADER, $1); - eat(NEW_LINE); + eat(C_NEWLINE); /* - * Add the table's alignment. + * Add the table’s alignment. */ eat($3); @@ -1087,22 +2748,22 @@ function tokenizeTable(eat, $0, $1, $2, $3, $4, $5) { .replace(EXPRESSION_TABLE_FENCE, EMPTY) .split(EXPRESSION_TABLE_BORDER); - node.align = getAlignment($4); + node.align = getAlignments($4); /* - * Add the table rows to table's children. + * Add the table rows to table’s children. */ - $5 = trimTrailingLines($5).split(NEW_LINE); + $5 = trimTrailingLines($5).split(C_NEWLINE); index = -1; length = $5.length; while (++index < length) { - renderRow(TABLE_ROW, $5[index]); + renderRow(T_TABLE_ROW, $5[index]); if (index !== length - 1) { - eat(NEW_LINE); + eat(C_NEWLINE); } } @@ -1125,30 +2786,106 @@ tokenizeTable.onlyAtTop = true; function tokenizeParagraph(eat, $0) { var now = eat.now(); - if (trim($0) === EMPTY) { - eat($0); + if (trim($0) === EMPTY) { + eat($0); + + return null; + } + + $0 = trimTrailingLines($0); + + return eat($0)(this.renderInline(T_PARAGRAPH, $0, now)); +} + +/** + * Tokenise a text node. 
+ * + * @example + * tokenizeText(eat, 'foo'); + * + * @param {function(string)} eat + * @param {string} value - Rest of content. + * @return {Node} - `text` node. + */ +function tokenizeText(eat, value) { + var index = 0; + var length = value.length; + var subvalue = value.charAt(0); + var options = this.options; + var gfm = options.gfm; + var breaks = options.breaks; + var offset; + var queue; + var character; + var position; + var protocol; + + while (++index < length) { + character = value.charAt(index); + + if ( + !character || + character === C_SLASH || + character === C_LT || + character === C_EXCLAMATION_MARK || + character === C_BRACKET_OPEN || + character === C_UNDERSCORE || + character === C_ASTERISK || + character === C_TICK || + (gfm && character === C_TILDE) || + (breaks && character === C_NEWLINE) + ) { + break; + } + + if (character === C_SPACE) { + queue = C_SPACE; + offset = index; + + while (++offset < length) { + character = value.charAt(offset); + + if (character !== C_SPACE) { + break; + } + + queue += C_SPACE; + } + + if ( + character === C_NEWLINE && + (breaks || offset - index >= 2) + ) { + break; + } + + subvalue += queue; + index = offset - 1; + character = EMPTY; + } else if (gfm) { + position = -1; + + while (++position < PROTOCOLS_LENGTH) { + protocol = PROTOCOLS[position]; + + if (protocol === value.slice(index, index + protocol.length)) { + character = EMPTY; + break; + } + } + + if (!character) { + break; + } + } - return null; + subvalue += character; } - $0 = trimTrailingLines($0); - - return eat($0)(this.renderInline(PARAGRAPH, $0, now)); + return eat(subvalue)(this.renderRaw(T_TEXT, subvalue)); } -/** - * Tokenise a text node. - * - * @example - * tokenizeText(eat, 'foo'); - * - * @param {function(string)} eat - * @param {string} $0 - Whole text. - * @return {Node} - `text` node. - */ -function tokenizeText(eat, $0) { - return eat($0)(this.renderRaw(TEXT, $0)); -} +tokenizeText.algorithmic = true; /** * Create a code-block node. 
@@ -1163,7 +2900,7 @@ function tokenizeText(eat, $0) { */ function renderCodeBlock(value, language, eat) { return { - 'type': CODE, + 'type': T_CODE, 'lang': language ? decode(this.descape(language), eat) : null, 'value': trimTrailingLines(value || EMPTY) }; @@ -1192,7 +2929,7 @@ function renderList(children, bullet) { */ return { - 'type': LIST, + 'type': T_LIST, 'ordered': bullet.length > 1, 'start': start, 'loose': null, @@ -1228,7 +2965,7 @@ function renderPedanticListItem(value, position) { } /* - * Remove the list-item's bullet. + * Remove the list-item’s bullet. */ value = value.replace(EXPRESSION_PEDANTIC_BULLET, replacer); @@ -1265,7 +3002,7 @@ function renderNormalListItem(value, position) { var max; /* - * Remove the list-item's bullet. + * Remove the list-item’s bullet. */ value = value.replace(EXPRESSION_BULLET, function ($0, $1, $2, $3, $4) { @@ -1279,19 +3016,19 @@ function renderNormalListItem(value, position) { */ if (Number($2) < 10 && bullet.length % 2 === 1) { - $2 = SPACE + $2; + $2 = C_SPACE + $2; } - max = $1 + repeat(SPACE, $2.length) + $3; + max = $1 + repeat(C_SPACE, $2.length) + $3; return max + rest; }); - lines = value.split(NEW_LINE); + lines = value.split(C_NEWLINE); trimmedLines = removeIndentation( value, getIndent(max).indent - ).split(NEW_LINE); + ).split(C_NEWLINE); /* * We replaced the initial bullet with something @@ -1313,18 +3050,9 @@ function renderNormalListItem(value, position) { indent(lines[index].length - trimmedLines[index].length); } - return trimmedLines.join(NEW_LINE); + return trimmedLines.join(C_NEWLINE); } -/* - * A map of two functions which can create list items. - */ - -var LIST_ITEM_MAP = {}; - -LIST_ITEM_MAP.true = renderPedanticListItem; -LIST_ITEM_MAP.false = renderNormalListItem; - /** * Create a list-item node. 
* @@ -1349,7 +3077,7 @@ function renderListItem(value, position) { if (task) { indent = task[0].length; - checked = task[1].toLowerCase() === 'x'; + checked = task[1].toLowerCase() === C_X_LOWER; self.indent(position.line)(indent); value = value.slice(indent); @@ -1357,9 +3085,9 @@ function renderListItem(value, position) { } node = { - 'type': LIST_ITEM, + 'type': T_LIST_ITEM, 'loose': EXPRESSION_LOOSE_LIST_ITEM.test(value) || - value.charAt(value.length - 1) === NEW_LINE + value.charAt(value.length - 1) === C_NEWLINE }; if (self.options.gfm) { @@ -1388,7 +3116,7 @@ function renderFootnoteDefinition(identifier, value, position) { var node; node = { - 'type': FOOTNOTE_DEFINITION, + 'type': T_FOOTNOTE_DEFINITION, 'identifier': identifier, 'children': self.tokenizeBlock(value, position) }; @@ -1411,7 +3139,7 @@ function renderFootnoteDefinition(identifier, value, position) { */ function renderHeading(value, depth, position) { return { - 'type': HEADING, + 'type': T_HEADING, 'depth': depth, 'children': this.tokenizeInline(value, position) }; @@ -1431,7 +3159,7 @@ function renderBlockquote(value, now) { var self = this; var exitBlockquote = self.enterBlockquote(); var node = { - 'type': BLOCKQUOTE, + 'type': T_BLOCKQUOTE, 'children': this.tokenizeBlock(value, now) }; @@ -1511,7 +3239,7 @@ function renderLink(isLink, href, text, title, position, eat) { var node; node = { - 'type': isLink ? LINK : IMAGE, + 'type': isLink ? T_LINK : T_IMAGE, 'title': title ? decode(self.descape(title), eat) : null }; @@ -1541,7 +3269,7 @@ function renderLink(isLink, href, text, title, position, eat) { * @return {Object} - `footnote` node. */ function renderFootnote(value, position) { - return this.renderInline(FOOTNOTE, value, position); + return this.renderInline(T_FOOTNOTE, value, position); } /** @@ -1578,17 +3306,79 @@ function renderBlock(type, value, position) { * Tokenise an escape sequence. 
* * @example - * tokenizeEscape(eat, '\\a', 'a'); + * tokenizeEscape(eat, '\\a'); * * @param {function(string)} eat - * @param {string} $0 - Whole escape. - * @param {string} $1 - Escaped character. + * @param {string} value - Rest of content. * @return {Node} - `escape` node. */ -function tokenizeEscape(eat, $0, $1) { - return eat($0)(this.renderRaw(ESCAPE, $1)); +function tokenizeEscape(eat, value) { + var character; + var options; + + if (value.charAt(0) !== C_SLASH) { + return; + } + + character = value.charAt(1); + options = this.options; + + if ( + ( + character === C_SLASH || + character === C_TICK || + character === C_ASTERISK || + character === C_BRACE_OPEN || + character === C_BRACE_CLOSE || + character === C_BRACKET_OPEN || + character === C_BRACKET_CLOSE || + character === C_PAREN_OPEN || + character === C_PAREN_CLOSE || + character === C_HASH || + character === C_PLUS || + character === C_DASH || + character === C_DOT || + character === C_EXCLAMATION_MARK || + character === C_UNDERSCORE || + character === C_GT + ) || + ( + ( + options.commonmark || + options.gfm + ) && + ( + character === C_TILDE || + character === C_PIPE + ) + ) || + ( + options.commonmark && + ( + character === C_NEWLINE || + character === C_PERCENTAGE || + character === C_DOUBLE_QUOTE || + character === C_DOLLAR || + character === C_AMPERSAND || + character === C_SINGLE_QUOTE || + character === C_COMMA || + character === C_BACKSLASH || + character === C_COLON || + character === C_SEMI_COLON || + character === C_LT || + character === C_EQUALS || + character === C_QUESTION_MARK || + character === C_AT_SIGN || + character === C_CARET + ) + ) + ) { + return eat(C_SLASH + character)(this.renderRaw(T_ESCAPE, character)); + } } +tokenizeEscape.algorithmic = true; + /** * Tokenise a URL in carets. * @@ -1597,33 +3387,106 @@ function tokenizeEscape(eat, $0, $1) { * * @property {boolean} notInLink * @param {function(string)} eat - * @param {string} $0 - Whole link. 
- * @param {string} $1 - URL. - * @param {string?} [$2] - Protocol or at. - * @return {Node} - `link` node. + * @param {string} value - Rest of content. + * @return {Node?} - `link` node. */ -function tokenizeAutoLink(eat, $0, $1, $2) { +function tokenizeAutoLink(eat, value) { var self = this; - var href = $1; - var text = $1; + var subvalue = EMPTY; + var length = value.length; + var index = 0; + var queue = EMPTY; + var character = value.charAt(0); + var hasAtCharacter = false; + var link = EMPTY; var now = eat.now(); - var offset = 1; + var content; var tokenize; var node; - if ($2 === AT_SIGN) { + if (character !== C_LT) { + return; + } + + index++; + subvalue = character; + + while (index < length) { + character = value.charAt(index); + + if ( + character === C_SPACE || + character === C_GT || + character === C_AT_SIGN || + (character === C_COLON && value.charAt(index + 1) === C_BACKSLASH) + ) { + break; + } + + queue += character; + index++; + } + + if (!queue) { + return; + } + + link += queue; + queue = EMPTY; + + character = value.charAt(index); + link += character; + index++; + + if (character === C_AT_SIGN) { + hasAtCharacter = true; + } else { + if (character !== C_COLON) { + return; + } + + character = value.charAt(++index); + link += character; + + if (character !== C_BACKSLASH) { + return; + } + } + + while (index < length) { + character = value.charAt(index); + + if (character === C_SPACE || character === C_GT) { + break; + } + + queue += character; + index++; + } + + character = value.charAt(index); + + if (!queue || character !== C_GT) { + return; + } + + link += queue; + content = link; + subvalue += link + character; + + if (hasAtCharacter) { if ( - text.substr(0, MAILTO_PROTOCOL.length).toLowerCase() !== + link.substr(0, MAILTO_PROTOCOL.length).toLowerCase() !== MAILTO_PROTOCOL ) { - href = MAILTO_PROTOCOL + text; + link = MAILTO_PROTOCOL + link; } else { - text = text.substr(MAILTO_PROTOCOL.length); - offset += MAILTO_PROTOCOL.length; + 
content = content.substr(MAILTO_PROTOCOL.length); + now.column += MAILTO_PROTOCOL.length; } } - now.column += offset; + now.column++; /* * Temporarily remove support for escapes in autolinks. @@ -1632,7 +3495,7 @@ function tokenizeAutoLink(eat, $0, $1, $2) { tokenize = self.inlineTokenizers.escape; self.inlineTokenizers.escape = null; - node = eat($0)(self.renderLink(true, href, text, null, now, eat)); + node = eat(subvalue)(self.renderLink(true, link, content, null, now, eat)); self.inlineTokenizers.escape = tokenize; @@ -1640,6 +3503,7 @@ function tokenizeAutoLink(eat, $0, $1, $2) { } tokenizeAutoLink.notInLink = true; +tokenizeAutoLink.algorithmic = true; /** * Tokenise a URL in text. @@ -1649,16 +3513,89 @@ tokenizeAutoLink.notInLink = true; * * @property {boolean} notInLink * @param {function(string)} eat - * @param {string} $0 - Whole link. - * @return {Node} - `link` node. + * @param {string} value - Rest of content. + * @return {Node?} - `link` node. */ -function tokenizeURL(eat, $0) { - var now = eat.now(); +function tokenizeURL(eat, value) { + var self = this; + var subvalue; + var character; + var index; + var protocol; + var match; + var length; + var queue; + var once; + var now; + + if (!self.options.gfm) { + return; + } + + subvalue = EMPTY; + index = -1; + length = PROTOCOLS_LENGTH; + + while (++index < length) { + protocol = PROTOCOLS[index]; + match = value.slice(0, protocol.length); + + if (match.toLowerCase() === protocol) { + subvalue = match; + break; + } + } + + if (!subvalue) { + return; + } + + index = subvalue.length; + length = value.length; + queue = EMPTY; + + while (index < length) { + character = value.charAt(index); + + if (isWhiteSpace(character) || character === C_LT) { + break; + } - return eat($0)(this.renderLink(true, $0, $0, null, now, eat)); + if ( + character === C_DOT || + character === C_COMMA || + character === C_COLON || + character === C_SEMI_COLON || + character === C_DOUBLE_QUOTE || + character === C_SINGLE_QUOTE || + 
character === C_PAREN_CLOSE || + character === C_BRACKET_CLOSE + ) { + if (once) { + break; + } + + once = true; + } + + queue += character; + index++; + } + + if (!queue) { + return; + } + + subvalue += queue; + now = eat.now(); + + return eat(subvalue)( + self.renderLink(true, subvalue, subvalue, null, now, eat) + ); } tokenizeURL.notInLink = true; +tokenizeURL.algorithmic = true; /** * Tokenise an HTML tag. @@ -1667,21 +3604,33 @@ tokenizeURL.notInLink = true; * tokenizeTag(eat, ''); * * @param {function(string)} eat - * @param {string} $0 - Content. - * @return {Node} - `html` node. + * @param {string} value - Rest of content. + * @return {Node?} - `html` node. */ -function tokenizeTag(eat, $0) { +function tokenizeTag(eat, value) { var self = this; + var subvalue = eatHTMLComment(value, self.options) || + eatHTMLCDATA(value) || + eatHTMLProcessingInstruction(value) || + eatHTMLDeclaration(value) || + eatHTMLClosingTag(value) || + eatHTMLOpeningTag(value); + + if (!subvalue) { + return; + } - if (!self.inLink && EXPRESSION_HTML_LINK_OPEN.test($0)) { + if (!self.inLink && EXPRESSION_HTML_LINK_OPEN.test(subvalue)) { self.inLink = true; - } else if (self.inLink && EXPRESSION_HTML_LINK_CLOSE.test($0)) { + } else if (self.inLink && EXPRESSION_HTML_LINK_CLOSE.test(subvalue)) { self.inLink = false; } - return eat($0)(self.renderRaw(HTML, $0)); + return eat(subvalue)(self.renderRaw(T_HTML, subvalue)); } +tokenizeTag.algorithmic = true; + /** * Tokenise a link. * @@ -1704,8 +3653,8 @@ function tokenizeTag(eat, $0) { * @return {Node?} - `link` node, `image` node, or `null`. */ function tokenizeLink(eat, $0, $1, $2, $3, $4, $5, $6, $7) { - var isLink = $1 === BRACKET_OPEN; - var href = $4 || $3 || ''; + var isLink = $1 === C_BRACKET_OPEN; + var href = $4 || $3 || EMPTY; var title = $7 || $6 || $5; var now; @@ -1727,80 +3676,220 @@ function tokenizeLink(eat, $0, $1, $2, $3, $4, $5, $6, $7) { * shortcut reference link, or footnote. 
* * @example - * tokenizeReference(eat, '[foo]', '[', 'foo'); - * tokenizeReference(eat, '[foo][]', '[', 'foo', ''); - * tokenizeReference(eat, '[foo][bar]', '[', 'foo', 'bar'); + * tokenizeReference(eat, '[foo]'); + * tokenizeReference(eat, '[foo][]'); + * tokenizeReference(eat, '[foo][bar]'); * * @param {function(string)} eat - * @param {string} $0 - Whole link. - * @param {string} $1 - Prefix. - * @param {string} $2 - identifier. - * @param {string} $3 - Content. + * @param {string} value - Rest of content. * @return {Node?} - `linkReference`, `imageReference`, or - * `footnoteReference`. Returns null when this is a link - * reference, but we're already in a link. + * `footnoteReference`. Null when not found or already + * in a link. */ -function tokenizeReference(eat, $0, $1, $2, $3) { +function tokenizeReference(eat, value) { var self = this; - var text = $2; - var identifier = $3 || $2; - var type = $1 === BRACKET_OPEN ? 'link' : 'image'; - var isFootnote = self.options.footnotes && identifier.charAt(0) === CARET; - var now = eat.now(); - var referenceType; + var character = value.charAt(0); + var index = 0; + var length = value.length; + var subvalue = EMPTY; + var intro = EMPTY; + var type = T_LINK; + var referenceType = REFERENCE_TYPE_SHORTCUT; + var text; + var identifier; + var now; var node; var exitLink; + var queue; + var bracketed; + var depth; - if ($3 === undefined) { - referenceType = 'shortcut'; - } else if ($3 === '') { - referenceType = 'collapsed'; - } else { - referenceType = 'full'; + /* + * Check whether we’re eating an image. + */ + + if (character === C_EXCLAMATION_MARK) { + type = T_IMAGE; + intro = character; + character = value.charAt(++index); + } + + if (character !== C_BRACKET_OPEN) { + return; + } + + index++; + intro += character; + queue = EMPTY; + + /* + * Check whether we’re eating a footnote. 
+ */ + + if ( + self.options.footnotes && + type === T_LINK && + value.charAt(index) === C_CARET + ) { + intro += C_CARET; + index++; + type = T_FOOTNOTE; + } + + /* + * Eat the text. + */ + + depth = 0; + + while (index < length) { + character = value.charAt(index); + + if (character === C_BRACKET_OPEN) { + bracketed = true; + depth++; + } else if (character === C_BRACKET_CLOSE) { + if (!depth) { + break; + } + + depth--; + } + + if (character === C_SLASH) { + queue += C_SLASH; + character = value.charAt(++index); + } + + queue += character; + index++; + } + + if (!queue) { + return; } - if (referenceType !== 'shortcut') { - isFootnote = false; + subvalue = text = queue; + character = value.charAt(index); + + if (character !== C_BRACKET_CLOSE) { + return; } - if (isFootnote) { - identifier = identifier.substr(1); + index++; + subvalue += character; + queue = EMPTY; + + while (index < length) { + character = value.charAt(index); + + if (!isWhiteSpace(character)) { + break; + } + + queue += character; + index++; } - if (isFootnote) { - if (identifier.indexOf(SPACE) !== -1) { - return eat($0)(self.renderFootnote(identifier, eat.now())); + character = value.charAt(index); + + if (character !== C_BRACKET_OPEN) { + identifier = text; + } else { + identifier = EMPTY; + queue += character; + index++; + + while (index < length) { + character = value.charAt(index); + + if ( + character === C_BRACKET_OPEN || + character === C_BRACKET_CLOSE + ) { + break; + } + + if (character === C_SLASH) { + identifier += C_SLASH; + character = value.charAt(++index); + } + + identifier += character; + index++; + } + + character = value.charAt(index); + + if (character === C_BRACKET_CLOSE) { + queue += identifier + character; + index++; + + referenceType = identifier ? + REFERENCE_TYPE_FULL : + REFERENCE_TYPE_COLLAPSED; } else { - type = 'footnote'; + identifier = EMPTY; } + + subvalue += queue; + queue = EMPTY; + } + + /* + * Brackets cannot be inside the identifier. 
+ */ + + if (referenceType !== REFERENCE_TYPE_FULL && bracketed) { + return; + } + + /* + * Inline footnotes cannot have an identifier. + */ + + if (type === T_FOOTNOTE && referenceType !== REFERENCE_TYPE_SHORTCUT) { + type = T_LINK; + intro = C_BRACKET_OPEN; + text = C_CARET + text; } - if (self.inLink && type === 'link') { + subvalue = intro + subvalue; + + if (type === T_FOOTNOTE && text.indexOf(C_SPACE) !== -1) { + return eat(subvalue)(self.renderFootnote(text, eat.now())); + } + + if (type === T_LINK && self.inLink) { return null; } - now.column += $1.length; + now = eat.now(); + now.column += intro.length; + identifier = referenceType === REFERENCE_TYPE_FULL ? identifier : text; node = { 'type': type + 'Reference', 'identifier': normalize(identifier) }; - if (type === 'link' || type === 'image') { + if (type === T_LINK || type === T_IMAGE) { node.referenceType = referenceType; } - if (type === 'link') { + if (type === T_LINK) { exitLink = self.enterLink(); node.children = self.tokenizeInline(text, now); exitLink(); - } else if (type === 'image') { + } else if (type === T_IMAGE) { node.alt = decode(self.descape(text), eat); } - return eat($0)(node); + return eat(subvalue)(node); } +tokenizeReference.algorithmic = true; + /** * Tokenise strong emphasis. * @@ -1826,7 +3915,7 @@ function tokenizeStrong(eat, $0, $1, $2, $3, $4) { now.column += 2; - return eat($0)(this.renderInline(STRONG, value, now)); + return eat($0)(this.renderInline(T_STRONG, value, now)); } /** @@ -1859,7 +3948,7 @@ function tokenizeEmphasis(eat, $0, $1, $2, $3, $4) { now.column += 1; - return eat($0)(this.renderInline(EMPHASIS, value, now)); + return eat($0)(this.renderInline(T_EMPHASIS, value, now)); } /** @@ -1869,34 +3958,158 @@ function tokenizeEmphasis(eat, $0, $1, $2, $3, $4) { * tokenizeDeletion(eat, '~~foo~~', '~~', 'foo'); * * @param {function(string)} eat - * @param {string} $0 - Whole deletion. - * @param {string} $1 - Content. + * @param {string} value - Rest of content. 
* @return {Node} - `delete` node. */ -function tokenizeDeletion(eat, $0, $1) { - var now = eat.now(); +function tokenizeDeletion(eat, value) { + var character = EMPTY; + var previous = EMPTY; + var preceding = EMPTY; + var subvalue = EMPTY; + var index; + var length; + var now; + + if ( + !this.options.gfm || + value.charAt(0) !== C_TILDE || + value.charAt(1) !== C_TILDE || + isWhiteSpace(value.charAt(2)) + ) { + return; + } + index = 1; + length = value.length; + now = eat.now(); now.column += 2; - return eat($0)(this.renderInline(DELETE, $1, now)); + while (++index < length) { + character = value.charAt(index); + + if ( + character === C_TILDE && + previous === C_TILDE && + (!preceding || !isWhiteSpace(preceding)) + ) { + return eat(C_TILDE + C_TILDE + subvalue + C_TILDE + C_TILDE)( + this.renderInline(T_DELETE, subvalue, now) + ); + } + + subvalue += previous; + preceding = previous; + previous = character; + } } +tokenizeDeletion.algorithmic = true; + /** * Tokenise inline code. * * @example - * tokenizeInlineCode(eat, '`foo()`', '`', 'foo()'); + * tokenizeInlineCode(eat, '`foo()`'); * * @param {function(string)} eat - * @param {string} $0 - Whole code. - * @param {string} $1 - Initial markers. - * @param {string} $2 - Content. - * @return {Node} - `inlineCode` node. + * @param {string} value - Rest of content. + * @return {Node?} - `inlineCode` node. 
*/ -function tokenizeInlineCode(eat, $0, $1, $2) { - return eat($0)(this.renderRaw(INLINE_CODE, trim($2 || ''))); +function tokenizeInlineCode(eat, value) { + var length = value.length; + var index = 0; + var queue = EMPTY; + var tickQueue = EMPTY; + var contentQueue; + var whiteSpaceQueue; + var count; + var openingCount; + var subvalue; + var character; + var found; + var next; + + while (index < length) { + if (value.charAt(index) !== C_TICK) { + break; + } + + queue += C_TICK; + index++; + } + + if (!queue) { + return; + } + + subvalue = queue; + openingCount = index; + queue = EMPTY; + next = value.charAt(index); + count = 0; + + while (index < length) { + character = next; + next = value.charAt(index + 1); + + if (character === C_TICK) { + count++; + tickQueue += character; + } else { + count = 0; + queue += character; + } + + if (count && next !== C_TICK) { + if (count === openingCount) { + subvalue += queue + tickQueue; + found = true; + break; + } + + queue += tickQueue; + tickQueue = EMPTY; + } + + index++; + } + + if (!found) { + if (openingCount % 2 !== 0) { + return; + } + + queue = EMPTY; + } + + contentQueue = whiteSpaceQueue = EMPTY; + length = queue.length; + index = -1; + + while (++index < length) { + character = queue.charAt(index); + + if (isWhiteSpace(character)) { + whiteSpaceQueue += character; + continue; + } + + if (whiteSpaceQueue) { + if (contentQueue) { + contentQueue += whiteSpaceQueue; + } + + whiteSpaceQueue = EMPTY; + } + + contentQueue += character; + } + + return eat(subvalue)(this.renderRaw(T_INLINE_CODE, contentQueue)); } +tokenizeInlineCode.algorithmic = true; + /** * Tokenise a break. * @@ -1904,13 +4117,38 @@ function tokenizeInlineCode(eat, $0, $1, $2) { * tokenizeBreak(eat, ' \n'); * * @param {function(string)} eat - * @param {string} $0 - * @return {Node} - `break` node. + * @param {string} value - Rest of content. + * @return {Node?} - `break` node. 
*/ -function tokenizeBreak(eat, $0) { - return eat($0)(this.renderVoid(BREAK)); +function tokenizeBreak(eat, value) { + var breaks = this.options.breaks; + var length = value.length; + var index = -1; + var queue = EMPTY; + var character; + + while (++index < length) { + character = value.charAt(index); + + if (character === C_NEWLINE) { + if (!breaks && index < MIN_BREAK_LENGTH) { + return; + } + + queue += character; + return eat(queue)(this.renderVoid(T_BREAK)); + } + + if (character !== C_SPACE) { + return; + } + + queue += character; + } } +tokenizeBreak.algorithmic = true; + /** * Construct a new parser. * @@ -1934,7 +4172,7 @@ function Parser(file, options) { self.inBlockquote = false; self.rules = rules; - self.descape = descapeFactory(rules, 'escape'); + self.descape = descapeFactory(rules, T_ESCAPE); self.options = extend({}, self.options); @@ -2058,7 +4296,7 @@ Parser.prototype.parse = function () { self.offset = {}; - node = self.renderBlock(ROOT, value); + node = self.renderBlock(T_ROOT, value); if (self.options.position) { node.position = { @@ -2169,7 +4407,7 @@ function tokenizeFactory(type) { var lastIndex = -1; while (++character < subvalueLength) { - if (subvalue.charAt(character) === NEW_LINE) { + if (subvalue.charAt(character) === C_NEWLINE) { lastIndex = character; line++; } @@ -2192,7 +4430,7 @@ function tokenizeFactory(type) { /** * Get offset. Called before the fisrt character is - * eaten to retrieve the range's offsets. + * eaten to retrieve the range’s offsets. * * @return {Function} - `done`, to be called when * the last character is eaten. @@ -2203,7 +4441,7 @@ function tokenizeFactory(type) { /** * Done. Called when the last character is - * eaten to retrieve the range's offsets. + * eaten to retrieve the range’s offsets. * * @return {Array.} - Offset. 
*/ @@ -2272,10 +4510,7 @@ function tokenizeFactory(type) { function validateEat(subvalue) { /* istanbul ignore if */ if (value.substring(0, subvalue.length) !== subvalue) { - self.file.fail( - 'Incorrectly eaten value: please report this ' + - 'warning on http://git.io/vUYWz', now() - ); + self.file.fail(ERR_INCORRECTLY_EATEN, now()); } } @@ -2319,10 +4554,10 @@ function tokenizeFactory(type) { /* * If there was already a `position`, this - * node was merged. Fixing `start` wasn't + * node was merged. Fixing `start` wasn’t * hard, but the indent is different. * Especially because some information, the - * indent between `n` and `l` wasn't + * indent between `n` and `l` wasn’t * tracked. Luckily, that space is * (should be?) empty, so we can safely * check for it now. @@ -2377,7 +4612,7 @@ function tokenizeFactory(type) { if (isMultiple) { arrayPush.apply(children, node); } else { - if (type === INLINE && node.type === TEXT) { + if (type === INLINE && node.type === T_TEXT) { node.value = decode(node.value, eater); } @@ -2557,18 +4792,30 @@ function tokenizeFactory(type) { if ( method && - rules[name] && + (method.algorithmic || rules[name]) && (!method.onlyAtStart || self.atStart) && (!method.onlyAtTop || self.atTop) && (!method.notInBlockquote || !self.inBlockquote) && (!method.notInLink || !self.inLink) ) { - match = rules[name].exec(value); + if (rules[name]) { + match = rules[name].exec(value); + + if (match) { + valueLength = value.length; - if (match) { + method.apply(self, [eater].concat(match)); + + matched = valueLength !== value.length; + + if (matched) { + break; + } + } + } else { valueLength = value.length; - method.apply(self, [eater].concat(match)); + method.apply(self, [eater, value]); matched = valueLength !== value.length; @@ -2581,7 +4828,7 @@ function tokenizeFactory(type) { /* istanbul ignore if */ if (!matched) { - self.file.fail('Infinite loop', eater.now()); + self.file.fail(ERR_INFINITE_LOOP, eater.now()); /* * Errors are not thrown on 
`File#fail` @@ -2614,7 +4861,7 @@ Parser.prototype.blockTokenizers = { 'horizontalRule': tokenizeHorizontalRule, 'blockquote': tokenizeBlockquote, 'list': tokenizeList, - 'html': tokenizeHtml, + 'html': tokenizeHTML, 'definition': tokenizeDefinition, 'footnoteDefinition': tokenizeFootnoteDefinition, 'looseTable': tokenizeTable, @@ -2668,7 +4915,6 @@ Parser.prototype.inlineTokenizers = { 'tag': tokenizeTag, 'link': tokenizeLink, 'reference': tokenizeReference, - 'shortcutReference': tokenizeReference, 'strong': tokenizeStrong, 'emphasis': tokenizeEmphasis, 'deletion': tokenizeDeletion, diff --git a/lib/parse/block-elements.json b/lib/parse/block-elements.json new file mode 100644 index 000000000..27c875c3f --- /dev/null +++ b/lib/parse/block-elements.json @@ -0,0 +1,52 @@ +[ + "article", + "header", + "aside", + "hgroup", + "blockquote", + "hr", + "iframe", + "body", + "li", + "map", + "button", + "object", + "canvas", + "ol", + "caption", + "output", + "col", + "p", + "colgroup", + "pre", + "dd", + "progress", + "div", + "section", + "dl", + "table", + "td", + "dt", + "tbody", + "embed", + "textarea", + "fieldset", + "tfoot", + "figcaption", + "th", + "figure", + "thead", + "footer", + "tr", + "form", + "ul", + "h1", + "h2", + "h3", + "h4", + "h5", + "h6", + "video", + "script", + "style" +] \ No newline at end of file diff --git a/script/build-expressions.js b/script/build-expressions.js index 9c1249dab..c23a26f10 100644 --- a/script/build-expressions.js +++ b/script/build-expressions.js @@ -84,100 +84,20 @@ function group(start, end, negate) { var expressions = {}; var rules = {}; var gfm = {}; -var footnotes = {}; -var yaml = {}; var pedantic = {}; var commonmark = {}; var commonmarkGFM = {}; -var breaks = {}; -var breaksGFM = {}; expressions.rules = rules; expressions.gfm = gfm; -expressions.footnotes = footnotes; -expressions.yaml = yaml; expressions.pedantic = pedantic; expressions.commonmark = commonmark; expressions.commonmarkGFM = commonmarkGFM; 
-expressions.breaks = breaks; -expressions.breaksGFM = breaksGFM; - -/* - * HTML Block elements. - */ - -var HTML_BLOCK_ELEMENTS = '(?:' + [ - 'article', - 'header', - 'aside', - 'hgroup', - 'blockquote', - 'hr', - 'iframe', - 'body', - 'li', - 'map', - 'button', - 'object', - 'canvas', - 'ol', - 'caption', - 'output', - 'col', - 'p', - 'colgroup', - 'pre', - 'dd', - 'progress', - 'div', - 'section', - 'dl', - 'table', - 'td', - 'dt', - 'tbody', - 'embed', - 'textarea', - 'fieldset', - 'tfoot', - 'figcaption', - 'th', - 'figure', - 'thead', - 'footer', - 'tr', - 'form', - 'ul', - 'h1', - 'h2', - 'h3', - 'h4', - 'h5', - 'h6', - 'video', - 'script', - 'style' -].join('|') + ')'; /* * Block helpers. */ -rules.newline = /^\n((?:[ \t]*\n)*)/; - -rules.code = /^((?:(?: {4}|\t)[^\n]*\n?((?:[ \t]*\n)*))+)/; - -rules.horizontalRule = /^[ \t]*([-*_])( *\1){2,} *(?=\n|$)/; - -pedantic.heading = - /^([ \t]*)(#{1,6})([ \t]*)([^\n]*?)[ \t]*#*[ \t]*(?=\n|$)/; - -rules.heading = - /^([ \t]*)(#{1,6})(?:([ \t]+)([^\n]+?))??(?:[ \t]+#+)?[ \t]*(?=\n|$)/; - -rules.lineHeading = - /^(\ {0,3})([^\n]+?)[ \t]*\n\ {0,3}(=|-){1,}[ \t]*(?=\n|$)/; - rules.definition = /^[ \t]*\[((?:[^\\](?:\\|\\(?:\\{2})+)\]|[^\]])+)\]:[ \t\n]*(<[^>\[\]]+>|[^\s\[\]]+)(?:[ \t\n]+['"(]((?:[^\n]|\n(?!\n))*?)['")])?[ \t]*(?=\n|$)/; @@ -274,84 +194,15 @@ var inlineTags = '(?!' + ':\\/|[^\\w\\s@]*@' + ')\\b'; -var tagName = '(?:[a-zA-Z][a-zA-Z0-9]*)'; -var attributeName = '(?:[a-zA-Z_:][a-zA-Z0-9_.:-]*)'; -var whitespace = '(?:\\s+)'; -var unquotedAttribute = '[^"\'=<>`]+'; -var singleQuotedAttribute = '\'[^\']*\''; -var doubleQuotedAttribute = '"[^"]*"'; -var attributeValue = '(?:' + unquotedAttribute + '|' + singleQuotedAttribute + '|' + doubleQuotedAttribute + ')'; -var attributeValueSpec = '(?:' + whitespace + '?' + '=' + whitespace + '?' 
+ attributeValue + ')'; -var attribute = '(?:' + whitespace + attributeName + attributeValueSpec + '?)'; -var openTag = '(?:<' + tagName + attribute + '*' + whitespace + '?/?>)'; -var closingTag = '(?:)'; -var openBlockTag = '(?:<' + HTML_BLOCK_ELEMENTS + attribute + '*' + whitespace + '?/?>?)'; -var closingBlockTag = '(?:)'; -var htmlComment = ''; -var commonmarkComment = '(?:)'; -var processingInstruction = '(?:<\\?(?:[^\\?]|\\?(?!>))+\\?>)'; -var declaration = '(?:)'; -var cdata = '(?:)'; -var htmlBlockTag = '(?:' + openBlockTag + '|' + closingBlockTag + ')'; - -var inlineTag = '^(?:' + - openTag + '|' + - closingTag + '|' + - htmlComment + '|' + - processingInstruction + '|' + - declaration + '|' + - cdata + -')'; - -var commonmarkInlineTag = '^(?:' + - openTag + '|' + - closingTag + '|' + - commonmarkComment + '|' + - processingInstruction + '|' + - declaration + '|' + - cdata + -')'; - -var html = new RegExp('^(?:' + - '[ \\t]*' + - '(?:' + - htmlBlockTag + '|' + - htmlComment + '|' + - processingInstruction + '|' + - declaration + '|' + - cdata + - ')' + - '[\\s\\S]*?' + - '[ \\t]*?' + - '(?:\\n{2,}|\\s*$)' + -')', 'i'); - -var commonmarkHTML = new RegExp('^(?:' + - '[ \\t]*' + - '(?:' + - htmlBlockTag + '|' + - commonmarkComment + '|' + - processingInstruction + '|' + - declaration + '|' + - cdata + - ')' + - '[\\s\\S]*?' + - '[ \\t]*?' + - '(?:\\n{2,}|\\s*$)' + -')', 'i'); - -rules.html = html; -commonmark.html = commonmarkHTML; - rules.paragraph = new RegExp( '^(?:(?:' + '[^\\n]+\\n?' + '(?!' 
+ - cleanExpression(rules.horizontalRule) + + cleanExpression(/^[ \t]*([-*_])( *\1){2,} *(?=\n|$)/) + '|' + - cleanExpression(rules.heading) + + cleanExpression(/^([ \t]*)(#{1,6})(?:([ \t]+)([^\n]+?))??(?:[ \t]+#+)?[ \t]*(?=\n|$)/) + '|' + - cleanExpression(rules.lineHeading) + + cleanExpression(/^(\ {0,3})([^\n]+?)[ \t]*\n\ {0,3}(=|-){1,}[ \t]*(?=\n|$)/) + '|' + cleanExpression(rules.definition) + '|' + @@ -366,12 +217,9 @@ rules.paragraph = new RegExp( * GFM Block Grammar. */ -gfm.fences = - /^( *)(([`~])\3{2,})[ \t]*([^\n`~]+)?[ \t]*(?:\n([\s\S]*?))??(?:\n\ {0,3}\2\3*[ \t]*(?=\n|$)|$)/; - gfm.paragraph = new RegExp( rules.paragraph.source.replace('(?=\\n|$)|', '(?=\\n|$)|' + - cleanExpression(gfm.fences).replace(/\\2/g, '\\4').replace(/\\3/g, '\\5') + + cleanExpression(/^( *)(([`~])\3{2,})[ \t]*([^\n`~]+)?[ \t]*(?:\n([\s\S]*?))??(?:\n\ {0,3}\2\3*[ \t]*(?=\n|$)|$)/).replace(/\\2/g, '\\4').replace(/\\3/g, '\\5') + '|' + cleanExpression(rules.list).replace(/\\1/g, '\\8') + '|' @@ -384,41 +232,17 @@ gfm.table = gfm.looseTable = /^( *(\S.*\|.*))\n( *([-:]+ *\|[-| :]*)\n)((?:.*\|.*(?:\n|$))*)/; -/* - * Footnote block grammar - */ - -footnotes.footnoteDefinition = - /^( *\[\^([^\]]+)\]: *)([^\n]+((?:\n+ +[^\n]+)*))/; - -/* - * YAML front matter. - */ - -yaml.yamlFrontMatter = /^-{3}\n([\s\S]+?\n)?-{3}/; - /* * Inline-Level Grammar. */ rules.escape = /^\\([\\`*{}\[\]()#+\-.!_>])/; -rules.autoLink = /^<([^ >]+(@|:\/)[^ >]+)>/; - -rules.tag = new RegExp(inlineTag); -commonmark.tag = new RegExp(commonmarkInlineTag); - rules.strong = /^(_)_((?:\\[\s\S]|[^\\])+?)__(?!_)|^(\*)\*((?:\\[\s\S]|[^\\])+?)\*\*(?!\*)/; rules.emphasis = /^\b(_)((?:__|\\[\s\S]|[^\\])+?)_\b|^(\*)((?:\*\*|\\[\s\S]|[^\\])+?)\*(?!\*)/; -rules.inlineCode = /^(`+)((?!`)[\s\S]*?(?:`\s+|[^`]))?(\1)(?!`)/; - -rules.break = /^ {2,}\n(?!\s*$)/; - -rules.inlineText = /^[\s\S]+?(?=[\\