From 6a57c4ac96043a8b841921572b6ac7a397abfb65 Mon Sep 17 00:00:00 2001 From: andreyvolokitin Date: Mon, 22 Jan 2018 01:33:23 +0400 Subject: [PATCH] Add option to not wrap collapsed tags, and bugfix See https://github.com/beautify-web/js-beautify/issues/1304 --- README.md | 1 + js/lib/beautify-html.js | 53 +++++++++++- js/lib/cli.js | 3 + js/src/html/beautifier.js | 53 +++++++++++- js/test/generated/beautify-html-tests.js | 84 ++++++++++++++++++ test/data/html/tests.js | 104 +++++++++++++++++++++++ 6 files changed, 292 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index b1ec212a6..42559f286 100644 --- a/README.md +++ b/README.md @@ -280,6 +280,7 @@ HTML Beautifier Options: -U, --unformatted List of tags (defaults to inline) that should not be reformatted -T, --content_unformatted List of tags (defaults to pre) whose content should not be reformatted -E, --extra_liners List of tags (defaults to [head,body,/html] that should have an extra newline before them. + -K, --keep_collapsed_whitespace Don't add a newline before tag which has a direct previous sibling tag. Default is false. --editorconfig Use EditorConfig to set up the options ``` diff --git a/js/lib/beautify-html.js b/js/lib/beautify-html.js index c7fcf6d01..d1a65dd0c 100644 --- a/js/lib/beautify-html.js +++ b/js/lib/beautify-html.js @@ -267,6 +267,7 @@ function Beautifier(html_source, options, js_beautify, css_beautify) { options.extra_liners.concat() : (typeof options.extra_liners === 'string') ? options.extra_liners.split(',') : 'head,body,/html'.split(','); eol = options.eol ? options.eol : 'auto'; + keep_collapsed_whitespace = (options.keep_collapsed_whitespace === undefined) ? false : options.keep_collapsed_whitespace; if (options.indent_with_tabs) { indent_character = '\t'; @@ -301,6 +302,7 @@ function Beautifier(html_source, options, js_beautify, css_beautify) { this.indent_content = indent_inner_html; this.indent_body_inner_html = indent_body_inner_html; this.indent_head_inner_html = indent_head_inner_html; + this.should_wrap_next_tag_start = true; this.Utils = { //Uilities made available to the various functions whitespace: "\n\r\t ".split(''), @@ -448,6 +450,7 @@ function Beautifier(html_source, options, js_beautify, css_beautify) { }; this.record_tag = function(tag) { //function to record a tag and its parent in this.tags Object + tag = ltrim(rtrim(tag)); if (this.tags[tag + 'count']) { //check for the existence of this tag type this.tags[tag + 'count']++; this.tags[tag + this.tags[tag + 'count']] = this.indent_level; //and record the present indent level @@ -949,11 +952,11 @@ function Beautifier(html_source, options, js_beautify, css_beautify) { } this.print_newline = function(force, arr) { - this.line_char_count = 0; if (!arr || !arr.length) { return; } if (force || (arr[arr.length - 1] !== '\n')) { //we might want the extra line + this.line_char_count = 0; if ((arr[arr.length - 1] !== '\n')) { arr[arr.length - 1] = rtrim(arr[arr.length - 1]); } @@ -1021,10 +1024,32 @@ function Beautifier(html_source, options, js_beautify, css_beautify) { /*_____________________--------------------_____________________*/ this.beautify = function() { + function disallow_next_tag_wrapping_if_required() { + if ( + // Should we keep whitespace between tags collapsed? + keep_collapsed_whitespace && + // Means that there is no whitespace at the end of token_text + !multi_parser.is_whitespace(multi_parser.token_text[multi_parser.token_text.length - 1]) + ) { + multi_parser.should_wrap_next_tag_start = false; + } + } + + function print_newline_if_allowed() { + // Don't add a newline before tag which has a direct previous sibling tag (this is optional). + if ( + !keep_collapsed_whitespace || + multi_parser.should_wrap_next_tag_start + ) { + multi_parser.print_newline(false, multi_parser.output); + } + } + multi_parser = new Parser(); //wrapping functions Parser multi_parser.printer(html_source, indent_character, indent_size, wrap_line_length, brace_style); //initialize starting values while (true) { var t = multi_parser.get_token(); + var token_was_matched = true; multi_parser.token_text = t[0]; multi_parser.token_type = t[1]; @@ -1034,7 +1059,7 @@ function Beautifier(html_source, options, js_beautify, css_beautify) { switch (multi_parser.token_type) { case 'TK_TAG_START': - multi_parser.print_newline(false, multi_parser.output); + print_newline_if_allowed(); multi_parser.print_token(multi_parser.token_text); if (multi_parser.indent_content) { if ((multi_parser.indent_body_inner_html || !multi_parser.token_text.match(//)) && @@ -1054,6 +1079,8 @@ function Beautifier(html_source, options, js_beautify, css_beautify) { multi_parser.current_mode = 'CONTENT'; break; case 'TK_TAG_END': + disallow_next_tag_wrapping_if_required(); + //Print new line only if the tag has no content and has child if (multi_parser.last_token === 'TK_CONTENT' && multi_parser.last_text === '') { var tag_name = (multi_parser.token_text.match(/\w+/) || [])[0]; @@ -1073,8 +1100,11 @@ function Beautifier(html_source, options, js_beautify, css_beautify) { // Don't add a newline before elements that should remain unformatted. var tag_check = multi_parser.token_text.match(/^\s*<([a-z-]+)/i); if (!tag_check || !multi_parser.Utils.in_array(tag_check[1], unformatted)) { - multi_parser.print_newline(false, multi_parser.output); + print_newline_if_allowed(); } + + disallow_next_tag_wrapping_if_required(); + multi_parser.print_token(multi_parser.token_text); multi_parser.current_mode = 'CONTENT'; break; @@ -1106,6 +1136,10 @@ function Beautifier(html_source, options, js_beautify, css_beautify) { multi_parser.current_mode = 'TAG'; break; case 'TK_CONTENT': + if (multi_parser.token_text.length !== 0) { + multi_parser.should_wrap_next_tag_start = true; + } + multi_parser.print_token(multi_parser.token_text); multi_parser.current_mode = 'TAG'; break; @@ -1155,6 +1189,7 @@ function Beautifier(html_source, options, js_beautify, css_beautify) { multi_parser.current_mode = 'TAG'; break; default: + token_was_matched = false; // We should not be getting here but we don't want to drop input on the floor // Just output the text and move on if (multi_parser.token_text !== '') { @@ -1162,6 +1197,18 @@ function Beautifier(html_source, options, js_beautify, css_beautify) { } break; } + + if ( + (token_was_matched && + multi_parser.token_type !== 'TK_TAG_SINGLE' && + multi_parser.token_type !== 'TK_TAG_END' && + multi_parser.token_type !== 'TK_CONTENT') || + + !token_was_matched + ) { + multi_parser.should_wrap_next_tag_start = true; + } + multi_parser.last_token = multi_parser.token_type; multi_parser.last_text = multi_parser.token_text; } diff --git a/js/lib/cli.js b/js/lib/cli.js index 99b243602..748125b86 100755 --- a/js/lib/cli.js +++ b/js/lib/cli.js @@ -101,6 +101,7 @@ var path = require('path'), "indent_handlebars": [Boolean], "indent_scripts": ["keep", "separate", "normal"], "extra_liners": [String, Array], + "keep_collapsed_whitespace": Boolean, // CLI "version": Boolean, "help": Boolean, @@ -150,6 +151,7 @@ var path = require('path'), "H": ["--indent_handlebars"], "S": ["--indent_scripts"], "E": ["--extra_liners"], + "K": ["--keep_collapsed_whitespace"], // non-dasherized hybrid shortcuts "good-stuff": [ "--keep_array_indentation", @@ -373,6 +375,7 @@ function usage(err) { msg.push(' -U, --unformatted List of tags (defaults to inline) that should not be reformatted'); msg.push(' -T, --content_unformatted List of tags (defaults to pre) whose content should not be reformatted'); msg.push(' -E, --extra_liners List of tags (defaults to [head,body,/html] that should have an extra newline'); + msg.push(' -K, --keep_collapsed_whitespace Don\'t add a newline before tag which has a direct previous sibling tag. Default is false.'); break; case "css": msg.push(' -L, --selector-separator-newline Add a newline between multiple selectors.'); diff --git a/js/src/html/beautifier.js b/js/src/html/beautifier.js index 61f5f5a68..eb11650a7 100644 --- a/js/src/html/beautifier.js +++ b/js/src/html/beautifier.js @@ -119,6 +119,7 @@ function Beautifier(html_source, options, js_beautify, css_beautify) { options.extra_liners.concat() : (typeof options.extra_liners === 'string') ? options.extra_liners.split(',') : 'head,body,/html'.split(','); eol = options.eol ? options.eol : 'auto'; + keep_collapsed_whitespace = (options.keep_collapsed_whitespace === undefined) ? false : options.keep_collapsed_whitespace; if (options.indent_with_tabs) { indent_character = '\t'; @@ -153,6 +154,7 @@ function Beautifier(html_source, options, js_beautify, css_beautify) { this.indent_content = indent_inner_html; this.indent_body_inner_html = indent_body_inner_html; this.indent_head_inner_html = indent_head_inner_html; + this.should_wrap_next_tag_start = true; this.Utils = { //Uilities made available to the various functions whitespace: "\n\r\t ".split(''), @@ -300,6 +302,7 @@ function Beautifier(html_source, options, js_beautify, css_beautify) { }; this.record_tag = function(tag) { //function to record a tag and its parent in this.tags Object + tag = ltrim(rtrim(tag)); if (this.tags[tag + 'count']) { //check for the existence of this tag type this.tags[tag + 'count']++; this.tags[tag + this.tags[tag + 'count']] = this.indent_level; //and record the present indent level @@ -801,11 +804,11 @@ function Beautifier(html_source, options, js_beautify, css_beautify) { } this.print_newline = function(force, arr) { - this.line_char_count = 0; if (!arr || !arr.length) { return; } if (force || (arr[arr.length - 1] !== '\n')) { //we might want the extra line + this.line_char_count = 0; if ((arr[arr.length - 1] !== '\n')) { arr[arr.length - 1] = rtrim(arr[arr.length - 1]); } @@ -873,10 +876,32 @@ function Beautifier(html_source, options, js_beautify, css_beautify) { /*_____________________--------------------_____________________*/ this.beautify = function() { + function disallow_next_tag_wrapping_if_required() { + if ( + // Should we keep whitespace between tags collapsed? + keep_collapsed_whitespace && + // Means that there is no whitespace at the end of token_text + !multi_parser.is_whitespace(multi_parser.token_text[multi_parser.token_text.length - 1]) + ) { + multi_parser.should_wrap_next_tag_start = false; + } + } + + function print_newline_if_allowed() { + // Don't add a newline before tag which has a direct previous sibling tag (this is optional). + if ( + !keep_collapsed_whitespace || + multi_parser.should_wrap_next_tag_start + ) { + multi_parser.print_newline(false, multi_parser.output); + } + } + multi_parser = new Parser(); //wrapping functions Parser multi_parser.printer(html_source, indent_character, indent_size, wrap_line_length, brace_style); //initialize starting values while (true) { var t = multi_parser.get_token(); + var token_was_matched = true; multi_parser.token_text = t[0]; multi_parser.token_type = t[1]; @@ -886,7 +911,7 @@ function Beautifier(html_source, options, js_beautify, css_beautify) { switch (multi_parser.token_type) { case 'TK_TAG_START': - multi_parser.print_newline(false, multi_parser.output); + print_newline_if_allowed(); multi_parser.print_token(multi_parser.token_text); if (multi_parser.indent_content) { if ((multi_parser.indent_body_inner_html || !multi_parser.token_text.match(//)) && @@ -906,6 +931,8 @@ function Beautifier(html_source, options, js_beautify, css_beautify) { multi_parser.current_mode = 'CONTENT'; break; case 'TK_TAG_END': + disallow_next_tag_wrapping_if_required(); + //Print new line only if the tag has no content and has child if (multi_parser.last_token === 'TK_CONTENT' && multi_parser.last_text === '') { var tag_name = (multi_parser.token_text.match(/\w+/) || [])[0]; @@ -925,8 +952,11 @@ function Beautifier(html_source, options, js_beautify, css_beautify) { // Don't add a newline before elements that should remain unformatted. var tag_check = multi_parser.token_text.match(/^\s*<([a-z-]+)/i); if (!tag_check || !multi_parser.Utils.in_array(tag_check[1], unformatted)) { - multi_parser.print_newline(false, multi_parser.output); + print_newline_if_allowed(); } + + disallow_next_tag_wrapping_if_required(); + multi_parser.print_token(multi_parser.token_text); multi_parser.current_mode = 'CONTENT'; break; @@ -958,6 +988,10 @@ function Beautifier(html_source, options, js_beautify, css_beautify) { multi_parser.current_mode = 'TAG'; break; case 'TK_CONTENT': + if (multi_parser.token_text.length !== 0) { + multi_parser.should_wrap_next_tag_start = true; + } + multi_parser.print_token(multi_parser.token_text); multi_parser.current_mode = 'TAG'; break; @@ -1007,6 +1041,7 @@ function Beautifier(html_source, options, js_beautify, css_beautify) { multi_parser.current_mode = 'TAG'; break; default: + token_was_matched = false; // We should not be getting here but we don't want to drop input on the floor // Just output the text and move on if (multi_parser.token_text !== '') { @@ -1014,6 +1049,18 @@ function Beautifier(html_source, options, js_beautify, css_beautify) { } break; } + + if ( + (token_was_matched && + multi_parser.token_type !== 'TK_TAG_SINGLE' && + multi_parser.token_type !== 'TK_TAG_END' && + multi_parser.token_type !== 'TK_CONTENT') || + + !token_was_matched + ) { + multi_parser.should_wrap_next_tag_start = true; + } + multi_parser.last_token = multi_parser.token_type; multi_parser.last_text = multi_parser.token_text; } diff --git a/js/test/generated/beautify-html-tests.js b/js/test/generated/beautify-html-tests.js index 4d3ea0754..85362c196 100644 --- a/js/test/generated/beautify-html-tests.js +++ b/js/test/generated/beautify-html-tests.js @@ -2967,6 +2967,90 @@ function run_html_tests(test_obj, Urlencoded, js_beautify, html_beautify, css_be ''); + //============================================================ + // keep_collapsed_whitespace: prevent wrapping in case of collapsed tags + reset_options(); + opts.keep_collapsed_whitespace = true; + opts.unformatted = []; + test_fragment( + '
Beautify
me

But

not
me Also, wrap me', + // -- output -- + '
Beautify
\n' + + '
me
\n' + + '

But

not
me\n' + + 'Also, wrap me'); + test_fragment( + '...are not wrapped ', + // -- output -- + '...are not wrapped\n' + + ''); + test_fragment( + '
\n' + + ' keep\n' + + '
\n' + + ' tags\n' + + '
\n' + + ' collapsed\n' + + '
', + // -- output -- + '
\n' + + ' keep\n' + + '
\n' + + ' tags\n' + + '
\n' + + ' collapsed\n' + + '
'); + test_fragment(''); + test_fragment(''); + + + //============================================================ + // default keep_collapsed_whitespace + reset_options(); + opts.unformatted = []; + test_fragment( + '
Beautify
me

And

me
also', + // -- output -- + '
Beautify
\n' + + '
me
\n' + + '

And

\n' + + '
me
\n' + + 'also'); + test_fragment( + '...are wrapped ', + // -- output -- + '\n' + + '...are wrapped\n' + + ''); + test_fragment( + '', + // -- output -- + '\n' + + ''); + test_fragment( + '
\n' + + ' all\n' + + '
\n' + + ' tags\n' + + '
\n' + + ' are wrapped\n' + + '
', + // -- output -- + '
\n' + + ' all\n' + + '
\n' + + '
\n' + + ' tags\n' + + '
\n' + + '
\n' + + ' are wrapped\n' + + '
'); + + //============================================================ // New Test Suite reset_options(); diff --git a/test/data/html/tests.js b/test/data/html/tests.js index 021d8ce84..20e9dccab 100644 --- a/test/data/html/tests.js +++ b/test/data/html/tests.js @@ -1099,6 +1099,110 @@ exports.test_data = { '
' ] }] + }, { + name: "keep_collapsed_whitespace: prevent wrapping in case of collapsed tags", + description: "Don't add a newline before a tag which has another closing tag directly before it", + options: [ + { name: 'keep_collapsed_whitespace', value: "true" }, + { name: 'unformatted', value: "[]" } + ], + tests: [{ + fragment: true, + input: '
Beautify
me

But

not
me Also, wrap me', + output: [ + '
Beautify
', + '
me
', + '

But

not
me', + 'Also, wrap me' + ] + }, { + fragment: true, + input: '...are not wrapped ', + output: [ + '...are not wrapped', + '' + ] + }, { + fragment: true, + input: '
\n' + + ' keep\n' + + '
\n' + + ' tags\n' + + '
\n' + + ' collapsed\n' + + '
', + output: [ + '
', + ' keep', + '
', + ' tags', + '
', + ' collapsed', + '
' + ] + }, { + fragment: true, + unchanged: '' + }, { + fragment: true, + unchanged: '' + }] + }, { + name: "default keep_collapsed_whitespace", + description: "by default keep_collapsed_whitespace is off, wrap tags as usual", + options: [ + { name: 'unformatted', value: "[]" } + ], + tests: [{ + fragment: true, + input: '
Beautify
me

And

me
also', + output: [ + '
Beautify
', + '
me
', + '

And

', + '
me
', + 'also' + ] + }, { + fragment: true, + input: '...are wrapped ', + output: [ + '', + '...are wrapped', + '' + ] + }, { + fragment: true, + input: '', + output: [ + '', + '' + ] + }, { + fragment: true, + input: '
\n' + + ' all\n' + + '
\n' + + ' tags\n' + + '
\n' + + ' are wrapped\n' + + '
', + output: [ + '
', + ' all', + '
', + '
', + ' tags', + '
', + '
', + ' are wrapped', + '
' + ] + }] }, { name: "New Test Suite" }],