From 6a57c4ac96043a8b841921572b6ac7a397abfb65 Mon Sep 17 00:00:00 2001
From: andreyvolokitin <andreyvlktn@gmail.com>
Date: Mon, 22 Jan 2018 01:33:23 +0400
Subject: [PATCH] Add option to not wrap collapsed tags, and bugfix

See https://github.com/beautify-web/js-beautify/issues/1304
---
 README.md                                |   1 +
 js/lib/beautify-html.js                  |  53 +++++++++++-
 js/lib/cli.js                            |   3 +
 js/src/html/beautifier.js                |  53 +++++++++++-
 js/test/generated/beautify-html-tests.js |  84 ++++++++++++++++++
 test/data/html/tests.js                  | 104 +++++++++++++++++++++++
 6 files changed, 292 insertions(+), 6 deletions(-)

diff --git a/README.md b/README.md
index b1ec212a6..42559f286 100644
--- a/README.md
+++ b/README.md
@@ -280,6 +280,7 @@ HTML Beautifier Options:
   -U, --unformatted                  List of tags (defaults to inline) that should not be reformatted
   -T, --content_unformatted          List of tags (defaults to pre) whose content should not be reformatted
   -E, --extra_liners                 List of tags (defaults to [head,body,/html] that should have an extra newline before them.
+  -K, --keep_collapsed_whitespace    Don't add a newline before a tag that immediately follows its previous sibling tag (no whitespace in between). Default is false.
   --editorconfig                     Use EditorConfig to set up the options
 ```
 
diff --git a/js/lib/beautify-html.js b/js/lib/beautify-html.js
index c7fcf6d01..d1a65dd0c 100644
--- a/js/lib/beautify-html.js
+++ b/js/lib/beautify-html.js
@@ -267,6 +267,7 @@ function Beautifier(html_source, options, js_beautify, css_beautify) {
         options.extra_liners.concat() : (typeof options.extra_liners === 'string') ?
         options.extra_liners.split(',') : 'head,body,/html'.split(',');
     eol = options.eol ? options.eol : 'auto';
+    keep_collapsed_whitespace = (options.keep_collapsed_whitespace === undefined) ? false : options.keep_collapsed_whitespace;
 
     if (options.indent_with_tabs) {
         indent_character = '\t';
@@ -301,6 +302,7 @@ function Beautifier(html_source, options, js_beautify, css_beautify) {
         this.indent_content = indent_inner_html;
         this.indent_body_inner_html = indent_body_inner_html;
         this.indent_head_inner_html = indent_head_inner_html;
+        this.should_wrap_next_tag_start = true;
 
         this.Utils = { //Uilities made available to the various functions
             whitespace: "\n\r\t ".split(''),
@@ -448,6 +450,7 @@ function Beautifier(html_source, options, js_beautify, css_beautify) {
         };
 
         this.record_tag = function(tag) { //function to record a tag and its parent in this.tags Object
+            tag = ltrim(rtrim(tag));
             if (this.tags[tag + 'count']) { //check for the existence of this tag type
                 this.tags[tag + 'count']++;
                 this.tags[tag + this.tags[tag + 'count']] = this.indent_level; //and record the present indent level
@@ -949,11 +952,11 @@ function Beautifier(html_source, options, js_beautify, css_beautify) {
             }
 
             this.print_newline = function(force, arr) {
-                this.line_char_count = 0;
                 if (!arr || !arr.length) {
                     return;
                 }
                 if (force || (arr[arr.length - 1] !== '\n')) { //we might want the extra line
+                    this.line_char_count = 0;
                     if ((arr[arr.length - 1] !== '\n')) {
                         arr[arr.length - 1] = rtrim(arr[arr.length - 1]);
                     }
@@ -1021,10 +1024,32 @@ function Beautifier(html_source, options, js_beautify, css_beautify) {
     /*_____________________--------------------_____________________*/
 
     this.beautify = function() {
+        function disallow_next_tag_wrapping_if_required() {
+          if (
+              // Should we keep whitespace between tags collapsed?
+              keep_collapsed_whitespace &&
+              // Means that there is no whitespace at the end of token_text
+              !multi_parser.is_whitespace(multi_parser.token_text[multi_parser.token_text.length - 1])
+            ) {
+              multi_parser.should_wrap_next_tag_start = false;
+            }
+        }
+
+        function print_newline_if_allowed() {
+            // Don't add a newline before a tag that immediately follows its previous sibling tag (only when keep_collapsed_whitespace is set).
+            if (
+              !keep_collapsed_whitespace ||
+              multi_parser.should_wrap_next_tag_start
+            ) {
+              multi_parser.print_newline(false, multi_parser.output);
+            }
+        }
+
         multi_parser = new Parser(); //wrapping functions Parser
         multi_parser.printer(html_source, indent_character, indent_size, wrap_line_length, brace_style); //initialize starting values
         while (true) {
             var t = multi_parser.get_token();
+            var token_was_matched = true;
             multi_parser.token_text = t[0];
             multi_parser.token_type = t[1];
 
@@ -1034,7 +1059,7 @@ function Beautifier(html_source, options, js_beautify, css_beautify) {
 
             switch (multi_parser.token_type) {
                 case 'TK_TAG_START':
-                    multi_parser.print_newline(false, multi_parser.output);
+                    print_newline_if_allowed();
                     multi_parser.print_token(multi_parser.token_text);
                     if (multi_parser.indent_content) {
                         if ((multi_parser.indent_body_inner_html || !multi_parser.token_text.match(/<body(?:.*)>/)) &&
@@ -1054,6 +1079,8 @@ function Beautifier(html_source, options, js_beautify, css_beautify) {
                     multi_parser.current_mode = 'CONTENT';
                     break;
                 case 'TK_TAG_END':
+                    disallow_next_tag_wrapping_if_required();
+
                     //Print new line only if the tag has no content and has child
                     if (multi_parser.last_token === 'TK_CONTENT' && multi_parser.last_text === '') {
                         var tag_name = (multi_parser.token_text.match(/\w+/) || [])[0];
@@ -1073,8 +1100,11 @@ function Beautifier(html_source, options, js_beautify, css_beautify) {
                     // Don't add a newline before elements that should remain unformatted.
                     var tag_check = multi_parser.token_text.match(/^\s*<([a-z-]+)/i);
                     if (!tag_check || !multi_parser.Utils.in_array(tag_check[1], unformatted)) {
-                        multi_parser.print_newline(false, multi_parser.output);
+                        print_newline_if_allowed();
                     }
+
+                    disallow_next_tag_wrapping_if_required();
+
                     multi_parser.print_token(multi_parser.token_text);
                     multi_parser.current_mode = 'CONTENT';
                     break;
@@ -1106,6 +1136,10 @@ function Beautifier(html_source, options, js_beautify, css_beautify) {
                     multi_parser.current_mode = 'TAG';
                     break;
                 case 'TK_CONTENT':
+                    if (multi_parser.token_text.length !== 0) {
+                      multi_parser.should_wrap_next_tag_start = true;
+                    }
+
                     multi_parser.print_token(multi_parser.token_text);
                     multi_parser.current_mode = 'TAG';
                     break;
@@ -1155,6 +1189,7 @@ function Beautifier(html_source, options, js_beautify, css_beautify) {
                     multi_parser.current_mode = 'TAG';
                     break;
                 default:
+                    token_was_matched = false;
                     // We should not be getting here but we don't want to drop input on the floor
                     // Just output the text and move on
                     if (multi_parser.token_text !== '') {
@@ -1162,6 +1197,18 @@ function Beautifier(html_source, options, js_beautify, css_beautify) {
                     }
                     break;
             }
+
+            if (
+              (token_was_matched &&
+              multi_parser.token_type !== 'TK_TAG_SINGLE' &&
+              multi_parser.token_type !== 'TK_TAG_END' &&
+              multi_parser.token_type !== 'TK_CONTENT') ||
+
+              !token_was_matched
+            ) {
+              multi_parser.should_wrap_next_tag_start = true;
+            }
+
             multi_parser.last_token = multi_parser.token_type;
             multi_parser.last_text = multi_parser.token_text;
         }
diff --git a/js/lib/cli.js b/js/lib/cli.js
index 99b243602..748125b86 100755
--- a/js/lib/cli.js
+++ b/js/lib/cli.js
@@ -101,6 +101,7 @@ var path = require('path'),
         "indent_handlebars": [Boolean],
         "indent_scripts": ["keep", "separate", "normal"],
         "extra_liners": [String, Array],
+        "keep_collapsed_whitespace": Boolean,
         // CLI
         "version": Boolean,
         "help": Boolean,
@@ -150,6 +151,7 @@ var path = require('path'),
         "H": ["--indent_handlebars"],
         "S": ["--indent_scripts"],
         "E": ["--extra_liners"],
+        "K": ["--keep_collapsed_whitespace"],
         // non-dasherized hybrid shortcuts
         "good-stuff": [
             "--keep_array_indentation",
@@ -373,6 +375,7 @@ function usage(err) {
             msg.push('  -U, --unformatted                 List of tags (defaults to inline) that should not be reformatted');
             msg.push('  -T, --content_unformatted         List of tags (defaults to pre) whose content should not be reformatted');
             msg.push('  -E, --extra_liners                List of tags (defaults to [head,body,/html] that should have an extra newline');
+            msg.push('  -K, --keep_collapsed_whitespace   Don\'t add a newline before tag which has a direct previous sibling tag. Default is false.');
             break;
         case "css":
             msg.push('  -L, --selector-separator-newline        Add a newline between multiple selectors.');
diff --git a/js/src/html/beautifier.js b/js/src/html/beautifier.js
index 61f5f5a68..eb11650a7 100644
--- a/js/src/html/beautifier.js
+++ b/js/src/html/beautifier.js
@@ -119,6 +119,7 @@ function Beautifier(html_source, options, js_beautify, css_beautify) {
         options.extra_liners.concat() : (typeof options.extra_liners === 'string') ?
         options.extra_liners.split(',') : 'head,body,/html'.split(',');
     eol = options.eol ? options.eol : 'auto';
+    keep_collapsed_whitespace = (options.keep_collapsed_whitespace === undefined) ? false : options.keep_collapsed_whitespace;
 
     if (options.indent_with_tabs) {
         indent_character = '\t';
@@ -153,6 +154,7 @@ function Beautifier(html_source, options, js_beautify, css_beautify) {
         this.indent_content = indent_inner_html;
         this.indent_body_inner_html = indent_body_inner_html;
         this.indent_head_inner_html = indent_head_inner_html;
+        this.should_wrap_next_tag_start = true;
 
         this.Utils = { //Uilities made available to the various functions
             whitespace: "\n\r\t ".split(''),
@@ -300,6 +302,7 @@ function Beautifier(html_source, options, js_beautify, css_beautify) {
         };
 
         this.record_tag = function(tag) { //function to record a tag and its parent in this.tags Object
+            tag = ltrim(rtrim(tag));
             if (this.tags[tag + 'count']) { //check for the existence of this tag type
                 this.tags[tag + 'count']++;
                 this.tags[tag + this.tags[tag + 'count']] = this.indent_level; //and record the present indent level
@@ -801,11 +804,11 @@ function Beautifier(html_source, options, js_beautify, css_beautify) {
             }
 
             this.print_newline = function(force, arr) {
-                this.line_char_count = 0;
                 if (!arr || !arr.length) {
                     return;
                 }
                 if (force || (arr[arr.length - 1] !== '\n')) { //we might want the extra line
+                    this.line_char_count = 0;
                     if ((arr[arr.length - 1] !== '\n')) {
                         arr[arr.length - 1] = rtrim(arr[arr.length - 1]);
                     }
@@ -873,10 +876,32 @@ function Beautifier(html_source, options, js_beautify, css_beautify) {
     /*_____________________--------------------_____________________*/
 
     this.beautify = function() {
+        function disallow_next_tag_wrapping_if_required() {
+          if (
+              // Should we keep whitespace between tags collapsed?
+              keep_collapsed_whitespace &&
+              // Means that there is no whitespace at the end of token_text
+              !multi_parser.is_whitespace(multi_parser.token_text[multi_parser.token_text.length - 1])
+            ) {
+              multi_parser.should_wrap_next_tag_start = false;
+            }
+        }
+
+        function print_newline_if_allowed() {
+            // Don't add a newline before a tag that immediately follows its previous sibling tag (only when keep_collapsed_whitespace is set).
+            if (
+              !keep_collapsed_whitespace ||
+              multi_parser.should_wrap_next_tag_start
+            ) {
+              multi_parser.print_newline(false, multi_parser.output);
+            }
+        }
+
         multi_parser = new Parser(); //wrapping functions Parser
         multi_parser.printer(html_source, indent_character, indent_size, wrap_line_length, brace_style); //initialize starting values
         while (true) {
             var t = multi_parser.get_token();
+            var token_was_matched = true;
             multi_parser.token_text = t[0];
             multi_parser.token_type = t[1];
 
@@ -886,7 +911,7 @@ function Beautifier(html_source, options, js_beautify, css_beautify) {
 
             switch (multi_parser.token_type) {
                 case 'TK_TAG_START':
-                    multi_parser.print_newline(false, multi_parser.output);
+                    print_newline_if_allowed();
                     multi_parser.print_token(multi_parser.token_text);
                     if (multi_parser.indent_content) {
                         if ((multi_parser.indent_body_inner_html || !multi_parser.token_text.match(/<body(?:.*)>/)) &&
@@ -906,6 +931,8 @@ function Beautifier(html_source, options, js_beautify, css_beautify) {
                     multi_parser.current_mode = 'CONTENT';
                     break;
                 case 'TK_TAG_END':
+                    disallow_next_tag_wrapping_if_required();
+
                     //Print new line only if the tag has no content and has child
                     if (multi_parser.last_token === 'TK_CONTENT' && multi_parser.last_text === '') {
                         var tag_name = (multi_parser.token_text.match(/\w+/) || [])[0];
@@ -925,8 +952,11 @@ function Beautifier(html_source, options, js_beautify, css_beautify) {
                     // Don't add a newline before elements that should remain unformatted.
                     var tag_check = multi_parser.token_text.match(/^\s*<([a-z-]+)/i);
                     if (!tag_check || !multi_parser.Utils.in_array(tag_check[1], unformatted)) {
-                        multi_parser.print_newline(false, multi_parser.output);
+                        print_newline_if_allowed();
                     }
+
+                    disallow_next_tag_wrapping_if_required();
+
                     multi_parser.print_token(multi_parser.token_text);
                     multi_parser.current_mode = 'CONTENT';
                     break;
@@ -958,6 +988,10 @@ function Beautifier(html_source, options, js_beautify, css_beautify) {
                     multi_parser.current_mode = 'TAG';
                     break;
                 case 'TK_CONTENT':
+                    if (multi_parser.token_text.length !== 0) {
+                      multi_parser.should_wrap_next_tag_start = true;
+                    }
+
                     multi_parser.print_token(multi_parser.token_text);
                     multi_parser.current_mode = 'TAG';
                     break;
@@ -1007,6 +1041,7 @@ function Beautifier(html_source, options, js_beautify, css_beautify) {
                     multi_parser.current_mode = 'TAG';
                     break;
                 default:
+                    token_was_matched = false;
                     // We should not be getting here but we don't want to drop input on the floor
                     // Just output the text and move on
                     if (multi_parser.token_text !== '') {
@@ -1014,6 +1049,18 @@ function Beautifier(html_source, options, js_beautify, css_beautify) {
                     }
                     break;
             }
+
+            if (
+              (token_was_matched &&
+              multi_parser.token_type !== 'TK_TAG_SINGLE' &&
+              multi_parser.token_type !== 'TK_TAG_END' &&
+              multi_parser.token_type !== 'TK_CONTENT') ||
+
+              !token_was_matched
+            ) {
+              multi_parser.should_wrap_next_tag_start = true;
+            }
+
             multi_parser.last_token = multi_parser.token_type;
             multi_parser.last_text = multi_parser.token_text;
         }
diff --git a/js/test/generated/beautify-html-tests.js b/js/test/generated/beautify-html-tests.js
index 4d3ea0754..85362c196 100644
--- a/js/test/generated/beautify-html-tests.js
+++ b/js/test/generated/beautify-html-tests.js
@@ -2967,6 +2967,90 @@ function run_html_tests(test_obj, Urlencoded, js_beautify, html_beautify, css_be
             '</div>');
 
 
+        //============================================================
+        // keep_collapsed_whitespace: prevent wrapping in case of collapsed tags
+        reset_options();
+        opts.keep_collapsed_whitespace = true;
+        opts.unformatted = [];
+        test_fragment(
+            '<div>Beautify</div> <div>me</div> <p>But</p><div>not</div><i>me</i> <span>Also, wrap me</span>',
+            //  -- output --
+            '<div>Beautify</div>\n' +
+            '<div>me</div>\n' +
+            '<p>But</p><div>not</div><i>me</i>\n' +
+            '<span>Also, wrap me</span>');
+        test_fragment(
+            '<input value="Collapsed single tags..." type="text"><img src="" alt="...are not wrapped"/> <input value="This single tag is wrapped" type="text">',
+            //  -- output --
+            '<input value="Collapsed single tags..." type="text"><img src="" alt="...are not wrapped" />\n' +
+            '<input value="This single tag is wrapped" type="text">');
+        test_fragment(
+            '<div>\n' +
+            '    keep\n' +
+            '</div\n' +
+            '><div>\n' +
+            '    tags\n' +
+            '</div\n' +
+            '><div>\n' +
+            '    collapsed\n' +
+            '</div>',
+            //  -- output --
+            '<div>\n' +
+            '    keep\n' +
+            '</div><div>\n' +
+            '    tags\n' +
+            '</div><div>\n' +
+            '    collapsed\n' +
+            '</div>');
+        test_fragment('<label>Type to the right: </label><input type="text">');
+        test_fragment('<input type="text"><label> - type to the left</label>');
+
+
+        //============================================================
+        // default keep_collapsed_whitespace
+        reset_options();
+        opts.unformatted = [];
+        test_fragment(
+            '<div>Beautify</div> <div>me</div> <p>And</p><div>me</div><i>also</i>',
+            //  -- output --
+            '<div>Beautify</div>\n' +
+            '<div>me</div>\n' +
+            '<p>And</p>\n' +
+            '<div>me</div>\n' +
+            '<i>also</i>');
+        test_fragment(
+            '<input value="Collapsed single tags..." type="text"><img src="" alt="...are wrapped"/> <input value="This single tag is wrapped" type="text">',
+            //  -- output --
+            '<input value="Collapsed single tags..." type="text">\n' +
+            '<img src="" alt="...are wrapped" />\n' +
+            '<input value="This single tag is wrapped" type="text">');
+        test_fragment(
+            '<label>Wrap</label><input value="us" type="text">',
+            //  -- output --
+            '<label>Wrap</label>\n' +
+            '<input value="us" type="text">');
+        test_fragment(
+            '<div>\n' +
+            '    all\n' +
+            '</div\n' +
+            '><div>\n' +
+            '    tags\n' +
+            '</div\n' +
+            '><div>\n' +
+            '    are wrapped\n' +
+            '</div>',
+            //  -- output --
+            '<div>\n' +
+            '    all\n' +
+            '</div>\n' +
+            '<div>\n' +
+            '    tags\n' +
+            '</div>\n' +
+            '<div>\n' +
+            '    are wrapped\n' +
+            '</div>');
+
+
         //============================================================
         // New Test Suite
         reset_options();
diff --git a/test/data/html/tests.js b/test/data/html/tests.js
index 021d8ce84..20e9dccab 100644
--- a/test/data/html/tests.js
+++ b/test/data/html/tests.js
@@ -1099,6 +1099,110 @@ exports.test_data = {
                 '</div>'
             ]
         }]
+    }, {
+        name: "keep_collapsed_whitespace: prevent wrapping in case of collapsed tags",
+        description: "Don't add a newline before a tag which has another closing tag directly before it",
+        options: [
+            { name: 'keep_collapsed_whitespace', value: "true" },
+            { name: 'unformatted', value: "[]" }
+        ],
+        tests: [{
+            fragment: true,
+            input: '<div>Beautify</div> <div>me</div> <p>But</p><div>not</div><i>me</i> <span>Also, wrap me</span>',
+            output: [
+                '<div>Beautify</div>',
+                '<div>me</div>',
+                '<p>But</p><div>not</div><i>me</i>',
+                '<span>Also, wrap me</span>'
+            ]
+        }, {
+            fragment: true,
+            input: '<input value="Collapsed single tags..." type="text"><img src="" alt="...are not wrapped"/> <input value="This single tag is wrapped" type="text">',
+            output: [
+                '<input value="Collapsed single tags..." type="text"><img src="" alt="...are not wrapped" />',
+                '<input value="This single tag is wrapped" type="text">'
+            ]
+        }, {
+            fragment: true,
+            input: '<div>\n' +
+                '    keep\n' +
+                '</div\n' +
+                '><div>\n' +
+                '    tags\n' +
+                '</div\n' +
+                '><div>\n' +
+                '    collapsed\n' +
+                '</div>',
+            output: [
+                '<div>',
+                '    keep',
+                '</div><div>',
+                '    tags',
+                '</div><div>',
+                '    collapsed',
+                '</div>'
+            ]
+        }, {
+            fragment: true,
+            unchanged: '<label>Type to the right: </label><input type="text">'
+        }, {
+            fragment: true,
+            unchanged: '<input type="text"><label> - type to the left</label>'
+        }]
+    }, {
+        name: "default keep_collapsed_whitespace",
+        description: "by default keep_collapsed_whitespace is off, wrap tags as usual",
+        options: [
+            { name: 'unformatted', value: "[]" }
+        ],
+        tests: [{
+            fragment: true,
+            input: '<div>Beautify</div> <div>me</div> <p>And</p><div>me</div><i>also</i>',
+            output: [
+                '<div>Beautify</div>',
+                '<div>me</div>',
+                '<p>And</p>',
+                '<div>me</div>',
+                '<i>also</i>'
+            ]
+        }, {
+            fragment: true,
+            input: '<input value="Collapsed single tags..." type="text"><img src="" alt="...are wrapped"/> <input value="This single tag is wrapped" type="text">',
+            output: [
+                '<input value="Collapsed single tags..." type="text">',
+                '<img src="" alt="...are wrapped" />',
+                '<input value="This single tag is wrapped" type="text">'
+            ]
+        }, {
+            fragment: true,
+            input: '<label>Wrap</label><input value="us" type="text">',
+            output: [
+                '<label>Wrap</label>',
+                '<input value="us" type="text">'
+            ]
+        }, {
+            fragment: true,
+            input: '<div>\n' +
+                '    all\n' +
+                '</div\n' +
+                '><div>\n' +
+                '    tags\n' +
+                '</div\n' +
+                '><div>\n' +
+                '    are wrapped\n' +
+                '</div>',
+            output: [
+                '<div>',
+                '    all',
+                '</div>',
+                '<div>',
+                '    tags',
+                '</div>',
+                '<div>',
+                '    are wrapped',
+                '</div>'
+            ]
+        }]
     }, {
         name: "New Test Suite"
     }],