beautifier · andreyvolokitin · Jan 21, 2018 · Mar 1, 2018 · May 12, 2018
diff --git a/README.md b/README.md
@@ -294,6 +294,7 @@ HTML Beautifier Options:
   -U, --unformatted                  List of tags (defaults to inline) that should not be reformatted
   -T, --content_unformatted          List of tags (defaults to pre) whose content should not be reformatted
   -E, --extra_liners                 List of tags (defaults to [head,body,/html] that should have an extra newline before them.
+  -K, --keep_collapsed_whitespace    Don't add a newline before a tag that immediately follows another tag with no whitespace between them. Default is false.
   --editorconfig                     Use EditorConfig to set up the options
 ```
 

diff --git a/js/lib/beautify-html.js b/js/lib/beautify-html.js
@@ -267,6 +267,7 @@ function Beautifier(html_source, options, js_beautify, css_beautify) {
         options.extra_liners.concat() : (typeof options.extra_liners === 'string') ?
         options.extra_liners.split(',') : 'head,body,/html'.split(',');
     eol = options.eol ? options.eol : 'auto';
+    keep_collapsed_whitespace = (options.keep_collapsed_whitespace === undefined) ? false : options.keep_collapsed_whitespace;
 
     if (options.indent_with_tabs) {
         indent_character = '\t';
@@ -301,6 +302,7 @@ function Beautifier(html_source, options, js_beautify, css_beautify) {
         this.indent_content = indent_inner_html;
         this.indent_body_inner_html = indent_body_inner_html;
         this.indent_head_inner_html = indent_head_inner_html;
+        this.should_wrap_next_tag_start = true;
 
         this.Utils = { //Uilities made available to the various functions
             whitespace: "\n\r\t ".split(''),
@@ -448,6 +450,7 @@ function Beautifier(html_source, options, js_beautify, css_beautify) {
         };
 
         this.record_tag = function(tag) { //function to record a tag and its parent in this.tags Object
+            tag = ltrim(rtrim(tag));
             if (this.tags[tag + 'count']) { //check for the existence of this tag type
                 this.tags[tag + 'count']++;
                 this.tags[tag + this.tags[tag + 'count']] = this.indent_level; //and record the present indent level
@@ -1024,10 +1027,32 @@ function Beautifier(html_source, options, js_beautify, css_beautify) {
     /*_____________________--------------------_____________________*/
 
     this.beautify = function() {
+        // Clears should_wrap_next_tag_start when is_whitespace rejects the last character of the current token text.
+        function disallow_next_tag_wrapping_if_required() {
+            if (!keep_collapsed_whitespace) {
+                return; // option is off: leave the flag untouched
+            }
+            var text = multi_parser.token_text;
+            if (!multi_parser.is_whitespace(text[text.length - 1])) {
+                multi_parser.should_wrap_next_tag_start = false;
+            }
+        }
+
+        // Calls multi_parser.print_newline unless keep_collapsed_whitespace is on
+        // and should_wrap_next_tag_start has been cleared for the upcoming tag.
+        function print_newline_if_allowed() {
+            var wrapping_suppressed = keep_collapsed_whitespace && !multi_parser.should_wrap_next_tag_start;
+            if (wrapping_suppressed) {
+                return;
+            }
+            multi_parser.print_newline(false, multi_parser.output);
+        }
+
         multi_parser = new Parser(); //wrapping functions Parser
         multi_parser.printer(html_source, indent_character, indent_size, wrap_line_length, brace_style); //initialize starting values
         while (true) {
             var t = multi_parser.get_token();
+            var token_was_matched = true;
             multi_parser.token_text = t[0];
             multi_parser.token_type = t[1];
 
@@ -1037,7 +1062,7 @@ function Beautifier(html_source, options, js_beautify, css_beautify) {
 
             switch (multi_parser.token_type) {
                 case 'TK_TAG_START':
-                    multi_parser.print_newline(false, multi_parser.output);
+                    print_newline_if_allowed();
                     multi_parser.print_token(multi_parser.token_text);
                     if (multi_parser.indent_content) {
                         if ((multi_parser.indent_body_inner_html || !multi_parser.token_text.match(/<body(?:.*)>/)) &&
@@ -1057,6 +1082,8 @@ function Beautifier(html_source, options, js_beautify, css_beautify) {
                     multi_parser.current_mode = 'CONTENT';
                     break;
                 case 'TK_TAG_END':
+                    disallow_next_tag_wrapping_if_required();
+
                     //Print new line only if the tag has no content and has child
                     if (multi_parser.last_token === 'TK_CONTENT' && multi_parser.last_text === '') {
                         var tag_name = (multi_parser.token_text.match(/\w+/) || [])[0];
@@ -1076,8 +1103,11 @@ function Beautifier(html_source, options, js_beautify, css_beautify) {
                     // Don't add a newline before elements that should remain unformatted.
                     var tag_check = multi_parser.token_text.match(/^\s*<([a-z-]+)/i);
                     if (!tag_check || !multi_parser.Utils.in_array(tag_check[1], unformatted)) {
-                        multi_parser.print_newline(false, multi_parser.output);
+                        print_newline_if_allowed();
                     }
+
+                    disallow_next_tag_wrapping_if_required();
+
                     multi_parser.print_token(multi_parser.token_text);
                     multi_parser.current_mode = 'CONTENT';
                     break;
@@ -1109,6 +1139,10 @@ function Beautifier(html_source, options, js_beautify, css_beautify) {
                     multi_parser.current_mode = 'TAG';
                     break;
                 case 'TK_CONTENT':
+                    if (multi_parser.token_text.length !== 0) {
+                      multi_parser.should_wrap_next_tag_start = true;
+                    }
+
                     multi_parser.print_token(multi_parser.token_text);
                     multi_parser.current_mode = 'TAG';
                     break;
@@ -1158,13 +1192,26 @@ function Beautifier(html_source, options, js_beautify, css_beautify) {
                     multi_parser.current_mode = 'TAG';
                     break;
                 default:
+                    token_was_matched = false;
                     // We should not be getting here but we don't want to drop input on the floor
                     // Just output the text and move on
                     if (multi_parser.token_text !== '') {
                         multi_parser.print_token(multi_parser.token_text);
                     }
                     break;
             }
+
+            if (
+              (token_was_matched &&
+              multi_parser.token_type !== 'TK_TAG_SINGLE' &&
+              multi_parser.token_type !== 'TK_TAG_END' &&
+              multi_parser.token_type !== 'TK_CONTENT') ||
+
+              !token_was_matched
+            ) {
+              multi_parser.should_wrap_next_tag_start = true;
+            }
+
             multi_parser.last_token = multi_parser.token_type;
             multi_parser.last_text = multi_parser.token_text;
         }

diff --git a/js/lib/cli.js b/js/lib/cli.js
@@ -101,6 +101,7 @@ var path = require('path'),
         "indent_handlebars": [Boolean],
         "indent_scripts": ["keep", "separate", "normal"],
         "extra_liners": [String, Array],
+        "keep_collapsed_whitespace": Boolean,
         // CLI
         "version": Boolean,
         "help": Boolean,
@@ -150,6 +151,7 @@ var path = require('path'),
         "H": ["--indent_handlebars"],
         "S": ["--indent_scripts"],
         "E": ["--extra_liners"],
+        "K": ["--keep_collapsed_whitespace"],
         // non-dasherized hybrid shortcuts
         "good-stuff": [
             "--keep_array_indentation",
@@ -373,6 +375,7 @@ function usage(err) {
             msg.push('  -U, --unformatted                 List of tags (defaults to inline) that should not be reformatted');
             msg.push('  -T, --content_unformatted         List of tags (defaults to pre) whose content should not be reformatted');
             msg.push('  -E, --extra_liners                List of tags (defaults to [head,body,/html] that should have an extra newline');
+            msg.push('  -K, --keep_collapsed_whitespace   Don\'t add a newline before tag which has a direct previous sibling tag. Default is false.');
             break;
         case "css":
             msg.push('  -L, --selector-separator-newline        Add a newline between multiple selectors.');

diff --git a/js/src/html/beautifier.js b/js/src/html/beautifier.js
@@ -119,6 +119,7 @@ function Beautifier(html_source, options, js_beautify, css_beautify) {
         options.extra_liners.concat() : (typeof options.extra_liners === 'string') ?
         options.extra_liners.split(',') : 'head,body,/html'.split(',');
     eol = options.eol ? options.eol : 'auto';
+    keep_collapsed_whitespace = (options.keep_collapsed_whitespace === undefined) ? false : options.keep_collapsed_whitespace;
 
     if (options.indent_with_tabs) {
         indent_character = '\t';
@@ -153,6 +154,7 @@ function Beautifier(html_source, options, js_beautify, css_beautify) {
         this.indent_content = indent_inner_html;
         this.indent_body_inner_html = indent_body_inner_html;
         this.indent_head_inner_html = indent_head_inner_html;
+        this.should_wrap_next_tag_start = true;
 
         this.Utils = { //Uilities made available to the various functions
             whitespace: "\n\r\t ".split(''),
@@ -300,6 +302,7 @@ function Beautifier(html_source, options, js_beautify, css_beautify) {
         };
 
         this.record_tag = function(tag) { //function to record a tag and its parent in this.tags Object
+            tag = ltrim(rtrim(tag));
             if (this.tags[tag + 'count']) { //check for the existence of this tag type
                 this.tags[tag + 'count']++;
                 this.tags[tag + this.tags[tag + 'count']] = this.indent_level; //and record the present indent level
@@ -876,10 +879,32 @@ function Beautifier(html_source, options, js_beautify, css_beautify) {
     /*_____________________--------------------_____________________*/
 
     this.beautify = function() {
+        // Clears should_wrap_next_tag_start when is_whitespace rejects the last character of the current token text.
+        function disallow_next_tag_wrapping_if_required() {
+            if (!keep_collapsed_whitespace) {
+                return; // option is off: leave the flag untouched
+            }
+            var text = multi_parser.token_text;
+            if (!multi_parser.is_whitespace(text[text.length - 1])) {
+                multi_parser.should_wrap_next_tag_start = false;
+            }
+        }
+
+        // Calls multi_parser.print_newline unless keep_collapsed_whitespace is on
+        // and should_wrap_next_tag_start has been cleared for the upcoming tag.
+        function print_newline_if_allowed() {
+            var wrapping_suppressed = keep_collapsed_whitespace && !multi_parser.should_wrap_next_tag_start;
+            if (wrapping_suppressed) {
+                return;
+            }
+            multi_parser.print_newline(false, multi_parser.output);
+        }
+
         multi_parser = new Parser(); //wrapping functions Parser
         multi_parser.printer(html_source, indent_character, indent_size, wrap_line_length, brace_style); //initialize starting values
         while (true) {
             var t = multi_parser.get_token();
+            var token_was_matched = true;
             multi_parser.token_text = t[0];
             multi_parser.token_type = t[1];
 
@@ -889,7 +914,7 @@ function Beautifier(html_source, options, js_beautify, css_beautify) {
 
             switch (multi_parser.token_type) {
                 case 'TK_TAG_START':
-                    multi_parser.print_newline(false, multi_parser.output);
+                    print_newline_if_allowed();
                     multi_parser.print_token(multi_parser.token_text);
                     if (multi_parser.indent_content) {
                         if ((multi_parser.indent_body_inner_html || !multi_parser.token_text.match(/<body(?:.*)>/)) &&
@@ -909,6 +934,8 @@ function Beautifier(html_source, options, js_beautify, css_beautify) {
                     multi_parser.current_mode = 'CONTENT';
                     break;
                 case 'TK_TAG_END':
+                    disallow_next_tag_wrapping_if_required();
+
                     //Print new line only if the tag has no content and has child
                     if (multi_parser.last_token === 'TK_CONTENT' && multi_parser.last_text === '') {
                         var tag_name = (multi_parser.token_text.match(/\w+/) || [])[0];
@@ -928,8 +955,11 @@ function Beautifier(html_source, options, js_beautify, css_beautify) {
                     // Don't add a newline before elements that should remain unformatted.
                     var tag_check = multi_parser.token_text.match(/^\s*<([a-z-]+)/i);
                     if (!tag_check || !multi_parser.Utils.in_array(tag_check[1], unformatted)) {
-                        multi_parser.print_newline(false, multi_parser.output);
+                        print_newline_if_allowed();
                     }
+
+                    disallow_next_tag_wrapping_if_required();
+
                     multi_parser.print_token(multi_parser.token_text);
                     multi_parser.current_mode = 'CONTENT';
                     break;
@@ -961,6 +991,10 @@ function Beautifier(html_source, options, js_beautify, css_beautify) {
                     multi_parser.current_mode = 'TAG';
                     break;
                 case 'TK_CONTENT':
+                    if (multi_parser.token_text.length !== 0) {
+                      multi_parser.should_wrap_next_tag_start = true;
+                    }
+
                     multi_parser.print_token(multi_parser.token_text);
                     multi_parser.current_mode = 'TAG';
                     break;
@@ -1010,13 +1044,26 @@ function Beautifier(html_source, options, js_beautify, css_beautify) {
                     multi_parser.current_mode = 'TAG';
                     break;
                 default:
+                    token_was_matched = false;
                     // We should not be getting here but we don't want to drop input on the floor
                     // Just output the text and move on
                     if (multi_parser.token_text !== '') {
                         multi_parser.print_token(multi_parser.token_text);
                     }
                     break;
             }
+
+            if (
+              (token_was_matched &&
+              multi_parser.token_type !== 'TK_TAG_SINGLE' &&
+              multi_parser.token_type !== 'TK_TAG_END' &&
+              multi_parser.token_type !== 'TK_CONTENT') ||
+
+              !token_was_matched
+            ) {
+              multi_parser.should_wrap_next_tag_start = true;
+            }
+
             multi_parser.last_token = multi_parser.token_type;
             multi_parser.last_text = multi_parser.token_text;
         }

diff --git a/js/test/generated/beautify-html-tests.js b/js/test/generated/beautify-html-tests.js
@@ -2987,6 +2987,90 @@ function run_html_tests(test_obj, Urlencoded, js_beautify, html_beautify, css_be
             '</div>');
 
 
+        //============================================================
+        // keep_collapsed_whitespace: prevent wrapping in case of collapsed tags
+        reset_options();
+        opts.keep_collapsed_whitespace = true;
+        opts.unformatted = [];
+        test_fragment(
+            '<div>Beautify</div> <div>me</div> <p>But</p><div>not</div><i>me</i> <span>Also, wrap me</span>',
+            //  -- output --
+            '<div>Beautify</div>\n' +
+            '<div>me</div>\n' +
+            '<p>But</p><div>not</div><i>me</i>\n' +
+            '<span>Also, wrap me</span>');
+        test_fragment(
+            '<input value="Collapsed single tags..." type="text"><img src="" alt="...are not wrapped"/> <input value="This single tag is wrapped" type="text">',
+            //  -- output --
+            '<input value="Collapsed single tags..." type="text"><img src="" alt="...are not wrapped" />\n' +
+            '<input value="This single tag is wrapped" type="text">');
+        test_fragment(
+            '<div>\n' +
+            '    keep\n' +
+            '</div\n' +
+            '><div>\n' +
+            '    tags\n' +
+            '</div\n' +
+            '><div>\n' +
+            '    collapsed\n' +
+            '</div>',
+            //  -- output --
+            '<div>\n' +
+            '    keep\n' +
+            '</div><div>\n' +
+            '    tags\n' +
+            '</div><div>\n' +
+            '    collapsed\n' +
+            '</div>');
+        test_fragment('<label>Type to the right: </label><input type="text">');
+        test_fragment('<input type="text"><label> - type to the left</label>');
+
+
+        //============================================================
+        // default keep_collapsed_whitespace
+        reset_options();
+        opts.unformatted = [];
+        test_fragment(
+            '<div>Beautify</div> <div>me</div> <p>And</p><div>me</div><i>also</i>',
+            //  -- output --
+            '<div>Beautify</div>\n' +
+            '<div>me</div>\n' +
+            '<p>And</p>\n' +
+            '<div>me</div>\n' +
+            '<i>also</i>');
+        test_fragment(
+            '<input value="Collapsed single tags..." type="text"><img src="" alt="...are wrapped"/> <input value="This single tag is wrapped" type="text">',
+            //  -- output --
+            '<input value="Collapsed single tags..." type="text">\n' +
+            '<img src="" alt="...are wrapped" />\n' +
+            '<input value="This single tag is wrapped" type="text">');
+        test_fragment(
+            '<label>Wrap</label><input value="us" type="text">',
+            //  -- output --
+            '<label>Wrap</label>\n' +
+            '<input value="us" type="text">');
+        test_fragment(
+            '<div>\n' +
+            '    all\n' +
+            '</div\n' +
+            '><div>\n' +
+            '    tags\n' +
+            '</div\n' +
+            '><div>\n' +
+            '    are wrapped\n' +
+            '</div>',
+            //  -- output --
+            '<div>\n' +
+            '    all\n' +
+            '</div>\n' +
+            '<div>\n' +
+            '    tags\n' +
+            '</div>\n' +
+            '<div>\n' +
+            '    are wrapped\n' +
+            '</div>');
+
+
         //============================================================
         // New Test Suite
         reset_options();