Add option to not wrap collapsed tags, and bugfix

See beautifier#1304
andreyvolokitin · Jan 21, 2018 · 6a57c4a · 6a57c4a
1 parent 23f3a17
commit 6a57c4a
Show file tree

Hide file tree

Showing 6 changed files with 292 additions and 6 deletions.
diff --git a/README.md b/README.md
@@ -280,6 +280,7 @@ HTML Beautifier Options:
   -U, --unformatted                  List of tags (defaults to inline) that should not be reformatted
   -T, --content_unformatted          List of tags (defaults to pre) whose content should not be reformatted
   -E, --extra_liners                 List of tags (defaults to [head,body,/html] that should have an extra newline before them.
+  -K, --keep_collapsed_whitespace    Don't add a newline before tag which has a direct previous sibling tag. Default is false.
   --editorconfig                     Use EditorConfig to set up the options
 ```
 

diff --git a/js/lib/beautify-html.js b/js/lib/beautify-html.js
@@ -267,6 +267,7 @@ function Beautifier(html_source, options, js_beautify, css_beautify) {
         options.extra_liners.concat() : (typeof options.extra_liners === 'string') ?
         options.extra_liners.split(',') : 'head,body,/html'.split(',');
     eol = options.eol ? options.eol : 'auto';
+    keep_collapsed_whitespace = (options.keep_collapsed_whitespace === undefined) ? false : options.keep_collapsed_whitespace;
 
     if (options.indent_with_tabs) {
         indent_character = '\t';
@@ -301,6 +302,7 @@ function Beautifier(html_source, options, js_beautify, css_beautify) {
         this.indent_content = indent_inner_html;
         this.indent_body_inner_html = indent_body_inner_html;
         this.indent_head_inner_html = indent_head_inner_html;
+        this.should_wrap_next_tag_start = true;
 
         this.Utils = { //Uilities made available to the various functions
             whitespace: "\n\r\t ".split(''),
@@ -448,6 +450,7 @@ function Beautifier(html_source, options, js_beautify, css_beautify) {
         };
 
         this.record_tag = function(tag) { //function to record a tag and its parent in this.tags Object
+            tag = ltrim(rtrim(tag));
             if (this.tags[tag + 'count']) { //check for the existence of this tag type
                 this.tags[tag + 'count']++;
                 this.tags[tag + this.tags[tag + 'count']] = this.indent_level; //and record the present indent level
@@ -949,11 +952,11 @@ function Beautifier(html_source, options, js_beautify, css_beautify) {
             }
 
             this.print_newline = function(force, arr) {
-                this.line_char_count = 0;
                 if (!arr || !arr.length) {
                     return;
                 }
                 if (force || (arr[arr.length - 1] !== '\n')) { //we might want the extra line
+                    this.line_char_count = 0;
                     if ((arr[arr.length - 1] !== '\n')) {
                         arr[arr.length - 1] = rtrim(arr[arr.length - 1]);
                     }
@@ -1021,10 +1024,32 @@ function Beautifier(html_source, options, js_beautify, css_beautify) {
     /*_____________________--------------------_____________________*/
 
     this.beautify = function() {
+        function disallow_next_tag_wrapping_if_required() {
+          if (
+              // Should we keep whitespace between tags collapsed?
+              keep_collapsed_whitespace &&
+              // Means that there is no whitespace at the end of token_text
+              !multi_parser.is_whitespace(multi_parser.token_text[multi_parser.token_text.length - 1])
+            ) {
+              multi_parser.should_wrap_next_tag_start = false;
+            }
+        }
+
+        function print_newline_if_allowed() {
+            // Don't add a newline before tag which has a direct previous sibling tag (this is optional).
+            if (
+              !keep_collapsed_whitespace ||
+              multi_parser.should_wrap_next_tag_start
+            ) {
+              multi_parser.print_newline(false, multi_parser.output);
+            }
+        }
+
         multi_parser = new Parser(); //wrapping functions Parser
         multi_parser.printer(html_source, indent_character, indent_size, wrap_line_length, brace_style); //initialize starting values
         while (true) {
             var t = multi_parser.get_token();
+            var token_was_matched = true;
             multi_parser.token_text = t[0];
             multi_parser.token_type = t[1];
 
@@ -1034,7 +1059,7 @@ function Beautifier(html_source, options, js_beautify, css_beautify) {
 
             switch (multi_parser.token_type) {
                 case 'TK_TAG_START':
-                    multi_parser.print_newline(false, multi_parser.output);
+                    print_newline_if_allowed();
                     multi_parser.print_token(multi_parser.token_text);
                     if (multi_parser.indent_content) {
                         if ((multi_parser.indent_body_inner_html || !multi_parser.token_text.match(/<body(?:.*)>/)) &&
@@ -1054,6 +1079,8 @@ function Beautifier(html_source, options, js_beautify, css_beautify) {
                     multi_parser.current_mode = 'CONTENT';
                     break;
                 case 'TK_TAG_END':
+                    disallow_next_tag_wrapping_if_required();
+
                     //Print new line only if the tag has no content and has child
                     if (multi_parser.last_token === 'TK_CONTENT' && multi_parser.last_text === '') {
                         var tag_name = (multi_parser.token_text.match(/\w+/) || [])[0];
@@ -1073,8 +1100,11 @@ function Beautifier(html_source, options, js_beautify, css_beautify) {
                     // Don't add a newline before elements that should remain unformatted.
                     var tag_check = multi_parser.token_text.match(/^\s*<([a-z-]+)/i);
                     if (!tag_check || !multi_parser.Utils.in_array(tag_check[1], unformatted)) {
-                        multi_parser.print_newline(false, multi_parser.output);
+                        print_newline_if_allowed();
                     }
+
+                    disallow_next_tag_wrapping_if_required();
+
                     multi_parser.print_token(multi_parser.token_text);
                     multi_parser.current_mode = 'CONTENT';
                     break;
@@ -1106,6 +1136,10 @@ function Beautifier(html_source, options, js_beautify, css_beautify) {
                     multi_parser.current_mode = 'TAG';
                     break;
                 case 'TK_CONTENT':
+                    if (multi_parser.token_text.length !== 0) {
+                      multi_parser.should_wrap_next_tag_start = true;
+                    }
+
                     multi_parser.print_token(multi_parser.token_text);
                     multi_parser.current_mode = 'TAG';
                     break;
@@ -1155,13 +1189,26 @@ function Beautifier(html_source, options, js_beautify, css_beautify) {
                     multi_parser.current_mode = 'TAG';
                     break;
                 default:
+                    token_was_matched = false;
                     // We should not be getting here but we don't want to drop input on the floor
                     // Just output the text and move on
                     if (multi_parser.token_text !== '') {
                         multi_parser.print_token(multi_parser.token_text);
                     }
                     break;
             }
+
+            if (
+              (token_was_matched &&
+              multi_parser.token_type !== 'TK_TAG_SINGLE' &&
+              multi_parser.token_type !== 'TK_TAG_END' &&
+              multi_parser.token_type !== 'TK_CONTENT') ||
+
+              !token_was_matched
+            ) {
+              multi_parser.should_wrap_next_tag_start = true;
+            }
+
             multi_parser.last_token = multi_parser.token_type;
             multi_parser.last_text = multi_parser.token_text;
         }

diff --git a/js/lib/cli.js b/js/lib/cli.js
@@ -101,6 +101,7 @@ var path = require('path'),
         "indent_handlebars": [Boolean],
         "indent_scripts": ["keep", "separate", "normal"],
         "extra_liners": [String, Array],
+        "keep_collapsed_whitespace": Boolean,
         // CLI
         "version": Boolean,
         "help": Boolean,
@@ -150,6 +151,7 @@ var path = require('path'),
         "H": ["--indent_handlebars"],
         "S": ["--indent_scripts"],
         "E": ["--extra_liners"],
+        "K": ["--keep_collapsed_whitespace"],
         // non-dasherized hybrid shortcuts
         "good-stuff": [
             "--keep_array_indentation",
@@ -373,6 +375,7 @@ function usage(err) {
             msg.push('  -U, --unformatted                 List of tags (defaults to inline) that should not be reformatted');
             msg.push('  -T, --content_unformatted         List of tags (defaults to pre) whose content should not be reformatted');
             msg.push('  -E, --extra_liners                List of tags (defaults to [head,body,/html] that should have an extra newline');
+            msg.push('  -K, --keep_collapsed_whitespace   Don\'t add a newline before tag which has a direct previous sibling tag. Default is false.');
             break;
         case "css":
             msg.push('  -L, --selector-separator-newline        Add a newline between multiple selectors.');

diff --git a/js/src/html/beautifier.js b/js/src/html/beautifier.js
@@ -119,6 +119,7 @@ function Beautifier(html_source, options, js_beautify, css_beautify) {
         options.extra_liners.concat() : (typeof options.extra_liners === 'string') ?
         options.extra_liners.split(',') : 'head,body,/html'.split(',');
     eol = options.eol ? options.eol : 'auto';
+    keep_collapsed_whitespace = (options.keep_collapsed_whitespace === undefined) ? false : options.keep_collapsed_whitespace;
 
     if (options.indent_with_tabs) {
         indent_character = '\t';
@@ -153,6 +154,7 @@ function Beautifier(html_source, options, js_beautify, css_beautify) {
         this.indent_content = indent_inner_html;
         this.indent_body_inner_html = indent_body_inner_html;
         this.indent_head_inner_html = indent_head_inner_html;
+        this.should_wrap_next_tag_start = true;
 
         this.Utils = { //Uilities made available to the various functions
             whitespace: "\n\r\t ".split(''),
@@ -300,6 +302,7 @@ function Beautifier(html_source, options, js_beautify, css_beautify) {
         };
 
         this.record_tag = function(tag) { //function to record a tag and its parent in this.tags Object
+            tag = ltrim(rtrim(tag));
             if (this.tags[tag + 'count']) { //check for the existence of this tag type
                 this.tags[tag + 'count']++;
                 this.tags[tag + this.tags[tag + 'count']] = this.indent_level; //and record the present indent level
@@ -801,11 +804,11 @@ function Beautifier(html_source, options, js_beautify, css_beautify) {
             }
 
             this.print_newline = function(force, arr) {
-                this.line_char_count = 0;
                 if (!arr || !arr.length) {
                     return;
                 }
                 if (force || (arr[arr.length - 1] !== '\n')) { //we might want the extra line
+                    this.line_char_count = 0;
                     if ((arr[arr.length - 1] !== '\n')) {
                         arr[arr.length - 1] = rtrim(arr[arr.length - 1]);
                     }
@@ -873,10 +876,32 @@ function Beautifier(html_source, options, js_beautify, css_beautify) {
     /*_____________________--------------------_____________________*/
 
     this.beautify = function() {
+        function disallow_next_tag_wrapping_if_required() {
+          if (
+              // Should we keep whitespace between tags collapsed?
+              keep_collapsed_whitespace &&
+              // Means that there is no whitespace at the end of token_text
+              !multi_parser.is_whitespace(multi_parser.token_text[multi_parser.token_text.length - 1])
+            ) {
+              multi_parser.should_wrap_next_tag_start = false;
+            }
+        }
+
+        function print_newline_if_allowed() {
+            // Don't add a newline before tag which has a direct previous sibling tag (this is optional).
+            if (
+              !keep_collapsed_whitespace ||
+              multi_parser.should_wrap_next_tag_start
+            ) {
+              multi_parser.print_newline(false, multi_parser.output);
+            }
+        }
+
         multi_parser = new Parser(); //wrapping functions Parser
         multi_parser.printer(html_source, indent_character, indent_size, wrap_line_length, brace_style); //initialize starting values
         while (true) {
             var t = multi_parser.get_token();
+            var token_was_matched = true;
             multi_parser.token_text = t[0];
             multi_parser.token_type = t[1];
 
@@ -886,7 +911,7 @@ function Beautifier(html_source, options, js_beautify, css_beautify) {
 
             switch (multi_parser.token_type) {
                 case 'TK_TAG_START':
-                    multi_parser.print_newline(false, multi_parser.output);
+                    print_newline_if_allowed();
                     multi_parser.print_token(multi_parser.token_text);
                     if (multi_parser.indent_content) {
                         if ((multi_parser.indent_body_inner_html || !multi_parser.token_text.match(/<body(?:.*)>/)) &&
@@ -906,6 +931,8 @@ function Beautifier(html_source, options, js_beautify, css_beautify) {
                     multi_parser.current_mode = 'CONTENT';
                     break;
                 case 'TK_TAG_END':
+                    disallow_next_tag_wrapping_if_required();
+
                     //Print new line only if the tag has no content and has child
                     if (multi_parser.last_token === 'TK_CONTENT' && multi_parser.last_text === '') {
                         var tag_name = (multi_parser.token_text.match(/\w+/) || [])[0];
@@ -925,8 +952,11 @@ function Beautifier(html_source, options, js_beautify, css_beautify) {
                     // Don't add a newline before elements that should remain unformatted.
                     var tag_check = multi_parser.token_text.match(/^\s*<([a-z-]+)/i);
                     if (!tag_check || !multi_parser.Utils.in_array(tag_check[1], unformatted)) {
-                        multi_parser.print_newline(false, multi_parser.output);
+                        print_newline_if_allowed();
                     }
+
+                    disallow_next_tag_wrapping_if_required();
+
                     multi_parser.print_token(multi_parser.token_text);
                     multi_parser.current_mode = 'CONTENT';
                     break;
@@ -958,6 +988,10 @@ function Beautifier(html_source, options, js_beautify, css_beautify) {
                     multi_parser.current_mode = 'TAG';
                     break;
                 case 'TK_CONTENT':
+                    if (multi_parser.token_text.length !== 0) {
+                      multi_parser.should_wrap_next_tag_start = true;
+                    }
+
                     multi_parser.print_token(multi_parser.token_text);
                     multi_parser.current_mode = 'TAG';
                     break;
@@ -1007,13 +1041,26 @@ function Beautifier(html_source, options, js_beautify, css_beautify) {
                     multi_parser.current_mode = 'TAG';
                     break;
                 default:
+                    token_was_matched = false;
                     // We should not be getting here but we don't want to drop input on the floor
                     // Just output the text and move on
                     if (multi_parser.token_text !== '') {
                         multi_parser.print_token(multi_parser.token_text);
                     }
                     break;
             }
+
+            if (
+              (token_was_matched &&
+              multi_parser.token_type !== 'TK_TAG_SINGLE' &&
+              multi_parser.token_type !== 'TK_TAG_END' &&
+              multi_parser.token_type !== 'TK_CONTENT') ||
+
+              !token_was_matched
+            ) {
+              multi_parser.should_wrap_next_tag_start = true;
+            }
+
             multi_parser.last_token = multi_parser.token_type;
             multi_parser.last_text = multi_parser.token_text;
         }