From 265a121c68a1928f2a97d54918bacb24dcac8c3c Mon Sep 17 00:00:00 2001 From: gkumar9891 Date: Sat, 24 Feb 2024 17:34:39 +0530 Subject: [PATCH 1/7] added option disallowedTagsMode: 'completelyDiscard' --- index.js | 10 ++++++---- test/test.js | 10 +++++++++- 2 files changed, 15 insertions(+), 5 deletions(-) diff --git a/index.js b/index.js index 62404fa..1144ce8 100644 --- a/index.js +++ b/index.js @@ -262,7 +262,7 @@ function sanitizeHtml(html, options, _recursing) { if (!tagAllowed(name) || (options.disallowedTagsMode === 'recursiveEscape' && !isEmptyObject(skipMap)) || (options.nestingLimit != null && depth >= options.nestingLimit)) { skip = true; skipMap[depth] = true; - if (options.disallowedTagsMode === 'discard') { + if (options.disallowedTagsMode === 'discard' || options.disallowedTagsMode === 'completelyDiscard') { if (nonTextTagsArray.indexOf(name) !== -1) { skipText = true; skipTextDepth = 1; @@ -272,7 +272,7 @@ function sanitizeHtml(html, options, _recursing) { } depth++; if (skip) { - if (options.disallowedTagsMode === 'discard') { + if (options.disallowedTagsMode === 'discard' || options.disallowedTagsMode === 'completelyDiscard') { // We want the contents but not this tag return; } @@ -511,7 +511,9 @@ function sanitizeHtml(html, options, _recursing) { text = lastFrame.innerText !== undefined ? lastFrame.innerText : text; } - if (options.disallowedTagsMode === 'discard' && ((tag === 'script') || (tag === 'style'))) { + if (options.disallowedTagsMode === 'completelyDiscard' && !tagAllowed(tag)) { + text = ''; + } else if ((options.disallowedTagsMode === 'discard' || options.disallowedTagsMode === 'completelyDiscard') && ((tag === 'script') || (tag === 'style'))) { // htmlparser2 gives us these as-is. Escaping them ruins the content. Allowing // script tags is, by definition, game over for XSS protection, so if that's // your concern, don't allow them. 
The same is essentially true for style tags @@ -559,7 +561,7 @@ function sanitizeHtml(html, options, _recursing) { const skip = skipMap[depth]; if (skip) { delete skipMap[depth]; - if (options.disallowedTagsMode === 'discard') { + if (options.disallowedTagsMode === 'discard' || options.disallowedTagsMode === 'completelyDiscard') { frame.updateParentNodeText(); return; } diff --git a/test/test.js b/test/test.js index 9891a8e..e1a70ef 100644 --- a/test/test.js +++ b/test/test.js @@ -1667,5 +1667,13 @@ describe('sanitizeHtml', function() { } }), ''); }); - + it('should completely remove disallowed tags with nested content', () => { + const inputHtml = '
Some Text

Allowed content

More allowed content Another Text
'; + const expectedOutput = '

Allowed content

More allowed content'; + const sanitizedHtml = sanitizeHtml(inputHtml, { + allowedTags: [ 'p', 'span' ], + disallowedTagsMode: 'completelyDiscard' + }); + assert.equal(sanitizedHtml, expectedOutput); + }); }); From d3c11413b1bb35c92edcefaf1e988b205d0114bb Mon Sep 17 00:00:00 2001 From: gkumar9891 Date: Wed, 28 Feb 2024 01:47:02 +0530 Subject: [PATCH 2/7] changed CHANGELOG --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 46178f5..d9c72ff 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,8 @@ - Documentation update regarding minimum supported TypeScript version. +- Added disallowedTagsMode: 'completelyDiscard' option to remove the content also in html + ## 2.12.1 (2024-02-22) - Do not parse sourcemaps in `post-css`. This fixes a vulnerability in which information about the existence or non-existence of files on a server could be disclosed via properly crafted HTML input when the `style` attribute is allowed by the configuration. Thanks to the [Snyk Security team](https://snyk.io/) for the disclosure and to [Dylan Armstrong](https://dylan.is/) for the fix. From 1f52783e1defe92b31af56cfb82148e9fb5787a1 Mon Sep 17 00:00:00 2001 From: gkumar9891 Date: Fri, 1 Mar 2024 03:15:29 +0530 Subject: [PATCH 3/7] added test case and changed README file --- README.md | 12 ++++++++++++ test/test.js | 19 +++++++++++++++++++ 2 files changed, 31 insertions(+) diff --git a/README.md b/README.md index 4a6b44b..1b0c312 100644 --- a/README.md +++ b/README.md @@ -249,6 +249,16 @@ If you set `disallowedTagsMode` to `escape`, the disallowed tags are escaped rat If you set `disallowedTagsMode` to `recursiveEscape`, the disallowed tags are escaped rather than discarded, and the same treatment is applied to all subtags, whether otherwise allowed or not. 
+ +#### "What if I wan disallowed tags and any content they contain should discarded" + +If you set `disallowedTagsMode` to `completelyDiscard`, disallowed tags and any content they contain are discarded. Any subtags are still included, as long as those individual subtags are allowed. + +```js +allowedTags: [ 'p' ], +disallowedTagsMode: 'completelyDiscard' +``` + #### "What if I want to allow only specific values on some attributes?" When configuring the attribute in `allowedAttributes` simply use an object with attribute `name` and an allowed `values` array. In the following example `sandbox="allow-forms allow-modals allow-orientation-lock allow-pointer-lock allow-popups allow-popups-to-escape-sandbox allow-scripts"` would become `sandbox="allow-popups allow-scripts"`: @@ -695,6 +705,8 @@ attacks. Don't do that* unless you have good reason to trust their origin. sanitize-html will log a warning if these tags are allowed, which can be disabled with the `allowVulnerableTags: true` option. +### Discarding the entire contents of a disallowed tag + ### Choose what to do with disallowed tags Instead of discarding, or keeping text only, you may enable escaping of the entire content: diff --git a/test/test.js b/test/test.js index e1a70ef..ae84000 100644 --- a/test/test.js +++ b/test/test.js @@ -1676,4 +1676,23 @@ describe('sanitizeHtml', function() { }); assert.equal(sanitizedHtml, expectedOutput); }); + it('should remove top level tag\'s content', () => { + const inputHtml = 'Some Text

paragraph content

content'; + const expectedOutput = '

paragraph content

'; + const sanitizedHtml = sanitizeHtml(inputHtml, { + allowedTags: [ 'p' ], + disallowedTagsMode: 'completelyDiscard' + }); + assert.equal(sanitizedHtml, expectedOutput); + }); + it('should completely remove disallowd tag with unclosed tag', () => { + const inputHtml = '
Some Text

paragraph content

some text'; + const expectedOutput = '

paragraph content

'; + const sanitizedHtml = sanitizeHtml(inputHtml, { + allowedTags: [ 'p' ], + disallowedTagsMode: 'completelyDiscard' + }); + + assert.equal(sanitizedHtml, expectedOutput); + }); }); From 7d30949fadc8ddd69457ac6b09c27e41ff4e036b Mon Sep 17 00:00:00 2001 From: gkumar9891 Date: Sat, 2 Mar 2024 04:45:13 +0530 Subject: [PATCH 4/7] changed README --- README.md | 43 ++++++++++++++++++++++++++++++------------- test/test.js | 2 +- 2 files changed, 31 insertions(+), 14 deletions(-) diff --git a/README.md b/README.md index 1b0c312..3602750 100644 --- a/README.md +++ b/README.md @@ -249,16 +249,6 @@ If you set `disallowedTagsMode` to `escape`, the disallowed tags are escaped rat If you set `disallowedTagsMode` to `recursiveEscape`, the disallowed tags are escaped rather than discarded, and the same treatment is applied to all subtags, whether otherwise allowed or not. - -#### "What if I wan disallowed tags and any content they contain should discarded" - -If you set `disallowedTagsMode` to `completelyDiscard`, disallowed tags and any content they contain are discarded. Any subtags are still included, as long as those individual subtags are allowed. - -```js -allowedTags: [ 'p' ], -disallowedTagsMode: 'completelyDiscard' -``` - #### "What if I want to allow only specific values on some attributes?" When configuring the attribute in `allowedAttributes` simply use an object with attribute `name` and an allowed `values` array. In the following example `sandbox="allow-forms allow-modals allow-orientation-lock allow-pointer-lock allow-popups allow-popups-to-escape-sandbox allow-scripts"` would become `sandbox="allow-popups allow-scripts"`: @@ -705,8 +695,6 @@ attacks. Don't do that* unless you have good reason to trust their origin. sanitize-html will log a warning if these tags are allowed, which can be disabled with the `allowVulnerableTags: true` option. 
-### Discarding the entire contents of a disallowed tag - ### Choose what to do with disallowed tags Instead of discarding, or keeping text only, you may enable escaping of the entire content: @@ -717,7 +705,36 @@ disallowedTagsMode: 'escape' This will transform `content` to `<disallowed>content</disallowed>` -Valid values are: `'discard'` (default), `'escape'` (escape the tag) and `'recursiveEscape'` (to escape the tag and all its content). +Valid values are: `'discard'` (default), `'completelyDiscard'` (remove disallowed tag's content), `'escape'` (escape the tag) and `'recursiveEscape'` (to escape the tag and all its content). + +#### Discard disallowed tag but keep its inner content + +If you set `disallowedTagsMode` to `discard`, disallowed tags are discarded but don't remove inner content of disallowed tags. + +```js +disallowedTagsMode: 'discard' +``` +This will tranform `content` to `content` + +#### Discard entire content of a disallowed tag + +If you set `disallowedTagsMode` to `completelyDiscard`, disallowed tags and any content they contain are discarded. Any subtags are still included, as long as those individual subtags are allowed. + +```js +disallowedTagsMode: 'completelyDiscard' +``` + +this will transform `content content ` to `content` + +#### Escape the disallowed tag and all its children even for allowed tags. + +if you set `disallowedTagsMode` to `recursiveEscape`, disallowed tag and its children will be escaped even for allowed tags + +```js +disallowedTagsMode: `recursiveEscape` +``` + +this will transform `hello

world

` to `<disallowed>hello<p>world</p></disallowed>` ### Ignore style attribute contents diff --git a/test/test.js b/test/test.js index ae84000..9c87be3 100644 --- a/test/test.js +++ b/test/test.js @@ -1685,7 +1685,7 @@ describe('sanitizeHtml', function() { }); assert.equal(sanitizedHtml, expectedOutput); }); - it('should completely remove disallowd tag with unclosed tag', () => { + it('should completely remove disallowed tag with unclosed tag', () => { const inputHtml = '
Some Text

paragraph content

some text'; const expectedOutput = '

paragraph content

'; const sanitizedHtml = sanitizeHtml(inputHtml, { From d6cd120c3c8da9c712848e83c544a2e4e1acc4b2 Mon Sep 17 00:00:00 2001 From: gkumar9891 Date: Tue, 5 Mar 2024 01:57:21 +0530 Subject: [PATCH 5/7] README changes --- README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.md b/README.md index 3602750..6c54b1d 100644 --- a/README.md +++ b/README.md @@ -245,6 +245,8 @@ allowedAttributes: {} If you set `disallowedTagsMode` to `discard` (the default), disallowed tags are discarded. Any text content or subtags are still included, depending on whether the individual subtags are allowed. +If you set `disallowedTagsMode` to `completelyDiscard`, disallowed tags and any content they contain are discarded. Any subtags are still included, as long as those individual subtags are allowed. + If you set `disallowedTagsMode` to `escape`, the disallowed tags are escaped rather than discarded. Any text or subtags are handled normally. If you set `disallowedTagsMode` to `recursiveEscape`, the disallowed tags are escaped rather than discarded, and the same treatment is applied to all subtags, whether otherwise allowed or not. From eb4b10c3ff738b6ad1df435d8f828633835b1bf7 Mon Sep 17 00:00:00 2001 From: gkumar9891 Date: Wed, 6 Mar 2024 01:45:27 +0530 Subject: [PATCH 6/7] README changes --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 6c54b1d..6720fdf 100644 --- a/README.md +++ b/README.md @@ -709,14 +709,14 @@ This will transform `content` to `<disallowed>con Valid values are: `'discard'` (default), `'completelyDiscard'` (remove disallowed tag's content), `'escape'` (escape the tag) and `'recursiveEscape'` (to escape the tag and all its content). -#### Discard disallowed tag but keep its inner content +#### Discard disallowed tags but keep their inner content If you set `disallowedTagsMode` to `discard`, disallowed tags are discarded but don't remove inner content of disallowed tags. 
```js disallowedTagsMode: 'discard' ``` -This will tranform `content` to `content` +This will transform `content` to `content` #### Discard entire content of a disallowed tag From 31aebaeb4e03a4aba7c1ad8e8ff553034cde63ac Mon Sep 17 00:00:00 2001 From: gkumar9891 Date: Wed, 6 Mar 2024 01:48:18 +0530 Subject: [PATCH 7/7] README changes --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 6720fdf..5640336 100644 --- a/README.md +++ b/README.md @@ -726,7 +726,7 @@ If you set `disallowedTagsMode` to `completelyDiscard`, disallowed tags and any disallowedTagsMode: 'completelyDiscard' ``` -this will transform `content content ` to `content` +This will transform `content content ` to `content` #### Escape the disallowed tag and all its children even for allowed tags. @@ -736,7 +736,7 @@ if you set `disallowedTagsMode` to `recursiveEscape`, disallowed tag and its chi disallowedTagsMode: `recursiveEscape` ``` -this will transform `hello

world

` to `<disallowed>hello<p>world</p></disallowed>` +This will transform `hello

world

` to `<disallowed>hello<p>world</p></disallowed>` ### Ignore style attribute contents