From 1f78d8c4e3f1f8017286144e6563cbad80d4f439 Mon Sep 17 00:00:00 2001 From: John-Paul Dakran Date: Thu, 6 Jan 2022 11:50:00 -0800 Subject: [PATCH 1/7] Changed pop to popleft so items in a list are retrieved in order --- detect_secrets/transformers/yaml.py | 2 +- tests/__init__.py | 0 tests/transformers/yaml_transformer_test.py | 20 ++++++++++++++++++++ 3 files changed, 21 insertions(+), 1 deletion(-) create mode 100644 tests/__init__.py diff --git a/detect_secrets/transformers/yaml.py b/detect_secrets/transformers/yaml.py index 6680d44a..f47b11e9 100644 --- a/detect_secrets/transformers/yaml.py +++ b/detect_secrets/transformers/yaml.py @@ -164,7 +164,7 @@ def __iter__(self) -> Iterator[YAMLValue]: to_search = deque([self.json()]) while to_search: - item: Any = to_search.pop() + item: Any = to_search.popleft() if not item: # mainly for base case (e.g. if file is all comments) diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/transformers/yaml_transformer_test.py b/tests/transformers/yaml_transformer_test.py index 54cf8d65..ac02deba 100644 --- a/tests/transformers/yaml_transformer_test.py +++ b/tests/transformers/yaml_transformer_test.py @@ -80,6 +80,26 @@ def test_multiline_block_scalar_literal_style(block_chomping): assert YAMLTransformer().parse_file(file) == [''] + @staticmethod + def test_single_line_flow_mapping(): + file = mock_file_object( + textwrap.dedent(""" + batch: + cpus: 1 + extra_volumes: + - {containerPath: /nail/tmp, hostPath: /nail/tmp, mode: RW} + """)[1:-1], + ) + + assert YAMLTransformer().parse_file(file) == [ + '', + '', + '', + 'containerPath: "/nail/tmp"', + 'hostPath: "/nail/tmp"', + 'mode: "RW"', + ] + class TestYAMLFileParser: @staticmethod From b9d5edccbd6531e40086cdaa94ab8d17230188e6 Mon Sep 17 00:00:00 2001 From: John-Paul Dakran Date: Thu, 6 Jan 2022 12:28:54 -0800 Subject: [PATCH 2/7] Update unit tests --- tests/transformers/yaml_transformer_test.py | 204 +++++++++++++++++++- 1 file changed, 197 insertions(+), 7 deletions(-) diff --git a/tests/transformers/yaml_transformer_test.py b/tests/transformers/yaml_transformer_test.py index ac02deba..a0b15af4 100644 --- a/tests/transformers/yaml_transformer_test.py +++ b/tests/transformers/yaml_transformer_test.py @@ -84,10 +84,10 @@ def test_multiline_block_scalar_literal_style(block_chomping): def test_single_line_flow_mapping(): file = mock_file_object( textwrap.dedent(""" - batch: - cpus: 1 - extra_volumes: - - {containerPath: /nail/tmp, hostPath: /nail/tmp, mode: RW} + object: + keyA: 1 + dictionary: + - {keyB: valueB, keyC: valueC, keyD: valueD} """)[1:-1], ) @@ -95,9 +95,30 @@ def test_single_line_flow_mapping(): '', '', '', - 'containerPath: "/nail/tmp"', - 'hostPath: "/nail/tmp"', - 'mode: "RW"', + 'keyB: "valueB"', + 'keyC: "valueC"', + 'keyD: "valueD"', + ] + + @staticmethod + def test_multi_line_flow_mapping(): + file = mock_file_object( + textwrap.dedent(""" + object: + keyA: 1 + dictionary: + - {keyB: valueB, keyC: valueC, keyD: valueD} + + """)[1:-1], + ) + + assert YAMLTransformer().parse_file(file) == [ + '', + '', + '', + 'keyB: "valueB"', + 'keyC: "valueC"', + 'keyD: "valueD"', ] @@ -167,3 +188,172 @@ def test_possible_secret_format(yaml_value, expected_value): '__line__': mock.ANY, '__original_key__': mock.ANY, } + + @staticmethod + @pytest.mark.parametrize( + 'content, expected', + ( + # NOTE: The trailing new lines are important here! + # It needs to be a string value, since we ignore non-string values (because we assume + # secrets will be strings). However, the combination of the dictionary start character + # `{` and the keys being on the same line causes unexpected results (see #374). + ( + textwrap.dedent(""" + { a: "1" } + """)[1:], + ['a: "1"'], + ), + ( + textwrap.dedent(""" + a: + {b: "2"} + """)[1:], + ['', 'b: "2"'], + ), + ( + textwrap.dedent(""" + a: + - {b: "2"} + """)[1:], + ['', 'b: "2"'], + ), + # New lines aren't important here, but since the first key is on the same line + # as the start of the block, it will be handled funkily. + ( + textwrap.dedent(""" + {a: "1", + b: "2", + } + """)[1:-1], + ['a: "1"', 'b: "2"'], + ), + ( + textwrap.dedent(""" + { + a: "1", + b: "2", + } + """)[1:], + ['', 'a: "1"', 'b: "2"'], + ), + ( + textwrap.dedent(""" + { a: "1", b: "2" } + """)[1:], + ['a: "1"', 'b: "2"'], + ), + ), + ) + def test_inline_dictionary(content, expected): + lines = YAMLTransformer().parse_file(mock_file_object(content)) + assert lines == expected + + @staticmethod + def test_single_line_flow_mapping(): + file = mock_file_object( + textwrap.dedent(""" + dictionary: + - {keyA: valueA, keyB: valueB, keyC: valueC} + """)[1:-1], + ) + + assert YAMLFileParser(file).json() == { + 'extra_volumes': [ + { + 'keyA': { + '__value__': 'valueA', + '__line__': 2, + '__original_key__': 'keyA', + }, + 'keyB': { + '__value__': 'valueB', + '__line__': 2, + '__original_key__': 'keyB', + }, + 'keyC': { + '__value__': 'valueC', + '__line__': 2, + '__original_key__': 'keyC', + }, + }, + ], + } + + @staticmethod + def test_multi_line_flow_mapping(): + file = mock_file_object( + textwrap.dedent(""" + dictionary: + - {keyA: valueA, keyB: valueB, keyC: valueC} + + """)[1:-1], + ) + + assert YAMLFileParser(file).json() == { + 'extra_volumes': [ + { + 'keyA': { + '__value__': 'valueA', + '__line__': 2, + '__original_key__': 'keyA', + }, + 'keyB': { + '__value__': 'valueB', + '__line__': 2, + '__original_key__': 'keyB', + }, + 'keyC': { + '__value__': 'valueC', + '__line__': 2, + '__original_key__': 'keyC', + }, + }, + ], + } + + @staticmethod + def test_inline_dictionary_same_starting_line(): + file = mock_file_object( + textwrap.dedent(""" + {a: "1", + b: "2", + } + """)[1:-1], + ) + + assert YAMLFileParser(file).json() == { + 'a': { + '__value__': '1', + '__line__': 1, + '__original_key__': 'a', + }, + 'b': { + '__value__': '2', + '__line__': 2, + '__original_key__': 'b', + }, + } + + @staticmethod + def test_inline_dictionary_different_starting_line(): + file = mock_file_object( + textwrap.dedent(""" + { + a: "1", + b: "2", + } + """)[1:-1], + ) + + assert YAMLFileParser(file).json() == { + 'a': { + '__value__': '1', + '__line__': 2, + '__original_key__': 'a', + }, + 'b': { + '__value__': '2', + '__line__': 3, + '__original_key__': 'b', + }, + } From f2f51269c76c87655f109ed4060d0e0ed616e5ee Mon Sep 17 00:00:00 2001 From: John-Paul Dakran Date: Thu, 6 Jan 2022 12:37:56 -0800 Subject: [PATCH 3/7] Handle inline flow mapping by calcualting correct line number --- detect_secrets/transformers/yaml.py | 46 ++++++++++++++++++++- tests/transformers/yaml_transformer_test.py | 4 +- 2 files changed, 47 insertions(+), 3 deletions(-) diff --git a/detect_secrets/transformers/yaml.py b/detect_secrets/transformers/yaml.py index f47b11e9..ca70c050 100644 --- a/detect_secrets/transformers/yaml.py +++ b/detect_secrets/transformers/yaml.py @@ -14,6 +14,8 @@ from typing import Union import yaml +from yaml.tokens import FlowEntryToken +from yaml.tokens import KeyToken from ..types import NamedIO from ..util.filetype import determine_file_type @@ -152,6 +154,9 @@ def __init__(self, file: NamedIO): self.loader = yaml.SafeLoader(self.content) self.loader.compose_node = self._compose_node_shim # type: ignore + self.is_inline_flow_mapping_key = False + self.loader.parse_flow_mapping_key = self._parse_flow_mapping_key_shim # type: ignore + def json(self) -> Dict[str, Any]: return cast(Dict[str, Any], self.loader.get_single_data()) @@ -206,7 +211,11 @@ def _compose_node_shim( parent: Optional[yaml.nodes.Node], index: Optional[yaml.nodes.Node], ) -> yaml.nodes.Node: - line = self.loader.line + line = ( + self.loader.marks[-1].line + if self.is_inline_flow_mapping_key + else self.loader.line + ) node = yaml.composer.Composer.compose_node(self.loader, parent, index) node.__line__ = line + 1 @@ -218,6 +227,41 @@ def _compose_node_shim( return cast(yaml.nodes.Node, node) + def _parse_flow_mapping_key_shim( + self, + first: bool = False, + ) -> yaml.nodes.Node: + if ( + first + and self.loader.marks[-1].line == self.loader.peek_token().start_mark.line + or self._check_next_tokens_shim(FlowEntryToken, KeyToken) + ): + self.is_inline_flow_mapping_key = True + else: + self.is_inline_flow_mapping_key = False + + return cast(yaml.nodes.Node, yaml.parser.Parser.parse_flow_mapping_key(self.loader, first)) + + def _check_next_tokens_shim( + self, + *choices: Any, + ) -> bool: + # Check the next tokens type match the argument list of token types + result = True + i = 0 + + if self.loader.tokens: + if not choices: + return result + for choice in choices: + if i < len(self.loader.tokens): + result = result and isinstance(self.loader.tokens[i], choice) + i += 1 + else: + result = False + + return result + def _tag_dict_values(map_node: yaml.nodes.MappingNode) -> yaml.nodes.MappingNode: """ diff --git a/tests/transformers/yaml_transformer_test.py b/tests/transformers/yaml_transformer_test.py index a0b15af4..b23e9edd 100644 --- a/tests/transformers/yaml_transformer_test.py +++ b/tests/transformers/yaml_transformer_test.py @@ -258,7 +258,7 @@ def test_single_line_flow_mapping(): ) assert YAMLFileParser(file).json() == { - 'extra_volumes': [ + 'dictionary': [ { 'keyA': { '__value__': 'valueA', @@ -290,7 +290,7 @@ def test_multi_line_flow_mapping(): ) assert YAMLFileParser(file).json() == { - 'extra_volumes': [ + 'dictionary': [ { 'keyA': { '__value__': 'valueA', From 1392834b89d44aabe3542e10e4e96ae4a21b69d9 Mon Sep 17 00:00:00 2001 From: John-Paul Dakran Date: Thu, 6 Jan 2022 12:49:57 -0800 Subject: [PATCH 4/7] Fix trailing whitespace and trailing commas --- README.md | 2 +- detect_secrets/core/secrets_collection.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 8ee65afa..e4f411b2 100644 --- a/README.md +++ b/README.md @@ -83,7 +83,7 @@ $ git diff --staged --name-only -z | xargs -0 detect-secrets-hook --baseline .se **Scanning All Tracked Files:** ```bash -$ git ls-files -z | xargs -0 detect-secrets-hook --baseline .secrets.baseline +$ git ls-files -z | xargs -0 detect-secrets-hook --baseline .secrets.baseline ``` ### Viewing All Enabled Plugins: diff --git a/detect_secrets/core/secrets_collection.py b/detect_secrets/core/secrets_collection.py index a794ede7..05f770a7 100644 --- a/detect_secrets/core/secrets_collection.py +++ b/detect_secrets/core/secrets_collection.py @@ -219,8 +219,8 @@ def __iter__(self) -> Generator[Tuple[str, PotentialSecret], None, None]: key=lambda secret: ( getattr(secret, 'line_number', 0), secret.secret_hash, - secret.type - ) + secret.type, + ), ): yield filename, secret From e1240d522527746e800e075fb2d834ff1a177333 Mon Sep 17 00:00:00 2001 From: John-Paul Dakran Date: Thu, 6 Jan 2022 13:13:36 -0800 Subject: [PATCH 5/7] Change tuple to match. This parameter should be the list of tuples which is match not tuple --- detect_secrets/plugins/base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/detect_secrets/plugins/base.py b/detect_secrets/plugins/base.py index dbffec1f..0ef7925e 100644 --- a/detect_secrets/plugins/base.py +++ b/detect_secrets/plugins/base.py @@ -146,7 +146,7 @@ def analyze_string(self, string: str) -> Generator[str, None, None]: for regex in self.denylist: for match in regex.findall(string): if isinstance(match, tuple): - for submatch in filter(bool, tuple): + for submatch in filter(bool, match): # It might make sense to paste break after yielding yield submatch else: From 61eb62a8fa798b1da8bf70b47640ccfa673b72e6 Mon Sep 17 00:00:00 2001 From: John-Paul Dakran Date: Fri, 7 Jan 2022 13:58:19 -0800 Subject: [PATCH 6/7] Update dependencies --- requirements-dev.txt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/requirements-dev.txt b/requirements-dev.txt index 22e926d9..86427521 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -18,7 +18,7 @@ monotonic==1.5 mypy==0.790 mypy-extensions==0.4.3 nodeenv==1.5.0 -packaging==20.7 +packaging==20.9 pluggy==0.13.1 pre-commit==2.9.2 py==1.10.0 @@ -26,7 +26,7 @@ pyahocorasick==1.4.0 pycodestyle==2.3.1 pyflakes==1.6.0 pyparsing==2.4.7 -pytest==6.1.2 +pytest==6.2.2 PyYAML==5.4 requests==2.25.0 responses==0.12.1 @@ -38,5 +38,5 @@ typed-ast==1.4.1 typing-extensions==3.7.4.3 unidiff==0.6.0 urllib3==1.26.4 -virtualenv==20.2.1 +virtualenv==20.6.0 zipp==3.4.0 From f295adadb7b5b40f3a24d574a7a3d0988d943860 Mon Sep 17 00:00:00 2001 From: John-Paul Dakran Date: Mon, 10 Jan 2022 13:01:44 -0800 Subject: [PATCH 7/7] Add description to _parse_flow_mapping_key_shim which handles parser edge case --- detect_secrets/transformers/yaml.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/detect_secrets/transformers/yaml.py b/detect_secrets/transformers/yaml.py index ca70c050..d8039a99 100644 --- a/detect_secrets/transformers/yaml.py +++ b/detect_secrets/transformers/yaml.py @@ -231,6 +231,14 @@ def _parse_flow_mapping_key_shim( self, first: bool = False, ) -> yaml.nodes.Node: + # There exists an edge case when a key and flow mapping start character `{` are on the same + # line (Ex. '{key: value}) followed by an empty line. The parser will produce an off-by-one + # error for the line number that it tracks internally. Since we track the start of the + # mapping, we will use this line number when we are processing: + # A) The first key in an inline dictionary where the flow mapping start character is on the + # same line as the key + # B) The n key of an inline dictionary that is followed by a FlowEntryToken (',') and + # KeyToken ('key:') if ( first and self.loader.marks[-1].line == self.loader.peek_token().start_mark.line