diff --git a/detect_secrets/core/baseline.py b/detect_secrets/core/baseline.py index 0f7d5e51..d63bd43a 100644 --- a/detect_secrets/core/baseline.py +++ b/detect_secrets/core/baseline.py @@ -8,39 +8,47 @@ from detect_secrets.core.secrets_collection import SecretsCollection -def initialize(plugins, exclude_regex=None, rootdir='.', scan_all_files=False): - """Scans the entire codebase for high entropy strings, and returns a +def initialize( + plugins, + exclude_regex=None, + path='.', + scan_all_files=False, +): + """Scans the entire codebase for secrets, and returns a SecretsCollection object. :type plugins: tuple of detect_secrets.plugins.base.BasePlugin :param plugins: rules to initialize the SecretsCollection with. :type exclude_regex: str|None - :type rootdir: str + :type path: str + :type scan_all_files: bool :rtype: SecretsCollection """ output = SecretsCollection(plugins, exclude_regex) - if os.path.isfile(rootdir): + if os.path.isfile(path): # This option allows for much easier adhoc usage. - git_files = [rootdir] + files_to_scan = [path] elif scan_all_files: - git_files = _get_files_recursively(rootdir) + files_to_scan = _get_files_recursively(path) else: - git_files = _get_git_tracked_files(rootdir) + files_to_scan = _get_git_tracked_files(path) - if not git_files: + if not files_to_scan: return output if exclude_regex: regex = re.compile(exclude_regex, re.IGNORECASE) - git_files = filter( - lambda x: not regex.search(x), - git_files, + files_to_scan = filter( + lambda file: ( + not regex.search(file) + ), + files_to_scan, ) - for file in git_files: + for file in files_to_scan: output.scan_file(file) return output @@ -86,7 +94,7 @@ def get_secrets_not_in_baseline(results, baseline): return new_secrets -def update_baseline_with_removed_secrets(results, baseline, filelist): +def trim_baseline_of_removed_secrets(results, baseline, filelist): """ NOTE: filelist is not a comprehensive list of all files in the repo (because we can't be sure whether --all-files is passed in as a @@ -200,7 +208,7 @@ def merge_results(old_results, new_results): continue old_secret = old_secrets_mapping[new_secret['hashed_secret']] - # Only propogate 'is_secret' if it's not already there + # Only propagate 'is_secret' if it's not already there if 'is_secret' in old_secret and 'is_secret' not in new_secret: new_secret['is_secret'] = old_secret['is_secret'] diff --git a/detect_secrets/core/secrets_collection.py b/detect_secrets/core/secrets_collection.py index fae288b2..68709153 100644 --- a/detect_secrets/core/secrets_collection.py +++ b/detect_secrets/core/secrets_collection.py @@ -66,7 +66,6 @@ def _load_baseline_from_dict(cls, data): 'exclude_regex', 'plugins_used', 'results', - 'version', )): raise IOError @@ -95,7 +94,11 @@ def _load_baseline_from_dict(cls, data): secret.secret_hash = item['hashed_secret'] result.data[filename][secret] = secret - result.version = data['version'] + result.version = ( + data['version'] + if 'version' in data + else '0.0.0' + ) return result diff --git a/detect_secrets/core/usage.py b/detect_secrets/core/usage.py index 7776bcfd..be8a18be 100644 --- a/detect_secrets/core/usage.py +++ b/detect_secrets/core/usage.py @@ -32,7 +32,7 @@ def add_console_use_arguments(self): dest='action', ) - for action_parser in [ScanOptions, AuditOptions]: + for action_parser in (ScanOptions, AuditOptions): action_parser(subparser).add_arguments() return self @@ -62,7 +62,11 @@ def _add_verbosity_argument(self): return self def _add_filenames_argument(self): - self.parser.add_argument('filenames', nargs='*', help='Filenames to check') + self.parser.add_argument( + 'filenames', + nargs='*', + help='Filenames to check', + ) return self def _add_set_baseline_argument(self): diff --git a/detect_secrets/pre_commit_hook.py b/detect_secrets/pre_commit_hook.py index a091089c..f58738cf 100644 --- a/detect_secrets/pre_commit_hook.py +++ b/detect_secrets/pre_commit_hook.py @@ -1,6 +1,5 @@ from __future__ import absolute_import -import json import subprocess import sys import textwrap @@ -8,7 +7,7 @@ from detect_secrets import VERSION from detect_secrets.core.baseline import format_baseline_for_output from detect_secrets.core.baseline import get_secrets_not_in_baseline -from detect_secrets.core.baseline import update_baseline_with_removed_secrets +from detect_secrets.core.baseline import trim_baseline_of_removed_secrets from detect_secrets.core.log import get_logger from detect_secrets.core.secrets_collection import SecretsCollection from detect_secrets.core.usage import ParserBuilder @@ -36,7 +35,8 @@ def main(argv=None): # Error logs handled within logic. return 1 - results = find_secrets_in_files(args) + plugins = initialize.from_parser_builder(args.plugins) + results = find_secrets_in_files(args, plugins) if baseline_collection: original_results = results results = get_secrets_not_in_baseline( @@ -52,12 +52,18 @@ def main(argv=None): return 0 # Only attempt baseline modifications if we don't find any new secrets - successful_update = update_baseline_with_removed_secrets( + baseline_modified = trim_baseline_of_removed_secrets( original_results, baseline_collection, args.filenames, ) - if successful_update: + + if VERSION != baseline_collection.version: + baseline_collection.plugins = plugins + baseline_collection.version = VERSION + baseline_modified = True + + if baseline_modified: _write_to_baseline_file( args.baseline[0], baseline_collection.format_for_baseline_output(), @@ -87,31 +93,13 @@ def get_baseline(baseline_filename): if not baseline_filename: return - raise_exception_if_baseline_file_is_not_up_to_date(baseline_filename) + raise_exception_if_baseline_file_is_unstaged(baseline_filename) - baseline_string = _get_baseline_string_from_file(baseline_filename) - baseline_version = json.loads(baseline_string).get('version') - - try: - raise_exception_if_baseline_version_is_outdated( - baseline_version, - ) - except ValueError: - log.error( - 'The supplied baseline may be incompatible with the current\n' - 'version of detect-secrets. Please recreate your baseline to\n' - 'avoid potential mis-configurations.\n\n' - '$ detect-secrets scan --update %s\n\n' - 'Current Version: %s\n' - 'Baseline Version: %s', + return SecretsCollection.load_baseline_from_string( + _get_baseline_string_from_file( baseline_filename, - VERSION, - baseline_version if baseline_version else '0.0.0', - ) - - raise - - return SecretsCollection.load_baseline_from_string(baseline_string) + ), + ) def _get_baseline_string_from_file(filename): # pragma: no cover @@ -130,7 +118,7 @@ def _get_baseline_string_from_file(filename): # pragma: no cover raise -def raise_exception_if_baseline_file_is_not_up_to_date(filename): +def raise_exception_if_baseline_file_is_unstaged(filename): """We want to make sure that if there are changes to the baseline file, they will be included in the commit. This way, we can keep our baselines up-to-date. @@ -161,44 +149,12 @@ def raise_exception_if_baseline_file_is_not_up_to_date(filename): raise ValueError -def raise_exception_if_baseline_version_is_outdated(version): - """ - Version changes may cause breaking changes with past baselines. - Due to this, we want to make sure that the version that the - baseline was created with is compatible with the current version - of the scanner. - - We use semantic versioning, and check for bumps in the MINOR - version (a good compromise, so we can release patches for other - non-baseline-related issues, without having all our users - recreate their baselines again). - - :type version: str|None - :param version: version of baseline - :raises: ValueError - """ - if not version: - # Baselines created before this change, so by definition, - # would be outdated. - raise ValueError - - baseline_version = version.split('.') - current_version = VERSION.split('.') - - if int(current_version[0]) > int(baseline_version[0]): - raise ValueError - elif current_version[0] == baseline_version[0] and \ - int(current_version[1]) > int(baseline_version[1]): - raise ValueError - - -def find_secrets_in_files(args): - plugins = initialize.from_parser_builder(args.plugins) +def find_secrets_in_files(args, plugins): collection = SecretsCollection(plugins) for filename in args.filenames: + # Don't scan the baseline file if filename == args.baseline[0]: - # Obviously, don't detect the baseline file continue collection.scan_file(filename) diff --git a/tests/core/baseline_test.py b/tests/core/baseline_test.py index 230088e1..49289484 100644 --- a/tests/core/baseline_test.py +++ b/tests/core/baseline_test.py @@ -11,7 +11,7 @@ from detect_secrets.core.baseline import get_secrets_not_in_baseline from detect_secrets.core.baseline import merge_baseline from detect_secrets.core.baseline import merge_results -from detect_secrets.core.baseline import update_baseline_with_removed_secrets +from detect_secrets.core.baseline import trim_baseline_of_removed_secrets from detect_secrets.core.potential_secret import PotentialSecret from detect_secrets.plugins.high_entropy_strings import Base64HighEntropyString from detect_secrets.plugins.high_entropy_strings import HexHighEntropyString @@ -31,19 +31,19 @@ def setup(self): def get_results( self, - rootdir='./test_data/files', + path='./test_data/files', exclude_regex=None, scan_all_files=False, ): return baseline.initialize( self.plugins, - rootdir=rootdir, + path=path, exclude_regex=exclude_regex, scan_all_files=scan_all_files, ).json() @pytest.mark.parametrize( - 'rootdir', + 'path', [ './test_data/files', @@ -51,8 +51,8 @@ def get_results( 'test_data/../test_data/files/tmp/..', ], ) - def test_basic_usage(self, rootdir): - results = self.get_results(rootdir=rootdir) + def test_basic_usage(self, path): + results = self.get_results(path=path) assert len(results.keys()) == 2 assert len(results['test_data/files/file_with_secrets.py']) == 1 @@ -82,7 +82,7 @@ def test_no_files_in_git_repo(self): ), ), ): - results = self.get_results(rootdir='will_be_mocked') + results = self.get_results(path='will_be_mocked') assert not results @@ -99,7 +99,7 @@ def test_single_non_tracked_git_file_should_work(self): assert len(results['will_be_mocked']) == 1 def test_scan_all_files(self): - results = self.get_results(rootdir='test_data/files', scan_all_files=True) + results = self.get_results(path='test_data/files', scan_all_files=True) assert len(results.keys()) == 2 @@ -229,7 +229,7 @@ def test_deleted_secret(self): }, ]) - is_successful = update_baseline_with_removed_secrets( + is_successful = trim_baseline_of_removed_secrets( new_findings, baseline, ['filename'], @@ -247,7 +247,7 @@ def test_deleted_secret_file(self): }, ]) - is_successful = update_baseline_with_removed_secrets( + is_successful = trim_baseline_of_removed_secrets( new_findings, baseline, [ @@ -272,7 +272,7 @@ def test_same_secret_new_location(self): }, ]) - is_successful = update_baseline_with_removed_secrets( + is_successful = trim_baseline_of_removed_secrets( new_findings, baseline, ['filename'], @@ -303,7 +303,7 @@ def test_no_baseline_modifications(self, results_dict, baseline_dict): new_findings = secrets_collection_factory([results_dict]) baseline = secrets_collection_factory([baseline_dict]) - assert not update_baseline_with_removed_secrets( + assert not trim_baseline_of_removed_secrets( new_findings, baseline, ['filename'], diff --git a/tests/pre_commit_hook_test.py b/tests/pre_commit_hook_test.py index ecfca5d2..e107e5c1 100644 --- a/tests/pre_commit_hook_test.py +++ b/tests/pre_commit_hook_test.py @@ -97,40 +97,55 @@ def test_quit_if_baseline_is_changed_but_not_staged(self, mock_log): 'baseline_version, current_version', [ ('', '0.8.8',), + ('0.8.8', '0.8.9',), ('0.8.8', '0.9.0',), ('0.8.8', '1.0.0',), ], ) - def test_fails_if_baseline_version_is_outdated( + def test_that_baseline_gets_updated( self, mock_log, baseline_version, current_version, ): with _mock_versions(baseline_version, current_version): - assert_commit_blocked( - '--baseline will_be_mocked', - ) - - assert mock_log.error_messages == ( - 'The supplied baseline may be incompatible with the current\n' - 'version of detect-secrets. Please recreate your baseline to\n' - 'avoid potential mis-configurations.\n' - '\n' - '$ detect-secrets scan --update will_be_mocked\n' - '\n' - 'Current Version: {}\n' - 'Baseline Version: {}\n' - ).format( - current_version, - baseline_version if baseline_version else '0.0.0', - ) - - def test_succeeds_if_patch_version_is_different(self): - with _mock_versions('0.8.8', '0.8.9'): - assert_commit_succeeds( - 'test_data/files/file_with_no_secrets.py', - ) + baseline_string = _create_baseline() + modified_baseline = json.loads(baseline_string) + + with mock.patch( + 'detect_secrets.pre_commit_hook._get_baseline_string_from_file', + return_value=json.dumps(modified_baseline), + ), mock.patch( + 'detect_secrets.pre_commit_hook._write_to_baseline_file', + ) as m: + assert_commit_blocked( + '--baseline will_be_mocked test_data/files/file_with_secrets.py', + ) + + baseline_written = m.call_args[0][1] + + original_baseline = json.loads(baseline_string) + assert original_baseline['exclude_regex'] == baseline_written['exclude_regex'] + assert original_baseline['results'] == baseline_written['results'] + + # See that we updated the plugins and version + assert current_version == baseline_written['version'] + assert baseline_written['plugins_used'] == [ + { + 'base64_limit': 4.5, + 'name': 'Base64HighEntropyString', + }, + { + 'name': 'BasicAuthDetector', + }, + { + 'hex_limit': 3, + 'name': 'HexHighEntropyString', + }, + { + 'name': 'PrivateKeyDetector', + }, + ] def test_writes_new_baseline_if_modified(self): baseline_string = _create_baseline()