Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[pre-commit-hook] Automatically update the baseline #96

Merged
merged 1 commit into from
Nov 26, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 22 additions & 14 deletions detect_secrets/core/baseline.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,39 +8,47 @@
from detect_secrets.core.secrets_collection import SecretsCollection


def initialize(plugins, exclude_regex=None, rootdir='.', scan_all_files=False):
"""Scans the entire codebase for high entropy strings, and returns a
def initialize(
plugins,
exclude_regex=None,
path='.',
scan_all_files=False,
):
"""Scans the entire codebase for secrets, and returns a
SecretsCollection object.

:type plugins: tuple of detect_secrets.plugins.base.BasePlugin
:param plugins: rules to initialize the SecretsCollection with.

:type exclude_regex: str|None
:type rootdir: str
:type path: str
:type scan_all_files: bool

:rtype: SecretsCollection
"""
output = SecretsCollection(plugins, exclude_regex)

if os.path.isfile(rootdir):
if os.path.isfile(path):
# This option allows for much easier adhoc usage.
git_files = [rootdir]
files_to_scan = [path]
elif scan_all_files:
git_files = _get_files_recursively(rootdir)
files_to_scan = _get_files_recursively(path)
else:
git_files = _get_git_tracked_files(rootdir)
files_to_scan = _get_git_tracked_files(path)

if not git_files:
if not files_to_scan:
return output

if exclude_regex:
regex = re.compile(exclude_regex, re.IGNORECASE)
git_files = filter(
lambda x: not regex.search(x),
git_files,
files_to_scan = filter(
lambda file: (
not regex.search(file)
),
files_to_scan,
)

for file in git_files:
for file in files_to_scan:
output.scan_file(file)

return output
Expand Down Expand Up @@ -86,7 +94,7 @@ def get_secrets_not_in_baseline(results, baseline):
return new_secrets


def update_baseline_with_removed_secrets(results, baseline, filelist):
def trim_baseline_of_removed_secrets(results, baseline, filelist):
"""
NOTE: filelist is not a comprehensive list of all files in the repo
(because we can't be sure whether --all-files is passed in as a
Expand Down Expand Up @@ -200,7 +208,7 @@ def merge_results(old_results, new_results):
continue

old_secret = old_secrets_mapping[new_secret['hashed_secret']]
# Only propogate 'is_secret' if it's not already there
# Only propagate 'is_secret' if it's not already there
if 'is_secret' in old_secret and 'is_secret' not in new_secret:
new_secret['is_secret'] = old_secret['is_secret']

Expand Down
7 changes: 5 additions & 2 deletions detect_secrets/core/secrets_collection.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,6 @@ def _load_baseline_from_dict(cls, data):
'exclude_regex',
'plugins_used',
'results',
'version',
)):
raise IOError

Expand Down Expand Up @@ -95,7 +94,11 @@ def _load_baseline_from_dict(cls, data):
secret.secret_hash = item['hashed_secret']
result.data[filename][secret] = secret

result.version = data['version']
result.version = (
data['version']
if 'version' in data
else '0.0.0'
)

return result

Expand Down
8 changes: 6 additions & 2 deletions detect_secrets/core/usage.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ def add_console_use_arguments(self):
dest='action',
)

for action_parser in [ScanOptions, AuditOptions]:
for action_parser in (ScanOptions, AuditOptions):
action_parser(subparser).add_arguments()

return self
Expand Down Expand Up @@ -62,7 +62,11 @@ def _add_verbosity_argument(self):
return self

def _add_filenames_argument(self):
self.parser.add_argument('filenames', nargs='*', help='Filenames to check')
self.parser.add_argument(
'filenames',
nargs='*',
help='Filenames to check',
)
return self

def _add_set_baseline_argument(self):
Expand Down
82 changes: 19 additions & 63 deletions detect_secrets/pre_commit_hook.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,13 @@
from __future__ import absolute_import

import json
import subprocess
import sys
import textwrap

from detect_secrets import VERSION
from detect_secrets.core.baseline import format_baseline_for_output
from detect_secrets.core.baseline import get_secrets_not_in_baseline
from detect_secrets.core.baseline import update_baseline_with_removed_secrets
from detect_secrets.core.baseline import trim_baseline_of_removed_secrets
from detect_secrets.core.log import get_logger
from detect_secrets.core.secrets_collection import SecretsCollection
from detect_secrets.core.usage import ParserBuilder
Expand Down Expand Up @@ -36,7 +35,8 @@ def main(argv=None):
# Error logs handled within logic.
return 1

results = find_secrets_in_files(args)
plugins = initialize.from_parser_builder(args.plugins)
results = find_secrets_in_files(args, plugins)
if baseline_collection:
original_results = results
results = get_secrets_not_in_baseline(
Expand All @@ -52,12 +52,18 @@ def main(argv=None):
return 0

# Only attempt baseline modifications if we don't find any new secrets
successful_update = update_baseline_with_removed_secrets(
baseline_modified = trim_baseline_of_removed_secrets(
original_results,
baseline_collection,
args.filenames,
)
if successful_update:

if VERSION != baseline_collection.version:
baseline_collection.plugins = plugins
baseline_collection.version = VERSION
baseline_modified = True

if baseline_modified:
_write_to_baseline_file(
args.baseline[0],
baseline_collection.format_for_baseline_output(),
Expand Down Expand Up @@ -87,31 +93,13 @@ def get_baseline(baseline_filename):
if not baseline_filename:
return

raise_exception_if_baseline_file_is_not_up_to_date(baseline_filename)
raise_exception_if_baseline_file_is_unstaged(baseline_filename)

baseline_string = _get_baseline_string_from_file(baseline_filename)
baseline_version = json.loads(baseline_string).get('version')

try:
raise_exception_if_baseline_version_is_outdated(
baseline_version,
)
except ValueError:
log.error(
'The supplied baseline may be incompatible with the current\n'
'version of detect-secrets. Please recreate your baseline to\n'
'avoid potential mis-configurations.\n\n'
'$ detect-secrets scan --update %s\n\n'
'Current Version: %s\n'
'Baseline Version: %s',
return SecretsCollection.load_baseline_from_string(
_get_baseline_string_from_file(
baseline_filename,
VERSION,
baseline_version if baseline_version else '0.0.0',
)

raise

return SecretsCollection.load_baseline_from_string(baseline_string)
),
)


def _get_baseline_string_from_file(filename): # pragma: no cover
Expand All @@ -130,7 +118,7 @@ def _get_baseline_string_from_file(filename): # pragma: no cover
raise


def raise_exception_if_baseline_file_is_not_up_to_date(filename):
def raise_exception_if_baseline_file_is_unstaged(filename):
"""We want to make sure that if there are changes to the baseline
file, they will be included in the commit. This way, we can keep
our baselines up-to-date.
Expand Down Expand Up @@ -161,44 +149,12 @@ def raise_exception_if_baseline_file_is_not_up_to_date(filename):
raise ValueError


def raise_exception_if_baseline_version_is_outdated(version):
"""
Version changes may cause breaking changes with past baselines.
Due to this, we want to make sure that the version that the
baseline was created with is compatible with the current version
of the scanner.

We use semantic versioning, and check for bumps in the MINOR
version (a good compromise, so we can release patches for other
non-baseline-related issues, without having all our users
recreate their baselines again).

:type version: str|None
:param version: version of baseline
:raises: ValueError
"""
if not version:
# Baselines created before this change, so by definition,
# would be outdated.
raise ValueError

baseline_version = version.split('.')
current_version = VERSION.split('.')

if int(current_version[0]) > int(baseline_version[0]):
raise ValueError
elif current_version[0] == baseline_version[0] and \
int(current_version[1]) > int(baseline_version[1]):
raise ValueError


def find_secrets_in_files(args):
plugins = initialize.from_parser_builder(args.plugins)
def find_secrets_in_files(args, plugins):
collection = SecretsCollection(plugins)

for filename in args.filenames:
# Don't scan the baseline file
if filename == args.baseline[0]:
# Obviously, don't detect the baseline file
continue

collection.scan_file(filename)
Expand Down
24 changes: 12 additions & 12 deletions tests/core/baseline_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
from detect_secrets.core.baseline import get_secrets_not_in_baseline
from detect_secrets.core.baseline import merge_baseline
from detect_secrets.core.baseline import merge_results
from detect_secrets.core.baseline import update_baseline_with_removed_secrets
from detect_secrets.core.baseline import trim_baseline_of_removed_secrets
from detect_secrets.core.potential_secret import PotentialSecret
from detect_secrets.plugins.high_entropy_strings import Base64HighEntropyString
from detect_secrets.plugins.high_entropy_strings import HexHighEntropyString
Expand All @@ -31,28 +31,28 @@ def setup(self):

def get_results(
self,
rootdir='./test_data/files',
path='./test_data/files',
exclude_regex=None,
scan_all_files=False,
):
return baseline.initialize(
self.plugins,
rootdir=rootdir,
path=path,
exclude_regex=exclude_regex,
scan_all_files=scan_all_files,
).json()

@pytest.mark.parametrize(
'rootdir',
'path',
[
'./test_data/files',

# Test relative paths
'test_data/../test_data/files/tmp/..',
],
)
def test_basic_usage(self, rootdir):
results = self.get_results(rootdir=rootdir)
def test_basic_usage(self, path):
results = self.get_results(path=path)

assert len(results.keys()) == 2
assert len(results['test_data/files/file_with_secrets.py']) == 1
Expand Down Expand Up @@ -82,7 +82,7 @@ def test_no_files_in_git_repo(self):
),
),
):
results = self.get_results(rootdir='will_be_mocked')
results = self.get_results(path='will_be_mocked')

assert not results

Expand All @@ -99,7 +99,7 @@ def test_single_non_tracked_git_file_should_work(self):
assert len(results['will_be_mocked']) == 1

def test_scan_all_files(self):
results = self.get_results(rootdir='test_data/files', scan_all_files=True)
results = self.get_results(path='test_data/files', scan_all_files=True)
assert len(results.keys()) == 2


Expand Down Expand Up @@ -229,7 +229,7 @@ def test_deleted_secret(self):
},
])

is_successful = update_baseline_with_removed_secrets(
is_successful = trim_baseline_of_removed_secrets(
new_findings,
baseline,
['filename'],
Expand All @@ -247,7 +247,7 @@ def test_deleted_secret_file(self):
},
])

is_successful = update_baseline_with_removed_secrets(
is_successful = trim_baseline_of_removed_secrets(
new_findings,
baseline,
[
Expand All @@ -272,7 +272,7 @@ def test_same_secret_new_location(self):
},
])

is_successful = update_baseline_with_removed_secrets(
is_successful = trim_baseline_of_removed_secrets(
new_findings,
baseline,
['filename'],
Expand Down Expand Up @@ -303,7 +303,7 @@ def test_no_baseline_modifications(self, results_dict, baseline_dict):
new_findings = secrets_collection_factory([results_dict])
baseline = secrets_collection_factory([baseline_dict])

assert not update_baseline_with_removed_secrets(
assert not trim_baseline_of_removed_secrets(
new_findings,
baseline,
['filename'],
Expand Down
Loading