Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update merge results to only propagate is_secret of new secrets #90

Merged
merged 4 commits into from
Oct 25, 2018
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 7 additions & 34 deletions detect_secrets/core/baseline.py
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,8 @@ def merge_baseline(old_baseline, new_baseline):
to the new baseline, and will only work with baselines created
after v0.9.

Note that the exclude regex is handled separately.

:type old_baseline: dict
:param old_baseline: baseline dict, loaded from previous baseline

Expand All @@ -174,14 +176,6 @@ def merge_baseline(old_baseline, new_baseline):
def merge_results(old_results, new_results):
"""Update results in baseline with latest information.

As a rule of thumb, we want to favor the new results, yet at the same
time, transfer non-modified data from the old results set.

Assumptions:
* The list of results in each secret set is in the same order.
This means that new_results cannot have *more* results than
old_results.

:type old_results: dict
:param old_results: results of status quo

Expand All @@ -190,42 +184,21 @@ def merge_results(old_results, new_results):

:rtype: dict
"""
for filename, secrets in old_results.items():
for filename, old_secrets in old_results.items():
if filename not in new_results:
new_results[filename] = secrets
continue

if len(secrets) == len(new_results[filename]):
# Assuming that secrets remain in order.
KevinHock marked this conversation as resolved.
Show resolved Hide resolved
for index, secrets_tuple in enumerate(zip(secrets, new_results[filename])):
old_secret, new_secret = secrets_tuple
for new_secret in new_results[filename]:
for old_secret in old_secrets:
if old_secret['hashed_secret'] != new_secret['hashed_secret']:
# We don't join the two secret sets, because if the later
# result set did not discover an old secret, it's probably
# moved.
# If it did discover it, then lengths would be different.
continue

# Only propagate 'is_secret' if it's not already there
if 'is_secret' in old_secret and 'is_secret' not in new_secret:
# If the new_secret has a label, then go with the later
# version.
new_results[filename][index] = old_secret

continue

# Need to figure out starting point. That is, while
# len(new_results) < len(old_results), they may not start at the same
# place.
#
# e.g. old_results = A,B,C,D
# new_results = B,C
first_secret_hash = new_results[filename][0]['hashed_secret']
for index, secret in enumerate(secrets):
if secret['hashed_secret'] == first_secret_hash:
new_results[filename] = secrets[:index] + \
new_results[filename] + \
secrets[index + len(new_results[filename]):]
break
new_secret['is_secret'] = old_secret['is_secret']

return new_results

Expand Down
5 changes: 1 addition & 4 deletions tests/core/baseline_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -422,7 +422,7 @@ def test_new_results_has_nothing(self):
],
}

assert merge_results(old_result, {}) == old_result
assert merge_results(old_result, {}) == {}

def test_old_results_have_subset_of_new_results(self):
secretA = self.get_secret()
Expand All @@ -446,7 +446,6 @@ def test_old_results_have_subset_of_new_results(self):
) == {
'filenameA': [
modified_secretA,
secretB,
],
}

Expand Down Expand Up @@ -480,10 +479,8 @@ def test_old_results_have_shifted_subset(self):
},
) == {
'filename': [
secretA,
modified_secretB,
modified_secretC,
secretD,
],
}

Expand Down