Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Respect options from baseline #124

Merged
merged 9 commits into from
Feb 6, 2019
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion detect_secrets/core/usage.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ def add_use_all_plugins_argument(parser):
parser.add_argument(
'--use-all-plugins',
action='store_true',
help='Use all available plugins to scan files',
help='Use all available plugins to scan files.',
)


Expand Down Expand Up @@ -309,6 +309,7 @@ def consolidate_args(args):

active_plugins = {}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please put a comment here that explains the rationale for why we're returning these new values.

I understand the purpose of doing it, and that there's really no better way to incorporate the baseline values in it. It's just unfortunate that this hairy logic to perform prioritization of plugins needs to live in two places (unlike this doc string suggests).

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

disabled_plugins is mainly used to filter off some disabled plugins when reading the plugin list from baseline. Relates to the comment below, it's possible to generate the disabled_plugins list on-demand by calculating the difference between all plugins and active plugins. I can add a helper function to the usage.py.

I was using disabled_plugins earlier to avoid re-iterating on the all plugins to get such list.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Gotcha. Makes sense.

Thanks for adding the helper function anyway!

disabled_plugins = {}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Now that I think about it, what is our purpose of returning a set of disabled_plugins? Since args.plugins is a collection of all active plugins (combining default values and specifically set values), wouldn't any plugin that isn't in this dictionary be disabled?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

See comment above.

param_from_default = {}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe is_default_parameter, so it's clearer that it contains boolean values?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That's a good idea. I would make a suggestion to rename it to is_using_default_value


for plugin in PluginOptions.all_plugins:
arg_name = PluginOptions._convert_flag_text_to_argument_name(
Expand Down Expand Up @@ -342,13 +343,15 @@ def consolidate_args(args):

if default_value and related_args[arg_name] is None:
related_args[arg_name] = default_value
param_from_default[arg_name] = True

active_plugins.update({
plugin.classname: related_args,
})

args.plugins = active_plugins
args.disabled_plugins = disabled_plugins
args.param_from_default = param_from_default

def _add_custom_limits(self):
high_entropy_help_text = (
Expand Down
12 changes: 5 additions & 7 deletions detect_secrets/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,11 +40,6 @@ def main(argv=None):
_scan_string(line, plugins)

else:
if args.import_filename:
plugins = initialize.merge_plugin_from_baseline(
_get_plugin_from_baseline(args.import_filename), args,
)

baseline_dict = _perform_scan(args, plugins)

if args.import_filename:
Expand Down Expand Up @@ -82,8 +77,7 @@ def main(argv=None):
return 0


def _get_plugin_from_baseline(baseline_file):
old_baseline = _get_existing_baseline(baseline_file)
def _get_plugin_from_baseline(old_baseline):
plugins = []
if old_baseline and "plugins_used" in old_baseline:
secrets_collection = SecretsCollection.load_baseline_from_dict(old_baseline)
Expand Down Expand Up @@ -118,6 +112,10 @@ def _perform_scan(args, plugins):
:rtype: dict
"""
old_baseline = _get_existing_baseline(args.import_filename)
if old_baseline:
plugins = initialize.merge_plugin_from_baseline(
_get_plugin_from_baseline(old_baseline), args,
)

# Favors --exclude argument over existing baseline's regex (if exists)
if args.exclude:
Expand Down
80 changes: 60 additions & 20 deletions detect_secrets/plugins/common/initialize.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,28 +32,68 @@ def from_parser_builder(plugins_dict):


def merge_plugin_from_baseline(baseline_plugins, args):
# if --use-all-plugins
# include all parsed plugins
# else
# include all baseline plugins
# remove all disabled plugins
"""
:type baseline_plugins: tuple of BasePlugin
:param baseline_plugins: BasePlugin instances from baseline file

:type args: dict
:param args: diction of arguments parsed from usage

plugins = []
param priority: input param > baseline param > default

:Returns tuple of initialized plugins
"""
def _remove_key(d, key):
r = dict(d)
r.pop(key)
return r
baseline_plugins_dict = {
vars(plugin)["name"]: _remove_key(vars(plugin), "name")
for plugin in baseline_plugins
}

# Use input plugin as starting point
if args.use_all_plugins:
plugins = from_parser_builder(args.plugins)
elif args.disabled_plugins: # strip some plugins off baseline
plugins = _merge_plugin_from_baseline(baseline_plugins, args)
else:
plugins = baseline_plugins
return plugins


def _merge_plugin_from_baseline(baseline_plugins, args):
merged_plugins_dict = {vars(plugin)['name']: plugin for plugin in baseline_plugins}
for plugin_name in args.disabled_plugins:
if plugin_name in merged_plugins_dict:
merged_plugins_dict.pop(plugin_name)
return merged_plugins_dict.values()
# input param and default param are used
plugins_dict = dict(args.plugins)

# baseline param priority > default
for plugin_name, plugin_params in list(baseline_plugins_dict.items()):
for param_name, param_value in list(plugin_params.items()):
from_default = args.param_from_default.get(param_name, False)
if from_default:
try:
plugins_dict[plugin_name][param_name] = param_value
except KeyError:
log.warning(
'Baseline contain plugin %s which is not in all plugins! Ignoring...'
% (plugin_name),
)

return from_parser_builder(plugins_dict)

# Use baseline plugin as starting point
plugins_dict = baseline_plugins_dict
if args.disabled_plugins:
for plugin_name in args.disabled_plugins:
if plugin_name in plugins_dict:
plugins_dict.pop(plugin_name)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

plugins_dict = {
    plugin_name: plugin_params
    for plugin_name, plugin_params in baseline_plugins_dict.items()
    if plugin_name not in args.disabled_plugins
}


# input param priority > baseline
input_plugins_dict = dict(args.plugins)
for plugin_name, plugin_params in list(input_plugins_dict.items()):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We shouldn't need to turn input_plugins_dict.items() into a list. It gives us an iterator, so we get some performance boosts by avoiding turning it into a list, before iterating through it.

Same goes for other uses of this behavior.

for param_name, param_value in list(plugin_params.items()):
from_default = args.param_from_default.get(param_name, False)
if from_default is False:
try:
plugins_dict[plugin_name][param_name] = param_value
except KeyError:
log.warning(
'%s specified, but %s not configured! Ignoring...'
% ("".join(["--", param_name.replace("_", "-")]), plugin_name),
)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we make this DRYer, seeing that we use it below as well? One idea might be to implement an iterator, or just encapsulate it in a function. For example:

def get_prioritized_parameters(plugins, is_default_plugins_map, prefer_default=True):
    """
    :type is_default_plugins_map: dict(str => bool)
    :param is_default_plugins_map: mapping of parameter name to whether its value is derived
        from a default value.

    :param prefer_default: if True, will yield if plugin parameters are from default values.
        Otherwise, will yield if plugin parameters are *not* from default values.
    """
    ...
    yield plugin_name, param_name, param_value

Then, you can do:

plugins_dict = dict(args.plugins)
for plugin_name, param_name, param_value in get_prioritized_parameters(
    input_plugins_dict,
    args.param_from_default,
    prefer_default=False,
):
    try:
        plugins_dict[plugin_name][param_name] = param_value
    except KeyError:
        ...

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's been updated now. Although having more lines, but I do think the logic is a little bit more clear.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Clearer logic >> more lines, especially when it comes to long term maintenance 😸


return from_parser_builder(plugins_dict)


def from_plugin_classname(plugin_classname, **kwargs):
Expand Down
66 changes: 58 additions & 8 deletions tests/main_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -203,7 +203,7 @@ def test_old_baseline_ignored_with_update_flag(
"name": "AWSKeyDetector",
},
{
"base64_limit": 4.5,
"base64_limit": 1.5,
"name": "Base64HighEntropyString",
},
{
Expand All @@ -219,6 +219,9 @@ def test_old_baseline_ignored_with_update_flag(
{
"name": "PrivateKeyDetector",
},
{
"name": "SlackDetector",
},
],
),
( # remove some plugins from all plugins
Expand All @@ -244,6 +247,9 @@ def test_old_baseline_ignored_with_update_flag(
{
"name": "KeywordDetector",
},
{
"name": "SlackDetector",
},
],
),
( # use same plugin list from baseline
Expand All @@ -267,7 +273,7 @@ def test_old_baseline_ignored_with_update_flag(
},
],
),
( # ignore overwriten option from CLI when not using --use-all-plugins
( # overwrite base limit from CLI
[
{
"base64_limit": 3.5,
Expand All @@ -276,17 +282,30 @@ def test_old_baseline_ignored_with_update_flag(
"name": "PrivateKeyDetector",
},
],
'--base64-limit=4.5',
'--base64-limit=5.5',
[
{
"base64_limit": 3.5,
"base64_limit": 5.5,
"name": "Base64HighEntropyString",
},
{
"name": "PrivateKeyDetector",
},
],
),
( # does not overwrite base limit from CLI if baseline not using the plugin
[
{
"name": "PrivateKeyDetector",
},
],
'--base64-limit=4.5',
[
{
"name": "PrivateKeyDetector",
},
],
),
( # use overwriten option from CLI only when using --use-all-plugins
[
{
Expand All @@ -297,13 +316,43 @@ def test_old_baseline_ignored_with_update_flag(
"name": "PrivateKeyDetector",
},
],
'--use-all-plugins --base64-limit=4.5 --no-hex-string-scan --no-keyword-scan',
'--use-all-plugins --base64-limit=5.5 --no-hex-string-scan --no-keyword-scan',
[
{
"name": "AWSKeyDetector",
},
{
"base64_limit": 4.5,
"base64_limit": 5.5,
"name": "Base64HighEntropyString",
},
{
"name": "BasicAuthDetector",
},
{
"name": "PrivateKeyDetector",
},
{
"name": "SlackDetector",
},
],
),
( # use plugin limit from baseline when using --use-all-plugins and no input limit
[
{
"base64_limit": 2.5,
"name": "Base64HighEntropyString",
},
{
"name": "PrivateKeyDetector",
},
],
'--use-all-plugins --no-hex-string-scan --no-keyword-scan',
[
{
"name": "AWSKeyDetector",
},
{
"base64_limit": 2.5,
"name": "Base64HighEntropyString",
},
{
Expand All @@ -312,6 +361,9 @@ def test_old_baseline_ignored_with_update_flag(
{
"name": "PrivateKeyDetector",
},
{
"name": "SlackDetector",
},
],
),
],
Expand Down Expand Up @@ -341,8 +393,6 @@ def test_plugin_from_old_baseline_respected_with_update_flag(
),
) == 0

print("Used:", file_writer.call_args[1]['data']['plugins_used'])
print("Wrote:", plugins_wrote)
assert file_writer.call_args[1]['data']['plugins_used'] == \
plugins_wrote

Expand Down
6 changes: 3 additions & 3 deletions tests/pre_commit_hook_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ def test_file_no_secrets(self):
assert_commit_succeeds('test_data/files/file_with_no_secrets.py')

@pytest.mark.parametrize(
' has_result, use_private_key_scan,, hook_command, commit_result',
'has_result, use_private_key_scan, hook_command, commit_succeeds',
[
# basic case
(True, True, '--baseline will_be_mocked test_data/files/file_with_secrets.py', True),
Expand Down Expand Up @@ -75,7 +75,7 @@ def test_baseline(
has_result,
use_private_key_scan,
hook_command,
commit_result,
commit_succeeds,
):
"""This just checks if the baseline is loaded, and acts appropriately.
More detailed baseline tests are in their own separate test suite.
Expand All @@ -87,7 +87,7 @@ def test_baseline(
use_private_key_scan=use_private_key_scan,
),
):
if commit_result:
if commit_succeeds:
assert_commit_succeeds(hook_command)
else:
assert_commit_blocked(hook_command)
Expand Down