Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support url-safe base64 secrets #245

Merged
merged 12 commits into from
Oct 24, 2019
2 changes: 1 addition & 1 deletion detect_secrets/core/secrets_collection.py
Original file line number Diff line number Diff line change
Expand Up @@ -358,7 +358,7 @@ def _extract_secrets_from_patch(self, f, plugin, filename):
for line in chunk.target_lines():
if line.is_added:
output.update(
plugin.analyze_string(
plugin.analyze_line(
line.value,
line.target_line_no,
filename,
Expand Down
41 changes: 35 additions & 6 deletions detect_secrets/plugins/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,19 +53,35 @@ class BasePlugin(object):
def secret_type(self):
raise NotImplementedError

def __init__(self, exclude_lines_regex=None, should_verify=False, **kwargs):
def __init__(
self,
exclude_lines_regex=None,
should_verify=False,
false_positive_heuristics=None,
**kwargs
):
"""
:type exclude_lines_regex: str|None
:param exclude_lines_regex: optional regex for ignored lines.

:type should_verify: bool

:type false_positive_heuristics: List[Callable]|None
:param false_positive_heuristics: List of fp-heuristic functions
applicable to this plugin
"""
self.exclude_lines_regex = None
if exclude_lines_regex:
self.exclude_lines_regex = re.compile(exclude_lines_regex)

self.should_verify = should_verify

self.false_positive_heuristics = (
false_positive_heuristics
if false_positive_heuristics
else []
)

@classproperty
def disable_flag_text(cls):
name = cls.__name__
Expand Down Expand Up @@ -97,7 +113,7 @@ def analyze(self, file, filename):
potential_secrets = {}
file_lines = tuple(file.readlines())
for line_num, line in enumerate(file_lines, start=1):
results = self.analyze_string(line, line_num, filename)
results = self.analyze_line(line, line_num, filename)
if not self.should_verify:
potential_secrets.update(results)
continue
Expand All @@ -121,7 +137,7 @@ def analyze(self, file, filename):

return potential_secrets

def analyze_string(self, string, line_num, filename):
def analyze_line(self, string, line_num, filename):
"""
:param string: string; the line to analyze
:param line_num: integer; line number that is currently being analyzed
Expand Down Expand Up @@ -163,7 +179,7 @@ def analyze_string_content(self, string, line_num, filename):
@abstractmethod
def secret_generator(self, string, *args, **kwargs):
"""Flags secrets in a given string, and yields the raw secret value.
Used in self.analyze_string for PotentialSecret creation.
Used in self.analyze_line for PotentialSecret creation.

:type string: str
:param string: the secret to scan
Expand All @@ -178,7 +194,7 @@ def adhoc_scan(self, string):
check what different plugins say regarding a single line/secret. This
supports that.

This is very similar to self.analyze_string, but allows the flexibility
This is very similar to self.analyze_line, but allows the flexibility
for subclasses to add any other notable info (rather than just a
PotentialSecret type). e.g. HighEntropyStrings adds the Shannon
entropy on which it based its decision.
Expand All @@ -191,7 +207,7 @@ def adhoc_scan(self, string):
<classname>: <returned-value>
"""
# TODO: Handle multiple secrets on single line.
results = self.analyze_string(
results = self.analyze_line(
string,
line_num=0,
filename='does_not_matter',
Expand Down Expand Up @@ -232,6 +248,19 @@ def verify(self, token, content=''):
"""
return VerifiedResult.UNVERIFIED

def is_secret_false_positive(self, token):
    """
    Checks if the input secret is a false-positive according to
    this plugin's heuristics.

    :type token: str
    :param token: secret found by current plugin

    :rtype: bool
    :returns: True if at least one function in
        ``self.false_positive_heuristics`` returns a truthy value for
        ``token``; False otherwise, including when no heuristics are
        configured.
    """
    # `any` over an empty iterable is already False, so a separate
    # emptiness branch is unnecessary. `or ()` keeps a falsy value
    # (e.g. None) behaving like "no heuristics".
    heuristics = self.false_positive_heuristics or ()
    return any(func(token) for func in heuristics)

@property
def __dict__(self):
return {
Expand Down
112 changes: 90 additions & 22 deletions detect_secrets/plugins/common/filters.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,32 +3,13 @@
This abstraction allows for development of later ML work, or further
heuristic determinations (e.g. word filter, entropy comparator).
"""
import re
import string

from detect_secrets.util import is_python_2


def is_false_positive(secret, automaton):
"""
:type secret: str

:type automaton: ahocorasick.Automaton|None
:param automaton: optional automaton for ignoring certain words.

:rtype: bool
Returns True if any false positive heuristic function returns True.
"""
return any(
func(secret, automaton)
for func in
(
_is_found_with_aho_corasick,
_is_sequential_string,
)
)


def _is_found_with_aho_corasick(secret, automaton):
def is_found_with_aho_corasick(secret, automaton):
"""
:type secret: str

Expand All @@ -53,7 +34,20 @@ def _is_found_with_aho_corasick(secret, automaton):
return False


def _is_sequential_string(secret, *args):
def get_aho_corasick_helper(automaton):
    """
    Returns a function which determines if a word matches the
    input automaton.

    :type automaton: ahocorasick.Automaton

    :rtype: Callable
    :returns: a one-argument function; calling it with a secret forwards
        to ``is_found_with_aho_corasick(secret, automaton)`` and returns
        that call's result.
    """
    def matches_automaton(secret):
        # `automaton` is captured from the enclosing call, so callers of
        # the returned function only need to supply the secret.
        return is_found_with_aho_corasick(secret, automaton)

    return matches_automaton


def is_sequential_string(secret, *args):
"""
:type secret: str

Expand Down Expand Up @@ -97,3 +91,77 @@ def _is_sequential_string(secret, *args):
return True

return False


# Matches the 8-4-4-4-12 hexadecimal UUID layout anywhere in a string,
# case-insensitively.
_UUID_REGEX = re.compile(
    r'[a-f0-9]{8}\-[a-f0-9]{4}\-[a-f0-9]{4}\-[a-f0-9]{4}\-[a-f0-9]{12}',
    re.IGNORECASE,
)


def is_potential_uuid(secret, *args):
    """
    Determines if a potential secret contains any UUIDs.

    :type secret: str

    :param args: ignored; accepted so the function can be called with
        extra positional arguments.

    :rtype: bool
    Returns True if the string has a UUID, false otherwise.
    """
    # Using a regex to find strings that look like false-positives
    # will find us more false-positives than if we just tried to validate
    # the input string as a UUID (for example, if the string has a prefix
    # or suffix).
    return bool(_UUID_REGEX.search(secret))


# NOTE: this doesn't handle multiple key-values on a line properly.
# NOTE: words that end in "id" will be treated as ids
_ID_DETECTOR_REGEX = re.compile(r'id[^a-z0-9]', re.IGNORECASE)


def is_likely_id_string(secret, line):
    """
    Checks whether the text preceding the secret on its line contains
    "id" followed by a non-alphanumeric character (e.g. ``user_id=``).

    :type secret: str

    :type line: str
    :param line: Line context for the plaintext secret

    :rtype: bool
    Returns true if the secret could be an id, false otherwise.
    """
    # A single scan both tests for presence and locates the first
    # occurrence; -1 means the secret is not on this line.
    secret_index = line.find(secret)
    if secret_index == -1:
        return False

    # `endpos` limits the search to the text before the secret, so an
    # "id" marker that appears after the secret is not considered.
    return bool(_ID_DETECTOR_REGEX.search(line, pos=0, endpos=secret_index))


# Heuristics that take (secret, line); used as the default `functions`
# argument of is_false_positive_with_line_context.
DEFAULT_FALSE_POSITIVE_WITH_LINE_CONTEXT_HEURISTICS = [
    is_likely_id_string,
]


def is_false_positive_with_line_context(
    secret,
    line,
    functions=DEFAULT_FALSE_POSITIVE_WITH_LINE_CONTEXT_HEURISTICS,
):
    """
    Runs each line-aware heuristic against the secret and the line it
    was found on.

    :type secret: str

    :type line: str
    :param line: plaintext line on which secret was found

    :type functions: Iterable[Callable]
    :param functions: list of heuristics to use; each is called as
        ``func(secret, line)``

    :rtype: bool
    Returns True if any false-positive heuristic which considers the whole file line
    returns true.
    """
    # `any` stops at the first heuristic that flags the secret, and
    # yields False for an empty iterable.
    return any(
        func(secret, line)
        for func in functions
    )
2 changes: 1 addition & 1 deletion detect_secrets/plugins/common/ini_file_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ def iterator(self):
key,
values,
):
yield value, offset
yield key, value, offset

def _get_value_and_line_offset(self, key, values):
"""Returns the index of the location of key, value pair in lines.
Expand Down
5 changes: 5 additions & 0 deletions detect_secrets/plugins/common/yaml_file_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,11 @@ def _tag_dict_values(self, map_node):
value=str(value.tag.endswith(':binary')),
tag='tag:yaml.org,2002:bool',
),
self._create_key_value_pair_for_mapping_node_value(
key='__original_key__',
value=key.value,
tag='tag:yaml.org,2002:str',
),
],
)

Expand Down
Loading