Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

New Option for ManSpider: Exclude a list of files from the parsing by content #49

Open
wants to merge 3 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 26 additions & 6 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -133,25 +133,28 @@ For example, you could specify any or all of these:

## Usage:
~~~
usage: manspider [-h] [-u USERNAME] [-p PASSWORD] [-d DOMAIN] [-m MAXDEPTH] [-H HASH] [-t THREADS] [-f REGEX [REGEX ...]] [-e EXT [EXT ...]] [--exclude-extensions EXT [EXT ...]]
[-c REGEX [REGEX ...]] [--sharenames SHARE [SHARE ...]] [--exclude-sharenames [SHARE ...]] [--dirnames DIR [DIR ...]] [--exclude-dirnames DIR [DIR ...]] [-q] [-n]
[-mfail INT] [-o] [-s SIZE] [-v]
usage: manspider [-h] [-u USERNAME] [-p PASSWORD] [-d DOMAIN] [-l LOOT_DIR] [-m MAXDEPTH] [-H HASH] [-t THREADS] [-f REGEX [REGEX ...]]
[-e EXT [EXT ...]] [--exclude-extensions EXT [EXT ...]] [--exclude-files EXCLUDEDFILES [EXCLUDEDFILES ...]]
[-c REGEX [REGEX ...]] [--sharenames SHARE [SHARE ...]] [--exclude-sharenames [SHARE ...]] [--dirnames DIR [DIR ...]]
[--exclude-dirnames DIR [DIR ...]] [-q] [-n] [-mfail INT] [-o] [-s SIZE] [-v]
targets [targets ...]

Scan for juicy data on SMB shares. Matching files and logs are stored in $HOME/.manspider. All filters are case-insensitive.

positional arguments:
targets IPs, Hostnames, CIDR ranges, or files containing targets to spider (NOTE: local searching also supported, specify directory name or keyword "loot" to search
downloaded files)
targets IPs, Hostnames, CIDR ranges, or files containing targets to spider (NOTE: local searching also supported, specify
directory name or keyword "loot" to search downloaded files)

optional arguments:
options:
-h, --help show this help message and exit
-u USERNAME, --username USERNAME
username for authentication
-p PASSWORD, --password PASSWORD
password for authentication
-d DOMAIN, --domain DOMAIN
domain for authentication
-l LOOT_DIR, --loot-dir LOOT_DIR
loot directory (default ~/.manspider/)
-m MAXDEPTH, --maxdepth MAXDEPTH
maximum depth to spider (default: 10)
-H HASH, --hash HASH NTLM hash for authentication
Expand All @@ -163,6 +166,8 @@ optional arguments:
only show filenames with these extensions (space-separated, e.g. `docx xlsx` for only word & excel docs)
--exclude-extensions EXT [EXT ...]
ignore files with these extensions
--exclude-files EXCLUDEDFILES [EXCLUDEDFILES ...]
don't parse files with these names (space-separated, e.g. `office.exe junk.bin` to skip parsing for office.exe & junk.bin)
-c REGEX [REGEX ...], --content REGEX [REGEX ...]
search for file content using regex (multiple supported)
--sharenames SHARE [SHARE ...]
Expand All @@ -181,4 +186,19 @@ optional arguments:
-s SIZE, --max-filesize SIZE
don't retrieve files over this size, e.g. "500K" or ".5M" (default: 10M)
-v, --verbose show debugging messages


# EXAMPLES

Example 1: Search the network for filenames that may contain creds
$ manspider 192.168.0.0/24 -f passw user admin account network login logon cred -d evilcorp -u bob -p Passw0rd

Example 2: Search for XLSX files containing "password"
$ manspider share.evilcorp.local -c password -e xlsx -d evilcorp -u bob -p Passw0rd

Example 3: Search for interesting file extensions
$ manspider share.evilcorp.local -e bat com vbs ps1 psd1 psm1 pem key rsa pub reg txt cfg conf config -d evilcorp -u bob -p Passw0rd

Example 4: Search for finance-related files
$ manspider share.evilcorp.local --dirnames bank financ payable payment reconcil remit voucher vendor eft swift -f '[0-9]{5,}' -d evilcorp -u bob -p Passw0rd
~~~
46 changes: 32 additions & 14 deletions man_spider/lib/parser/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from ..logger import *
import subprocess as sp
from ..logger import ColoredFormatter
from pathlib import PosixPath

log = logging.getLogger('manspider.parser')

Expand Down Expand Up @@ -38,9 +39,12 @@ class FileParser:
]


def __init__(self, filters, quiet=False):
def __init__(self, filters, files_toskip, quiet=False):

self.init_content_filters(filters)
#! list of files to skip from parsing
self.files_toskip = files_toskip

self.quiet = quiet


Expand Down Expand Up @@ -126,23 +130,37 @@ def parse_file(self, file, pretty_filename=None):

if pretty_filename is None:
pretty_filename = str(file)

#! We are in remote mode
if type(pretty_filename) == str:
filename_tocheck = str(pretty_filename).split("\\")[-1]
#! We are in local mode
elif type(pretty_filename) == PosixPath:
filename_tocheck = pretty_filename.name

#! Skip the file if it's excluded from the parsing (with option: (--exclude-files ...))
if filename_tocheck in self.files_toskip:
log.debug(f"Skipping {str(pretty_filename)}: one of the filenames to skip")
return None

else:
#! Parse the file
log.debug(f'Parsing file: {pretty_filename}')

log.debug(f'Parsing file: {pretty_filename}')

matches = dict()
matches = dict()

try:
try:

matches = self.textract(file, pretty_filename=pretty_filename)
matches = self.textract(file, pretty_filename=pretty_filename)

except Exception as e:
#except (BadZipFile, textract.exceptions.CommandLineError) as e:
if log.level <= logging.DEBUG:
log.warning(f'Error extracting text from {pretty_filename}: {e}')
else:
log.warning(f'Error extracting text from {pretty_filename} (-v to debug)')

return matches
except Exception as e:
#except (BadZipFile, textract.exceptions.CommandLineError) as e:
if log.level <= logging.DEBUG:
log.warning(f'Error extracting text from {pretty_filename}: {e}')
else:
log.warning(f'Error extracting text from {pretty_filename} (-v to debug)')
return matches


def textract(self, file, pretty_filename):
Expand Down
7 changes: 6 additions & 1 deletion man_spider/lib/spider.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,13 +41,18 @@ def __init__(self, options):
self.or_logic = options.or_logic

self.extension_blacklist= options.exclude_extensions

#! List of files to exclude from parsing
self.exclude_files = options.exclude_files

self.file_extensions = options.extensions
if self.file_extensions:
extensions_str = '"' + '", "'.join(list(self.file_extensions)) + '"'
log.info(f'Searching by file extension: {extensions_str}')

self.init_filename_filters(options.filenames)
self.parser = FileParser(options.content, quiet=self.quiet)
#! excluded files will not be parsed (work with both: remote and local manspider mode)
self.parser = FileParser(options.content, self.exclude_files, quiet=self.quiet)

self.failed_logons = 0

Expand Down
4 changes: 2 additions & 2 deletions man_spider/lib/spiderling.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ def __init__(self, target, parent):
self.parent = parent
self.target = target


# unless we're only searching local files, connect to target
if type(self.target) == pathlib.PosixPath:
self.local = True
Expand Down Expand Up @@ -169,8 +170,7 @@ def parse_file(self, file):
For sole purpose of threading
'''

try:

try:
if type(file) == RemoteFile:
matches = self.parent.parser.parse_file(str(file.tmp_filename), pretty_filename=str(file))
if matches and not self.parent.no_download:
Expand Down
2 changes: 2 additions & 0 deletions man_spider/manspider.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,8 @@ def main():
parser.add_argument('-f', '--filenames', nargs='+', default=[], help=f'filter filenames using regex (space-separated)', metavar='REGEX')
parser.add_argument('-e', '--extensions',nargs='+', default=[], help='only show filenames with these extensions (space-separated, e.g. `docx xlsx` for only word & excel docs)', metavar='EXT')
parser.add_argument('--exclude-extensions',nargs='+', default=[], help='ignore files with these extensions', metavar='EXT')
# Argparser option to enter a list of excluded files from parsing
parser.add_argument('--exclude-files',nargs='+', default=[], help='dont parse files with these names (space-separated, e.g. `office.exe junk.bin` to skip parsing for office.exe & junk.data)', metavar='EXCLUDEDFILES')
parser.add_argument('-c', '--content', nargs='+', default=[], help='search for file content using regex (multiple supported)', metavar='REGEX')
parser.add_argument('--sharenames', nargs='+', default=[], help='only search shares with these names (multiple supported)', metavar='SHARE')
parser.add_argument('--exclude-sharenames', nargs='*', default=['IPC$', 'C$', 'ADMIN$', 'PRINT$'],help='don\'t search shares with these names (multiple supported)', metavar='SHARE')
Expand Down