Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add file name field for better web experience #693

Open
wants to merge 8 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions conf/web.conf
Original file line number Diff line number Diff line change
Expand Up @@ -229,3 +229,6 @@ zip_pwd = infected

[evtx_download]
enabled = no

[display_origin_filename]
enabled = no
68 changes: 48 additions & 20 deletions lib/cuckoo/common/demux.py
Original file line number Diff line number Diff line change
Expand Up @@ -159,6 +159,10 @@ def is_valid_type(magic):
return False


def is_valid_path(file_path):
    """Return the extracted path stored in a demux result entry.

    Used as a ``filter`` predicate over the lists built by the demux helpers,
    so only the truthiness of the result matters to those callers.

    Args:
        file_path: despite the name, this is a mapping describing one
            extracted file (it is read with ``.get``); the demux helpers
            build it with the keys ``"file_path"`` and ``"filename"``.

    Returns:
        The value stored under ``"file_path"``. It is falsy (``False`` or
        ``None``) when no path was produced for the entry.
    """
    return file_path.get("file_path")


def _sf_chlildren(child):
path_to_extract = False
_, ext = os.path.splitext(child.filename)
Expand All @@ -175,19 +179,25 @@ def _sf_chlildren(child):
f.write(child.contents)
except Exception as e:
log.error(e, exc_info=True)
return path_to_extract
return {
"file_path":path_to_extract,
"filename":child.filename
}


def demux_sflock(filename, options, package):
def demux_sflock(file_path, filename, options, package):
retlist = []
# only extract from files with no extension or with .bin (downloaded from us) or .zip PACKAGE, we do extract from zip archives, to ignore it set ZIP PACKAGES
ext = os.path.splitext(filename)[1]
ext = os.path.splitext(file_path)[1]
if ext == b".bin":
return retlist

# to handle when side file for exec is required
if "file=" in options:
return [filename]
return [{
"file_path":file_path,
"filename":filename
}]

try:
password = "infected"
Expand All @@ -196,42 +206,48 @@ def demux_sflock(filename, options, package):
password = tmp_pass

try:
unpacked = unpack(filename, password=password)
unpacked = unpack(file_path, password=password)
except UnpackException:
unpacked = unpack(filename)
unpacked = unpack(file_path)

if unpacked.package in whitelist_extensions:
return [filename]
return [{
"file_path":file_path,
"filename":filename
}]
if unpacked.package in blacklist_extensions:
return retlist
for sf_child in unpacked.children or []:
if sf_child.to_dict().get("children") and sf_child.to_dict()["children"]:
retlist += [_sf_chlildren(ch) for ch in sf_child.children]
# child is not available, the original file should be put into the list
if filter(None, retlist):
#child is not available, the original file should be put into the list
if filter(is_valid_path, retlist):
retlist.append(_sf_chlildren(sf_child))
else:
retlist.append(_sf_chlildren(sf_child))
except Exception as e:
log.error(e, exc_info=True)

return list(filter(None, retlist))
return list(filter(is_valid_path, retlist))


def demux_sample(filename, package, options, use_sflock=True):
def demux_sample(file_path, filename, package, options, use_sflock=True):
"""
If file is a ZIP, extract its included files and return their file paths
If file is an email, extracts its attachments and return their file paths (later we'll also extract URLs)
"""
# sflock requires filename to be bytes object for Py3
if isinstance(filename, str) and use_sflock:
filename = filename.encode()
if isinstance(file_path, str) and use_sflock:
file_path = file_path.encode()
# if a package was specified, then don't do anything special
if package:
return [filename]
return [{
"file_path":file_path,
"filename":filename
}]

# don't try to extract from office docs
magic = File(filename).get_type()
magic = File(file_path).get_type()

# if file is an Office doc and password is supplied, try to decrypt the doc
if "Microsoft" in magic:
Expand All @@ -246,21 +262,33 @@ def demux_sample(filename, package, options, use_sflock=True):

# don't try to extract from Java archives or executables
if "Java Jar" in magic:
return [filename]
return [{
"file_path":file_path,
"filename":filename
}]
if "PE32" in magic or "MS-DOS executable" in magic:
return [filename]
return [{
"file_path":file_path,
"filename":filename
}]
if any(x in magic for x in VALID_LINUX_TYPES):
return [filename]
return [{
"file_path":file_path,
"filename":filename
}]

retlist = []
if HAS_SFLOCK:
if use_sflock:
# all in one unarchiver
retlist = demux_sflock(filename, options, package)
retlist = demux_sflock(file_path, filename, options, package)
# if it wasn't a ZIP or an email or we weren't able to obtain anything interesting from either, then just submit the
# original file
if not retlist:
retlist.append(filename)
retlist.append({
"file_path":file_path,
"filename":filename
})
else:
if len(retlist) > 10:
retlist = retlist[:10]
Expand Down
2 changes: 2 additions & 0 deletions lib/cuckoo/common/web_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -722,6 +722,7 @@ def download_file(**kwargs):
# Keep this as demux_sample_and_add_to_db in DB
task_ids_new, extra_details = db.demux_sample_and_add_to_db(
file_path=kwargs["path"],
filename=kwargs["filename"],
package=package,
timeout=timeout,
options=kwargs["options"],
Expand Down Expand Up @@ -1137,6 +1138,7 @@ def download_from_vt(vtdl, details, opt_filename, settings):
details["fhash"] = h
details["path"] = filename
details["service"] = "VirusTotal"
details["filename"] = os.path.basename(filename)
if not details.get("content", False):
status, task_ids_tmp = download_file(**details)
else:
Expand Down
45 changes: 32 additions & 13 deletions lib/cuckoo/core/database.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,7 @@
repconf = Config("reporting")
web_conf = Config("web")
LINUX_ENABLED = web_conf.linux.enabled
ORIGIN_FILENAME_ENABLED = web_conf.display_origin_filename.enabled

if repconf.mongodb.enabled:
import pymongo
Expand All @@ -99,7 +100,7 @@

es = elastic_handler

SCHEMA_VERSION = "8537286ff4d5"
SCHEMA_VERSION = "fd50efe2ab14"
TASK_BANNED = "banned"
TASK_PENDING = "pending"
TASK_RUNNING = "running"
Expand Down Expand Up @@ -385,6 +386,7 @@ class Task(Base):

id = Column(Integer(), primary_key=True)
target = Column(Text(), nullable=False)
filename = Column(Text(), nullable=False)
category = Column(String(255), nullable=False)
cape = Column(String(2048), nullable=True)
timeout = Column(Integer(), server_default="0", nullable=False)
Expand Down Expand Up @@ -1174,6 +1176,7 @@ def register_sample(self, obj, source_url=False):
def add(
self,
obj,
filename,
timeout=0,
package="",
options="",
Expand Down Expand Up @@ -1309,7 +1312,14 @@ def add(
elif isinstance(obj, URL):
task = Task(obj.url)
tags = "x64,x86"


if ORIGIN_FILENAME_ENABLED:
if isinstance(filename, bytes):
task.filename = filename.decode()
else:
task.filename = filename
else:
task.filename = os.path.basename(task.target)
task.category = obj.__class__.__name__.lower()
task.timeout = timeout
task.package = package
Expand Down Expand Up @@ -1368,6 +1378,7 @@ def add(
def add_path(
self,
file_path,
filename,
timeout=0,
package="",
options="",
Expand Down Expand Up @@ -1429,6 +1440,7 @@ def add_path(

return self.add(
File(file_path),
filename,
timeout,
package,
options,
Expand Down Expand Up @@ -1458,6 +1470,7 @@ def add_path(
def demux_sample_and_add_to_db(
self,
file_path,
filename,
timeout=0,
package="",
options="",
Expand Down Expand Up @@ -1498,11 +1511,11 @@ def demux_sample_and_add_to_db(
package = ""
original_options = options
# extract files from the (potential) archive
extracted_files = demux_sample(file_path, package, options)
extracted_files = demux_sample(file_path, filename, package, options)
# check if len is 1 and the same file, if diff register file, and set parent
if not isinstance(file_path, bytes):
file_path = file_path.encode()
if extracted_files and file_path not in extracted_files:
if extracted_files and {"file_path": file_path, "filename": filename} not in extracted_files:
sample_parent_id = self.register_sample(File(file_path), source_url=source_url)
if conf.cuckoo.delete_archive:
os.remove(file_path)
Expand All @@ -1514,27 +1527,27 @@ def demux_sample_and_add_to_db(
if isinstance(runfile, str):
runfile = runfile.encode()
for xfile in extracted_files:
if runfile in xfile.lower():
if runfile in xfile.get("file_path").lower():
extracted_files = [xfile]
break

# create tasks for each file in the archive
for file in extracted_files:
if static:
# we don't need to process extra file if we already have it and config
config = static_config_lookup(file)
config = static_config_lookup(file.get("file_path"))
if config:
task_ids.append(config["id"])
else:
config = static_extraction(file)
config = static_extraction(file.get("file_path"))
if config or static_extraction:
task_ids += self.add_static(
file_path=file, priority=priority, tlp=tlp, user_id=user_id, username=username, options=options
file_path=file.get("file_path"), filename=file.get("filename"), priority=priority, tlp=tlp, user_id=user_id, username=username, options=options
)

if not config and not only_extraction:
if not package:
f = SflockFile.from_path(file)
f = SflockFile.from_path(file.get("file_path"))
tmp_package = sflock_identify(f)
if tmp_package and tmp_package in sandbox_packages:
package = tmp_package
Expand All @@ -1559,7 +1572,8 @@ def demux_sample_and_add_to_db(
options = "dist_extract=1"

task_id = self.add_path(
file_path=file.decode(),
file_path=file.get("file_path").decode(),
filename=file.get("filename"),
timeout=timeout,
priority=priority,
options=options,
Expand Down Expand Up @@ -1598,6 +1612,7 @@ def demux_sample_and_add_to_db(
def add_pcap(
self,
file_path,
filename,
timeout=0,
package="",
options="",
Expand All @@ -1620,6 +1635,7 @@ def add_pcap(
):
return self.add(
PCAP(file_path.decode()),
filename,
timeout,
package,
options,
Expand All @@ -1645,6 +1661,7 @@ def add_pcap(
def add_static(
self,
file_path,
filename,
timeout=0,
package="",
options="",
Expand All @@ -1666,12 +1683,12 @@ def add_static(
user_id=0,
username=False,
):
extracted_files = demux_sample(file_path, package, options)
extracted_files = demux_sample(file_path, filename, package, options)
sample_parent_id = None
# check if len is 1 and the same file, if diff register file, and set parent
if not isinstance(file_path, bytes):
file_path = file_path.encode()
if extracted_files and file_path not in extracted_files:
if extracted_files and {"file_path": file_path, "filename": filename} not in extracted_files:
sample_parent_id = self.register_sample(File(file_path))
if conf.cuckoo.delete_archive:
os.remove(file_path)
Expand All @@ -1680,7 +1697,8 @@ def add_static(
# create tasks for each file in the archive
for file in extracted_files:
task_id = self.add(
Static(file.decode()),
Static(file.get("file_path").decode()),
file.get("filename"),
timeout,
package,
options,
Expand Down Expand Up @@ -1763,6 +1781,7 @@ def add_url(

return self.add(
URL(url),
url,
timeout,
package,
options,
Expand Down
31 changes: 31 additions & 0 deletions utils/db_migration/versions/add_filename_to_tasks.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
# Copyright (C) 2010-2015 Cuckoo Foundation.
# This file is part of Cuckoo Sandbox - http://www.cuckoosandbox.org
# See the file 'docs/LICENSE' for copying permission.

"""add filename to tasks

Revision ID: fd50efe2ab14
Revises: 8537286ff4d5
Create Date: 2021-12-31 08:59:12.774006

"""

# Alembic revision chain: this migration's id and the id of its parent.
revision = "fd50efe2ab14"
down_revision = "8537286ff4d5"

import sqlalchemy as sa
from alembic import op


def upgrade():
    """Add the non-nullable ``filename`` text column to the ``tasks`` table."""
    # ### commands auto generated by Alembic - please adjust! ###
    # The column is NOT NULL, so a server default is needed for rows that
    # already exist; they are backfilled with the literal string "None"
    # (a text value, not SQL NULL).
    # A single add_column is sufficient: re-applying the same default with
    # alter_column changes nothing and is unsupported on SQLite, which has
    # no ALTER COLUMN.
    op.add_column(
        "tasks",
        sa.Column("filename", sa.Text(), nullable=False, server_default="None"),
    )
    # ### end Alembic commands ###


def downgrade():
    """Revert this revision by dropping the ``filename`` column from ``tasks``."""
    # ### commands auto generated by Alembic - please adjust! ###
    table, column = "tasks", "filename"
    op.drop_column(table, column)
    # ### end Alembic commands ###
# ### end Alembic commands ###
2 changes: 1 addition & 1 deletion web/analysis/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -184,7 +184,7 @@ def get_analysis_info(db, id=-1, task=None):
new = task.to_dict()
if new["category"] in ("file", "pcap", "static") and new["sample_id"] is not None:
new["sample"] = db.view_sample(new["sample_id"]).to_dict()
filename = os.path.basename(new["target"])
filename = new["filename"]
new.update({"filename": filename})

new.update({"user_task_tags": get_tags_tasks([new["id"]])})
Expand Down
Loading