From 3c33bf6dd3f7ed751c7da37c51c8867a35205a32 Mon Sep 17 00:00:00 2001 From: N1neSun <917549681@qq.com> Date: Tue, 4 Jan 2022 17:50:15 +0800 Subject: [PATCH 1/5] Add file name field for better web experience --- lib/cuckoo/common/demux.py | 68 +++++++++++++------ lib/cuckoo/common/web_utils.py | 2 + lib/cuckoo/core/database.py | 43 ++++++++---- .../versions/add_filename_to_tasks.py | 31 +++++++++ web/analysis/views.py | 2 +- web/apiv2/views.py | 25 ++++--- web/submission/views.py | 8 ++- 7 files changed, 134 insertions(+), 45 deletions(-) create mode 100644 utils/db_migration/versions/add_filename_to_tasks.py diff --git a/lib/cuckoo/common/demux.py b/lib/cuckoo/common/demux.py index 0e24a98bdbc..08039087881 100644 --- a/lib/cuckoo/common/demux.py +++ b/lib/cuckoo/common/demux.py @@ -159,6 +159,10 @@ def is_valid_type(magic): return False +def is_valid_path(file_path): + return file_path.get("file_path") + + def _sf_chlildren(child): path_to_extract = False _, ext = os.path.splitext(child.filename) @@ -175,19 +179,25 @@ def _sf_chlildren(child): f.write(child.contents) except Exception as e: log.error(e, exc_info=True) - return path_to_extract + return { + "file_path":path_to_extract, + "filename":child.filename + } -def demux_sflock(filename, options, package): +def demux_sflock(file_path, filename, options, package): retlist = [] # only extract from files with no extension or with .bin (downloaded from us) or .zip PACKAGE, we do extract from zip archives, to ignore it set ZIP PACKAGES - ext = os.path.splitext(filename)[1] + ext = os.path.splitext(file_path)[1] if ext == b".bin": return retlist # to handle when side file for exec is required if "file=" in options: - return [filename] + return [{ + "file_path":file_path, + "filename":filename + }] try: password = "infected" @@ -196,42 +206,48 @@ def demux_sflock(filename, options, package): password = tmp_pass try: - unpacked = unpack(filename, password=password) + unpacked = unpack(file_path, password=password) except UnpackException: - unpacked = unpack(filename) + unpacked = unpack(file_path) if unpacked.package in whitelist_extensions: - return [filename] + return [{ + "file_path":file_path, + "filename":filename + }] if unpacked.package in blacklist_extensions: return retlist for sf_child in unpacked.children or []: if sf_child.to_dict().get("children") and sf_child.to_dict()["children"]: retlist += [_sf_chlildren(ch) for ch in sf_child.children] - # child is not available, the original file should be put into the list - if filter(None, retlist): + #child is not available, the original file should be put into the list + if filter(is_valid_path, retlist): retlist.append(_sf_chlildren(sf_child)) else: retlist.append(_sf_chlildren(sf_child)) except Exception as e: log.error(e, exc_info=True) - return list(filter(None, retlist)) + return list(filter(is_valid_path, retlist)) -def demux_sample(filename, package, options, use_sflock=True): +def demux_sample(file_path, filename, package, options, use_sflock=True): """ If file is a ZIP, extract its included files and return their file paths If file is an email, extracts its attachments and return their file paths (later we'll also extract URLs) """ # sflock requires filename to be bytes object for Py3 - if isinstance(filename, str) and use_sflock: - filename = filename.encode() + if isinstance(file_path, str) and use_sflock: + file_path = file_path.encode() # if a package was specified, then don't do anything special if package: - return [filename] + return [{ + "file_path":file_path, + 
"filename":filename + }] # don't try to extract from office docs - magic = File(filename).get_type() + magic = File(file_path).get_type() # if file is an Office doc and password is supplied, try to decrypt the doc if "Microsoft" in magic: @@ -246,21 +262,33 @@ def demux_sample(filename, package, options, use_sflock=True): # don't try to extract from Java archives or executables if "Java Jar" in magic: - return [filename] + return [{ + "file_path":file_path, + "filename":filename + }] if "PE32" in magic or "MS-DOS executable" in magic: - return [filename] + return [{ + "file_path":file_path, + "filename":filename + }] if any(x in magic for x in VALID_LINUX_TYPES): - return [filename] + return [{ + "file_path":file_path, + "filename":filename + }] retlist = [] if HAS_SFLOCK: if use_sflock: # all in one unarchiver - retlist = demux_sflock(filename, options, package) + retlist = demux_sflock(file_path, filename, options, package) # if it wasn't a ZIP or an email or we weren't able to obtain anything interesting from either, then just submit the # original file if not retlist: - retlist.append(filename) + retlist.append({ + "file_path":file_path, + "filename":filename + }) else: if len(retlist) > 10: retlist = retlist[:10] diff --git a/lib/cuckoo/common/web_utils.py b/lib/cuckoo/common/web_utils.py index 0b7078b93f6..f2fa33ae1c8 100644 --- a/lib/cuckoo/common/web_utils.py +++ b/lib/cuckoo/common/web_utils.py @@ -765,6 +765,7 @@ def download_file(**kwargs): # Keep this as demux_sample_and_add_to_db in DB task_ids_new, extra_details = db.demux_sample_and_add_to_db( file_path=kwargs["path"], + filename=kwargs["filename"], package=package, timeout=timeout, options=kwargs["options"], @@ -1220,6 +1221,7 @@ def download_from_vt(vtdl, details, opt_filename, settings): details["fhash"] = h details["path"] = filename details["service"] = "VirusTotal" + details["filename"] = os.path.basename(filename) if not details.get("content", False): status, task_ids_tmp = download_file(**details) else: diff --git a/lib/cuckoo/core/database.py b/lib/cuckoo/core/database.py index 6035de13f29..be7cbfcd211 100644 --- a/lib/cuckoo/core/database.py +++ b/lib/cuckoo/core/database.py @@ -97,7 +97,7 @@ from dev_utils.elasticsearchdb import elastic_handler, get_analysis_index es = elastic_handler -SCHEMA_VERSION = "8537286ff4d5" +SCHEMA_VERSION = "fd50efe2ab14" TASK_BANNED = "banned" TASK_PENDING = "pending" TASK_RUNNING = "running" @@ -383,6 +383,7 @@ class Task(Base): id = Column(Integer(), primary_key=True) target = Column(Text(), nullable=False) + filename = Column(Text(), nullable=False) category = Column(String(255), nullable=False) cape = Column(String(2048), nullable=True) timeout = Column(Integer(), server_default="0", nullable=False) @@ -1172,6 +1173,7 @@ def register_sample(self, obj, source_url=False): def add( self, obj, + filename, timeout=0, package="", options="", @@ -1306,7 +1308,11 @@ def add( elif isinstance(obj, URL): task = Task(obj.url) - + + if isinstance(filename, bytes): + task.filename = filename.decode() + else: + task.filename = filename task.category = obj.__class__.__name__.lower() task.timeout = timeout task.package = package @@ -1365,6 +1371,7 @@ def add( def add_path( self, file_path, + filename, timeout=0, package="", options="", @@ -1426,6 +1433,7 @@ def add_path( return self.add( File(file_path), + filename, timeout, package, options, @@ -1455,6 +1463,7 @@ def add_path( def demux_sample_and_add_to_db( self, file_path, + filename, timeout=0, package="", options="", @@ -1495,11 +1504,11 @@ 
def demux_sample_and_add_to_db( package = "" original_options = options # extract files from the (potential) archive - extracted_files = demux_sample(file_path, package, options) + extracted_files = demux_sample(file_path, filename, package, options) # check if len is 1 and the same file, if diff register file, and set parent if not isinstance(file_path, bytes): file_path = file_path.encode() - if extracted_files and file_path not in extracted_files: + if extracted_files and {"file_path": file_path, "filename": filename} not in extracted_files: sample_parent_id = self.register_sample(File(file_path), source_url=source_url) if conf.cuckoo.delete_archive: os.remove(file_path) @@ -1511,7 +1520,7 @@ def demux_sample_and_add_to_db( if isinstance(runfile, str): runfile = runfile.encode() for xfile in extracted_files: - if runfile in xfile.lower(): + if runfile in xfile.get("file_path").lower(): extracted_files = [xfile] break @@ -1519,19 +1528,19 @@ def demux_sample_and_add_to_db( for file in extracted_files: if static: # we don't need to process extra file if we already have it and config - config = static_config_lookup(file) + config = static_config_lookup(file.get("file_path")) if config: task_ids.append(config["id"]) else: - config = static_extraction(file) + config = static_extraction(file.get("file_path")) if config or static_extraction: task_ids += self.add_static( - file_path=file, priority=priority, tlp=tlp, user_id=user_id, username=username, options=options + file_path=file.get("file_path"), filename=file.get("filename"), priority=priority, tlp=tlp, user_id=user_id, username=username, options=options ) if not config and only_extraction is False: if not package: - f = SflockFile.from_path(file) + f = SflockFile.from_path(file.get("file_path")) tmp_package = sflock_identify(f) if tmp_package and tmp_package in sandbox_packages: package = tmp_package @@ -1540,7 +1549,7 @@ def demux_sample_and_add_to_db( del f if package == "dll" and "function" not in options: - dll_exports = File(file).get_dll_exports() + dll_exports = File(file.get("file_path")).get_dll_exports() if "DllRegisterServer" in dll_exports: package = "regsvr" elif "xlAutoOpen" in dll_exports: @@ -1556,7 +1565,8 @@ def demux_sample_and_add_to_db( options = "dist_extract=1" task_id = self.add_path( - file_path=file.decode(), + file_path=file.get("file_path").decode(), + filename=file.get("filename"), timeout=timeout, priority=priority, options=options, @@ -1595,6 +1605,7 @@ def demux_sample_and_add_to_db( def add_pcap( self, file_path, + filename, timeout=0, package="", options="", @@ -1617,6 +1628,7 @@ def add_pcap( ): return self.add( PCAP(file_path.decode()), + filename, timeout, package, options, @@ -1642,6 +1654,7 @@ def add_pcap( def add_static( self, file_path, + filename, timeout=0, package="", options="", @@ -1663,12 +1676,12 @@ def add_static( user_id=0, username=False, ): - extracted_files = demux_sample(file_path, package, options) + extracted_files = demux_sample(file_path, filename, package, options) sample_parent_id = None # check if len is 1 and the same file, if diff register file, and set parent if not isinstance(file_path, bytes): file_path = file_path.encode() - if extracted_files and file_path not in extracted_files: + if extracted_files and {"file_path": file_path, "filename": filename} not in extracted_files: sample_parent_id = self.register_sample(File(file_path)) if conf.cuckoo.delete_archive: os.remove(file_path) @@ -1677,7 +1690,8 @@ def add_static( # create tasks for each file in the archive for file 
in extracted_files: task_id = self.add( - Static(file.decode()), + Static(file.get("file_path").decode()), + file.get("filename"), timeout, package, options, @@ -1760,6 +1774,7 @@ def add_url( return self.add( URL(url), + url, timeout, package, options, diff --git a/utils/db_migration/versions/add_filename_to_tasks.py b/utils/db_migration/versions/add_filename_to_tasks.py new file mode 100644 index 00000000000..a91e4a40051 --- /dev/null +++ b/utils/db_migration/versions/add_filename_to_tasks.py @@ -0,0 +1,31 @@ +# Copyright (C) 2010-2015 Cuckoo Foundation. +# This file is part of Cuckoo Sandbox - http://www.cuckoosandbox.org +# See the file 'docs/LICENSE' for copying permission. + +"""add filename to tasks + +Revision ID: fd50efe2ab14 +Revises: 8537286ff4d5 +Create Date: 2021-12-31 08:59:12.774006 + +""" + +# revision identifiers, used by Alembic. +revision = 'fd50efe2ab14' +down_revision = '8537286ff4d5' + +from alembic import op +import sqlalchemy as sa + + +def upgrade(): + # ### commands auto generated by Alembic - please adjust! ### + op.add_column('tasks', sa.Column('filename', sa.Text(), nullable=False, server_default="None")) + op.alter_column("tasks", "filename", server_default="None") + # ### end Alembic commands ### + + +def downgrade(): + # ### commands auto generated by Alembic - please adjust! ### + op.drop_column('tasks', 'filename') + # ### end Alembic commands ### diff --git a/web/analysis/views.py b/web/analysis/views.py index 583fb7a2edb..fd988055232 100644 --- a/web/analysis/views.py +++ b/web/analysis/views.py @@ -184,7 +184,7 @@ def get_analysis_info(db, id=-1, task=None): new = task.to_dict() if new["category"] in ("file", "pcap", "static") and new["sample_id"] is not None: new["sample"] = db.view_sample(new["sample_id"]).to_dict() - filename = os.path.basename(new["target"]) + filename = new["filename"] new.update({"filename": filename}) new.update({"user_task_tags": get_tags_tasks([new["id"]])}) diff --git a/web/apiv2/views.py b/web/apiv2/views.py index 73cb72b97d3..9fec956cf4b 100644 --- a/web/apiv2/views.py +++ b/web/apiv2/views.py @@ -166,7 +166,7 @@ def tasks_create_static(request): tmp_path = store_temp_file(sample.read(), sanitize_filename(sample.name)) try: task_id, extra_details = db.demux_sample_and_add_to_db( - tmp_path, options=options, priority=priority, static=1, only_extraction=True, user_id=request.user.id or 0 + tmp_path, sample.name, options=options, priority=priority, static=1, only_extraction=True, user_id=request.user.id or 0 ) task_ids.extend(task_id) except CuckooDemuxError as e: @@ -305,11 +305,11 @@ def tasks_create_file(request): else: resp = {"error": True, "error_value": "Failed to convert SAZ to PCAP"} return Response(resp) - task_id = db.add_pcap(file_path=tmp_path) + task_id = db.add_pcap(file_path=tmp_path, filename=sample.name) details["task_ids"].append(task_id) continue if static: - task_id = db.add_static(file_path=tmp_path, priority=priority, user_id=request.user.id or 0) + task_id = db.add_static(file_path=tmp_path, filename=sample.name, priority=priority, user_id=request.user.id or 0) details["task_ids"].append(task_id) continue if quarantine: @@ -319,12 +319,20 @@ def tasks_create_file(request): tmp_path = path except Exception as e: print(e, "removing quarantine") - try: - File(path).get_type() - except TypeError: - details["errors"].append({os.path.basename(tmp_path).decode(): "Error submitting file - bad file type"}) - continue + + if not path: + return render(request, "error.html", {"error": "You uploaded an unsupported 
quarantine file."}) + + details["filename"] = sample.name + details["path"] = path + details["content"] = get_file_content(path) + status, task_ids_tmp = download_file(**details) + if status == "error": + details["errors"].append({sample.name: task_ids_tmp}) + else: + details["task_ids"] = task_ids_tmp else: + details["filename"] = sample.name details["content"] = get_file_content(tmp_path) status, task_ids_tmp = download_file(**details) if status == "error": @@ -545,6 +553,7 @@ def tasks_create_dlnexec(request): "options": options, "only_extraction": False, "user_id": request.user.id or 0, + "filename": name, } status, task_ids_tmp = download_file(**details) diff --git a/web/submission/views.py b/web/submission/views.py index 24604cfae80..a01c1b3c89a 100644 --- a/web/submission/views.py +++ b/web/submission/views.py @@ -251,6 +251,7 @@ def index(request, resubmit_hash=False): path = store_temp_file(content, filename) details["path"] = path details["content"] = content + details["filename"] = os.path.basename(filename) status, task_ids_tmp = download_file(**details) if status == "error": details["errors"].append({os.path.basename(filename): task_ids_tmp}) @@ -302,6 +303,7 @@ def index(request, resubmit_hash=False): details["path"] = path details["content"] = get_file_content(path) + details["filename"] = sample.name status, task_ids_tmp = download_file(**details) if status == "error": details["errors"].append({os.path.basename(path): task_ids_tmp}) @@ -347,6 +349,7 @@ def index(request, resubmit_hash=False): details["path"] = path details["content"] = get_file_content(path) + details["filename"] = sample.name status, task_ids_tmp = download_file(**details) if status == "error": details["errors"].append({sample.name: task_ids_tmp}) @@ -372,7 +375,7 @@ def index(request, resubmit_hash=False): # let it persist between reboot (if user like to configure it in that way). 
path = store_temp_file(sample.read(), sample.name) - task_id = db.add_static(file_path=path, priority=priority, tlp=tlp, user_id=request.user.id or 0) + task_id = db.add_static(file_path=path, filename=sample.name, priority=priority, tlp=tlp, user_id=request.user.id or 0) if not task_id: return render(request, "error.html", {"error": "We don't have static extractor for this"}) details["task_ids"] += task_id @@ -409,7 +412,7 @@ def index(request, resubmit_hash=False): else: return render(request, "error.html", {"error": "Conversion from SAZ to PCAP failed."}) - task_id = db.add_pcap(file_path=path, priority=priority, tlp=tlp, user_id=request.user.id or 0) + task_id = db.add_pcap(file_path=path, filename=sample.name, priority=priority, tlp=tlp, user_id=request.user.id or 0) if task_id: details["task_ids"].append(task_id) @@ -479,6 +482,7 @@ def index(request, resubmit_hash=False): details["content"] = get_file_content(path) details["service"] = "DLnExec" details["source_url"] = url + details["filename"] = name status, task_ids_tmp = download_file(**details) if status == "error": details["errors"].append({name: task_ids_tmp}) From f8fedd2e5e17c2ec86a36e58e56a6db55fa582a3 Mon Sep 17 00:00:00 2001 From: N1neSun <917549681@qq.com> Date: Tue, 4 Jan 2022 17:54:49 +0800 Subject: [PATCH 2/5] Add file name field for better web experience --- lib/cuckoo/common/demux.py | 68 ++++-- lib/cuckoo/common/web_utils.py | 2 + lib/cuckoo/core/database.py | 43 ++-- .../versions/add_filename_to_tasks.py | 31 +++ web/analysis/views.py | 2 +- web/apiv2/views.py | 229 +++++++++--------- web/submission/views.py | 8 +- 7 files changed, 231 insertions(+), 152 deletions(-) create mode 100644 utils/db_migration/versions/add_filename_to_tasks.py diff --git a/lib/cuckoo/common/demux.py b/lib/cuckoo/common/demux.py index 0e24a98bdbc..08039087881 100644 --- a/lib/cuckoo/common/demux.py +++ b/lib/cuckoo/common/demux.py @@ -159,6 +159,10 @@ def is_valid_type(magic): return False +def is_valid_path(file_path): + return file_path.get("file_path") + + def _sf_chlildren(child): path_to_extract = False _, ext = os.path.splitext(child.filename) @@ -175,19 +179,25 @@ def _sf_chlildren(child): f.write(child.contents) except Exception as e: log.error(e, exc_info=True) - return path_to_extract + return { + "file_path":path_to_extract, + "filename":child.filename + } -def demux_sflock(filename, options, package): +def demux_sflock(file_path, filename, options, package): retlist = [] # only extract from files with no extension or with .bin (downloaded from us) or .zip PACKAGE, we do extract from zip archives, to ignore it set ZIP PACKAGES - ext = os.path.splitext(filename)[1] + ext = os.path.splitext(file_path)[1] if ext == b".bin": return retlist # to handle when side file for exec is required if "file=" in options: - return [filename] + return [{ + "file_path":file_path, + "filename":filename + }] try: password = "infected" @@ -196,42 +206,48 @@ def demux_sflock(filename, options, package): password = tmp_pass try: - unpacked = unpack(filename, password=password) + unpacked = unpack(file_path, password=password) except UnpackException: - unpacked = unpack(filename) + unpacked = unpack(file_path) if unpacked.package in whitelist_extensions: - return [filename] + return [{ + "file_path":file_path, + "filename":filename + }] if unpacked.package in blacklist_extensions: return retlist for sf_child in unpacked.children or []: if sf_child.to_dict().get("children") and sf_child.to_dict()["children"]: retlist += [_sf_chlildren(ch) for ch in 
sf_child.children] - # child is not available, the original file should be put into the list - if filter(None, retlist): + #child is not available, the original file should be put into the list + if filter(is_valid_path, retlist): retlist.append(_sf_chlildren(sf_child)) else: retlist.append(_sf_chlildren(sf_child)) except Exception as e: log.error(e, exc_info=True) - return list(filter(None, retlist)) + return list(filter(is_valid_path, retlist)) -def demux_sample(filename, package, options, use_sflock=True): +def demux_sample(file_path, filename, package, options, use_sflock=True): """ If file is a ZIP, extract its included files and return their file paths If file is an email, extracts its attachments and return their file paths (later we'll also extract URLs) """ # sflock requires filename to be bytes object for Py3 - if isinstance(filename, str) and use_sflock: - filename = filename.encode() + if isinstance(file_path, str) and use_sflock: + file_path = file_path.encode() # if a package was specified, then don't do anything special if package: - return [filename] + return [{ + "file_path":file_path, + "filename":filename + }] # don't try to extract from office docs - magic = File(filename).get_type() + magic = File(file_path).get_type() # if file is an Office doc and password is supplied, try to decrypt the doc if "Microsoft" in magic: @@ -246,21 +262,33 @@ def demux_sample(filename, package, options, use_sflock=True): # don't try to extract from Java archives or executables if "Java Jar" in magic: - return [filename] + return [{ + "file_path":file_path, + "filename":filename + }] if "PE32" in magic or "MS-DOS executable" in magic: - return [filename] + return [{ + "file_path":file_path, + "filename":filename + }] if any(x in magic for x in VALID_LINUX_TYPES): - return [filename] + return [{ + "file_path":file_path, + "filename":filename + }] retlist = [] if HAS_SFLOCK: if use_sflock: # all in one unarchiver - retlist = demux_sflock(filename, options, package) + retlist = demux_sflock(file_path, filename, options, package) # if it wasn't a ZIP or an email or we weren't able to obtain anything interesting from either, then just submit the # original file if not retlist: - retlist.append(filename) + retlist.append({ + "file_path":file_path, + "filename":filename + }) else: if len(retlist) > 10: retlist = retlist[:10] diff --git a/lib/cuckoo/common/web_utils.py b/lib/cuckoo/common/web_utils.py index 0b7078b93f6..f2fa33ae1c8 100644 --- a/lib/cuckoo/common/web_utils.py +++ b/lib/cuckoo/common/web_utils.py @@ -765,6 +765,7 @@ def download_file(**kwargs): # Keep this as demux_sample_and_add_to_db in DB task_ids_new, extra_details = db.demux_sample_and_add_to_db( file_path=kwargs["path"], + filename=kwargs["filename"], package=package, timeout=timeout, options=kwargs["options"], @@ -1220,6 +1221,7 @@ def download_from_vt(vtdl, details, opt_filename, settings): details["fhash"] = h details["path"] = filename details["service"] = "VirusTotal" + details["filename"] = os.path.basename(filename) if not details.get("content", False): status, task_ids_tmp = download_file(**details) else: diff --git a/lib/cuckoo/core/database.py b/lib/cuckoo/core/database.py index 6035de13f29..be7cbfcd211 100644 --- a/lib/cuckoo/core/database.py +++ b/lib/cuckoo/core/database.py @@ -97,7 +97,7 @@ from dev_utils.elasticsearchdb import elastic_handler, get_analysis_index es = elastic_handler -SCHEMA_VERSION = "8537286ff4d5" +SCHEMA_VERSION = "fd50efe2ab14" TASK_BANNED = "banned" TASK_PENDING = "pending" TASK_RUNNING = 
"running" @@ -383,6 +383,7 @@ class Task(Base): id = Column(Integer(), primary_key=True) target = Column(Text(), nullable=False) + filename = Column(Text(), nullable=False) category = Column(String(255), nullable=False) cape = Column(String(2048), nullable=True) timeout = Column(Integer(), server_default="0", nullable=False) @@ -1172,6 +1173,7 @@ def register_sample(self, obj, source_url=False): def add( self, obj, + filename, timeout=0, package="", options="", @@ -1306,7 +1308,11 @@ def add( elif isinstance(obj, URL): task = Task(obj.url) - + + if isinstance(filename, bytes): + task.filename = filename.decode() + else: + task.filename = filename task.category = obj.__class__.__name__.lower() task.timeout = timeout task.package = package @@ -1365,6 +1371,7 @@ def add( def add_path( self, file_path, + filename, timeout=0, package="", options="", @@ -1426,6 +1433,7 @@ def add_path( return self.add( File(file_path), + filename, timeout, package, options, @@ -1455,6 +1463,7 @@ def add_path( def demux_sample_and_add_to_db( self, file_path, + filename, timeout=0, package="", options="", @@ -1495,11 +1504,11 @@ def demux_sample_and_add_to_db( package = "" original_options = options # extract files from the (potential) archive - extracted_files = demux_sample(file_path, package, options) + extracted_files = demux_sample(file_path, filename, package, options) # check if len is 1 and the same file, if diff register file, and set parent if not isinstance(file_path, bytes): file_path = file_path.encode() - if extracted_files and file_path not in extracted_files: + if extracted_files and {"file_path": file_path, "filename": filename} not in extracted_files: sample_parent_id = self.register_sample(File(file_path), source_url=source_url) if conf.cuckoo.delete_archive: os.remove(file_path) @@ -1511,7 +1520,7 @@ def demux_sample_and_add_to_db( if isinstance(runfile, str): runfile = runfile.encode() for xfile in extracted_files: - if runfile in xfile.lower(): + if runfile in xfile.get("file_path").lower(): extracted_files = [xfile] break @@ -1519,19 +1528,19 @@ def demux_sample_and_add_to_db( for file in extracted_files: if static: # we don't need to process extra file if we already have it and config - config = static_config_lookup(file) + config = static_config_lookup(file.get("file_path")) if config: task_ids.append(config["id"]) else: - config = static_extraction(file) + config = static_extraction(file.get("file_path")) if config or static_extraction: task_ids += self.add_static( - file_path=file, priority=priority, tlp=tlp, user_id=user_id, username=username, options=options + file_path=file.get("file_path"), filename=file.get("filename"), priority=priority, tlp=tlp, user_id=user_id, username=username, options=options ) if not config and only_extraction is False: if not package: - f = SflockFile.from_path(file) + f = SflockFile.from_path(file.get("file_path")) tmp_package = sflock_identify(f) if tmp_package and tmp_package in sandbox_packages: package = tmp_package @@ -1540,7 +1549,7 @@ def demux_sample_and_add_to_db( del f if package == "dll" and "function" not in options: - dll_exports = File(file).get_dll_exports() + dll_exports = File(file.get("file_path")).get_dll_exports() if "DllRegisterServer" in dll_exports: package = "regsvr" elif "xlAutoOpen" in dll_exports: @@ -1556,7 +1565,8 @@ def demux_sample_and_add_to_db( options = "dist_extract=1" task_id = self.add_path( - file_path=file.decode(), + file_path=file.get("file_path").decode(), + filename=file.get("filename"), timeout=timeout, 
priority=priority, options=options, @@ -1595,6 +1605,7 @@ def demux_sample_and_add_to_db( def add_pcap( self, file_path, + filename, timeout=0, package="", options="", @@ -1617,6 +1628,7 @@ def add_pcap( ): return self.add( PCAP(file_path.decode()), + filename, timeout, package, options, @@ -1642,6 +1654,7 @@ def add_pcap( def add_static( self, file_path, + filename, timeout=0, package="", options="", @@ -1663,12 +1676,12 @@ def add_static( user_id=0, username=False, ): - extracted_files = demux_sample(file_path, package, options) + extracted_files = demux_sample(file_path, filename, package, options) sample_parent_id = None # check if len is 1 and the same file, if diff register file, and set parent if not isinstance(file_path, bytes): file_path = file_path.encode() - if extracted_files and file_path not in extracted_files: + if extracted_files and {"file_path": file_path, "filename": filename} not in extracted_files: sample_parent_id = self.register_sample(File(file_path)) if conf.cuckoo.delete_archive: os.remove(file_path) @@ -1677,7 +1690,8 @@ def add_static( # create tasks for each file in the archive for file in extracted_files: task_id = self.add( - Static(file.decode()), + Static(file.get("file_path").decode()), + file.get("filename"), timeout, package, options, @@ -1760,6 +1774,7 @@ def add_url( return self.add( URL(url), + url, timeout, package, options, diff --git a/utils/db_migration/versions/add_filename_to_tasks.py b/utils/db_migration/versions/add_filename_to_tasks.py new file mode 100644 index 00000000000..a91e4a40051 --- /dev/null +++ b/utils/db_migration/versions/add_filename_to_tasks.py @@ -0,0 +1,31 @@ +# Copyright (C) 2010-2015 Cuckoo Foundation. +# This file is part of Cuckoo Sandbox - http://www.cuckoosandbox.org +# See the file 'docs/LICENSE' for copying permission. + +"""add filename to tasks + +Revision ID: fd50efe2ab14 +Revises: 8537286ff4d5 +Create Date: 2021-12-31 08:59:12.774006 + +""" + +# revision identifiers, used by Alembic. +revision = 'fd50efe2ab14' +down_revision = '8537286ff4d5' + +from alembic import op +import sqlalchemy as sa + + +def upgrade(): + # ### commands auto generated by Alembic - please adjust! ### + op.add_column('tasks', sa.Column('filename', sa.Text(), nullable=False, server_default="None")) + op.alter_column("tasks", "filename", server_default="None") + # ### end Alembic commands ### + + +def downgrade(): + # ### commands auto generated by Alembic - please adjust! 
### + op.drop_column('tasks', 'filename') + # ### end Alembic commands ### diff --git a/web/analysis/views.py b/web/analysis/views.py index 583fb7a2edb..fd988055232 100644 --- a/web/analysis/views.py +++ b/web/analysis/views.py @@ -184,7 +184,7 @@ def get_analysis_info(db, id=-1, task=None): new = task.to_dict() if new["category"] in ("file", "pcap", "static") and new["sample_id"] is not None: new["sample"] = db.view_sample(new["sample_id"]).to_dict() - filename = os.path.basename(new["target"]) + filename = new["filename"] new.update({"filename": filename}) new.update({"user_task_tags": get_tags_tasks([new["id"]])}) diff --git a/web/apiv2/views.py b/web/apiv2/views.py index ff3ecd06b16..4dc60bce63a 100644 --- a/web/apiv2/views.py +++ b/web/apiv2/views.py @@ -166,7 +166,7 @@ def tasks_create_static(request): tmp_path = store_temp_file(sample.read(), sanitize_filename(sample.name)) try: task_id, extra_details = db.demux_sample_and_add_to_db( - tmp_path, options=options, priority=priority, static=1, only_extraction=True, user_id=request.user.id or 0 + tmp_path, sample.name, options=options, priority=priority, static=1, only_extraction=True, user_id=request.user.id or 0 ) task_ids.extend(task_id) except CuckooDemuxError as e: @@ -305,11 +305,11 @@ def tasks_create_file(request): else: resp = {"error": True, "error_value": "Failed to convert SAZ to PCAP"} return Response(resp) - task_id = db.add_pcap(file_path=tmp_path) + task_id = db.add_pcap(file_path=tmp_path, filename=sample.name) details["task_ids"].append(task_id) continue if static: - task_id = db.add_static(file_path=tmp_path, priority=priority, user_id=request.user.id or 0) + task_id = db.add_static(file_path=tmp_path, filename=sample.name, priority=priority, user_id=request.user.id or 0) details["task_ids"].append(task_id) continue if quarantine: @@ -319,11 +319,11 @@ def tasks_create_file(request): tmp_path = path except Exception as e: print(e, "removing quarantine") - - if not path: - resp = {"error": True, "error_value": "You uploaded an unsupported quarantine file."} - return Response(resp) + if not path: + return render(request, "error.html", {"error": "You uploaded an unsupported quarantine file."}) + + details["filename"] = sample.name details["path"] = path details["content"] = get_file_content(path) status, task_ids_tmp = download_file(**details) @@ -332,6 +332,7 @@ def tasks_create_file(request): else: details["task_ids"] = task_ids_tmp else: + details["filename"] = sample.name details["content"] = get_file_content(tmp_path) status, task_ids_tmp = download_file(**details) if status == "error": @@ -552,6 +553,7 @@ def tasks_create_dlnexec(request): "options": options, "only_extraction": False, "user_id": request.user.id or 0, + "filename": name, } status, task_ids_tmp = download_file(**details) @@ -582,106 +584,6 @@ def tasks_create_dlnexec(request): return Response(resp) -# Download a file from VT for analysis -@csrf_exempt -@api_view(["POST"]) -def tasks_vtdl(request): - resp = {} - if request.method == "POST": - # Check if this API function is enabled - if not apiconf.vtdl.get("enabled"): - resp = {"error": True, "error_value": "VTDL Create API is Disabled"} - return Response(resp) - - hashes = request.data.get("vtdl".strip()) - if not hashes: - hashes = request.data.get("hashes".strip()) - - if not hashes: - resp = {"error": True, "error_value": "vtdl (hash list) value is empty"} - return Response(resp) - - resp["error"] = False - options = request.data.get("options", "") - custom = request.data.get("custom", "") - 
machine = request.data.get("machine", "") - - opt_filename = get_user_filename(options, custom) - - task_machines = [] - vm_list = [] - opt_apikey = False - opts = get_options(options) - if opts: - opt_apikey = opts.get("apikey", False) - - if not (settings.VTDL_KEY or opt_apikey) or not settings.VTDL_PATH: - resp = { - "error": True, - "error_value": "You specified VirusTotal but must edit the file and specify your VTDL_KEY variable and VTDL_PATH base directory", - } - return Response(resp) - - for vm in db.list_machines(): - vm_list.append(vm.label) - - if machine.lower() == "all": - if not apiconf.filecreate.get("allmachines"): - resp = {"error": True, "error_value": "Machine=all is disabled using the API"} - return Response(resp) - for entry in vm_list: - task_machines.append(entry) - else: - # Check if VM is in our machines table - if machine == "" or machine in vm_list: - task_machines.append(machine) - # Error if its not - else: - resp = { - "error": True, - "error_value": ("Machine '{0}' does not exist. Available: {1}".format(machine, ", ".join(vm_list))), - } - return Response(resp) - - details = { - "apikey": settings.VTDL_KEY or opt_apikey, - "errors": [], - "content": False, - "request": request, - "task_ids": [], - "url": False, - "params": {}, - "headers": {}, - "service": "VirusTotal", - "path": "", - "fhash": False, - "options": options, - "only_extraction": False, - "user_id": request.user.id or 0, - } - - details = download_from_vt(hashes, details, opt_filename, settings) - - if details["task_ids"]: - tasks_count = len(details["task_ids"]) - else: - tasks_count = 0 - if tasks_count > 0: - resp["data"] = {} - resp["errors"] = details["errors"] - resp["data"]["task_ids"] = details["task_ids"] - if len(details["task_ids"]) == 1: - resp["data"]["message"] = "Task ID {0} has been submitted".format(str(details["task_ids"][0])) - else: - resp["data"]["message"] = "Task IDs {0} have been submitted".format(", ".join(str(x) for x in details["task_ids"])) - else: - resp = {"error": True, "error_value": "Error adding task to database", "errors": details["errors"]} - else: - resp = {"error": True, "error_value": "Method not allowed"} - - return Response(resp) - - # Return Sample information. 
@csrf_exempt @api_view(["GET"]) @@ -1207,10 +1109,7 @@ def tasks_iocs(request, task_id, detail=None): if repconf.mongodb.get("enabled") and not buf: buf = results_db.analysis.find_one({"info.id": int(task_id)}) if es_as_db and not buf: - tmp = es.search( - index=get_analysis_index(), - body=get_query_by_info_id(task_id) - )["hits"]["hits"] + tmp = es.search(index=get_analysis_index(), body=get_query_by_info_id(task_id))["hits"]["hits"] if tmp: buf = tmp[-1]["_source"] else: @@ -1988,10 +1887,7 @@ def tasks_config(request, task_id, cape_name=False): with open(jfile, "r") as jdata: buf = json.load(jdata) if es_as_db and not buf: - tmp = es.search( - index=get_analysis_index(), - body=get_query_by_info_id(task_id) - )["hits"]["hits"] + tmp = es.search(index=get_analysis_index(), body=get_query_by_info_id(task_id))["hits"]["hits"] if len(tmp) > 1: buf = tmp[-1]["_source"] elif len(tmp) == 1: @@ -2088,3 +1984,106 @@ def tasks_delete_many(request): def limit_exceeded(request, exception): resp = {"error": True, "error_value": "Rate limit exceeded for this API"} return Response(resp) + + +dl_service_map = { + "VirusTotal": "vtdl", +} + +def common_download_func(service, request): + resp = {} + hashes = request.data.get(dl_service_map[service].strip()) + if not hashes: + hashes = request.POST.get("hashes".strip(), None) + if not hashes: + return Response({"error": True, "error_value": f"hashes (hash list) or {dl_service_map[service]} value is empty"}) + resp["error"] = False + # Parse potential POST options (see submission/views.py) + options = request.POST.get("options", "") + custom = request.POST.get("custom", "") + machine = request.POST.get("machine", "") + opt_filename = get_user_filename(options, custom) + + details = {} + task_machines = [] + vm_list = [] + opt_apikey = False + + if service == "VirusTotal": + opts = get_options(options) + if opts: + opt_apikey = opts.get("apikey", False) + + if not (settings.VTDL_KEY or opt_apikey) or not settings.VTDL_PATH: + resp = { + "error": True, + "error_value": "You specified VirusTotal but must edit the file and specify your VTDL_KEY variable and VTDL_PATH base directory", + } + return Response(resp) + + for vm in db.list_machines(): + vm_list.append(vm.label) + if machine.lower() == "all": + if not apiconf.filecreate.get("allmachines"): + resp = {"error": True, "error_value": "Machine=all is disabled using the API"} + return Response(resp) + for entry in vm_list: + task_machines.append(entry) + else: + # Check if VM is in our machines table + if machine == "" or machine in vm_list: + task_machines.append(machine) + # Error if its not + else: + resp = { + "error": True, + "error_value": ("Machine '{0}' does not exist. 
Available: {1}".format(machine, ", ".join(vm_list))), + } + return Response(resp) + + details = { + "errors": [], + "content": False, + "request": request, + "task_id": [], + "url": False, + "params": {}, + "headers": {}, + "path": "", + "fhash": False, + "options": options, + "only_extraction": False, + "service": service, + "user_id": request.user.id or 0, + } + + if service == "VirusTotal": + details["apikey"] = settings.VTDL_KEY or opt_apikey + details = download_from_vt(hashes, details, opt_filename, settings) + if isinstance(details.get("task_ids"), list): + tasks_count = len(details["task_ids"]) + else: + tasks_count = 0 + if tasks_count > 0: + resp["data"] = {} + resp["errors"] = details["errors"] + resp["data"]["task_ids"] = details.get("task_ids", []) + if len(details.get("task_ids", [])) == 1: + resp["data"]["message"] = "Task ID {0} has been submitted".format(str(details.get("task_ids", [])[0])) + else: + resp["data"]["message"] = "Task IDs {0} have been submitted".format( + ", ".join(str(x) for x in details.get("task_ids", [])) + ) + else: + resp = {"error": True, "error_value": "Error adding task to database", "errors": details["errors"]} + + return Response(resp) + + +@csrf_exempt +@api_view(["POST"]) +def tasks_vtdl(request): + # Check if this API function is enabled + if not apiconf.vtdl.get("enabled"): + return Response({"error": True, "error_value": "VTDL Create API is Disabled"}) + return common_download_func("VirusTotal", request) diff --git a/web/submission/views.py b/web/submission/views.py index 24604cfae80..a01c1b3c89a 100644 --- a/web/submission/views.py +++ b/web/submission/views.py @@ -251,6 +251,7 @@ def index(request, resubmit_hash=False): path = store_temp_file(content, filename) details["path"] = path details["content"] = content + details["filename"] = os.path.basename(filename) status, task_ids_tmp = download_file(**details) if status == "error": details["errors"].append({os.path.basename(filename): task_ids_tmp}) @@ -302,6 +303,7 @@ def index(request, resubmit_hash=False): details["path"] = path details["content"] = get_file_content(path) + details["filename"] = sample.name status, task_ids_tmp = download_file(**details) if status == "error": details["errors"].append({os.path.basename(path): task_ids_tmp}) @@ -347,6 +349,7 @@ def index(request, resubmit_hash=False): details["path"] = path details["content"] = get_file_content(path) + details["filename"] = sample.name status, task_ids_tmp = download_file(**details) if status == "error": details["errors"].append({sample.name: task_ids_tmp}) @@ -372,7 +375,7 @@ def index(request, resubmit_hash=False): # let it persist between reboot (if user like to configure it in that way). 
             path = store_temp_file(sample.read(), sample.name)
-            task_id = db.add_static(file_path=path, priority=priority, tlp=tlp, user_id=request.user.id or 0)
+            task_id = db.add_static(file_path=path, filename=sample.name, priority=priority, tlp=tlp, user_id=request.user.id or 0)
             if not task_id:
                 return render(request, "error.html", {"error": "We don't have static extractor for this"})
             details["task_ids"] += task_id
@@ -409,7 +412,7 @@ def index(request, resubmit_hash=False):
             else:
                 return render(request, "error.html", {"error": "Conversion from SAZ to PCAP failed."})
 
-        task_id = db.add_pcap(file_path=path, priority=priority, tlp=tlp, user_id=request.user.id or 0)
+        task_id = db.add_pcap(file_path=path, filename=sample.name, priority=priority, tlp=tlp, user_id=request.user.id or 0)
         if task_id:
             details["task_ids"].append(task_id)
 
@@ -479,6 +482,7 @@ def index(request, resubmit_hash=False):
             details["content"] = get_file_content(path)
             details["service"] = "DLnExec"
             details["source_url"] = url
+            details["filename"] = name
             status, task_ids_tmp = download_file(**details)
             if status == "error":
                 details["errors"].append({name: task_ids_tmp})

From fee6ecaaa8155653da31b6da5610d01dad7e81af Mon Sep 17 00:00:00 2001
From: N1neSun <917549681@qq.com>
Date: Tue, 4 Jan 2022 18:07:25 +0800
Subject: [PATCH 3/5] Fix conflict

---
 web/apiv2/views.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/web/apiv2/views.py b/web/apiv2/views.py
index 4dc60bce63a..3670cc1d8aa 100644
--- a/web/apiv2/views.py
+++ b/web/apiv2/views.py
@@ -321,7 +321,8 @@ def tasks_create_file(request):
                     print(e, "removing quarantine")
 
                 if not path:
-                    return render(request, "error.html", {"error": "You uploaded an unsupported quarantine file."})
+                    resp = {"error": True, "error_value": "You uploaded an unsupported quarantine file."}
+                    return Response(resp)
 
                 details["filename"] = sample.name
                 details["path"] = path

From 1ab0e1c02d74873623af41a22a265f7fe6e9ffc9 Mon Sep 17 00:00:00 2001
From: doomedraven
Date: Tue, 4 Jan 2022 12:25:23 +0100
Subject: [PATCH 4/5] Update add_filename_to_tasks.py

---
 utils/db_migration/versions/add_filename_to_tasks.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/utils/db_migration/versions/add_filename_to_tasks.py b/utils/db_migration/versions/add_filename_to_tasks.py
index a91e4a40051..93df445cfa1 100644
--- a/utils/db_migration/versions/add_filename_to_tasks.py
+++ b/utils/db_migration/versions/add_filename_to_tasks.py
@@ -14,8 +14,8 @@
 revision = 'fd50efe2ab14'
 down_revision = '8537286ff4d5'
 
-from alembic import op
 import sqlalchemy as sa
+from alembic import op
 
 
 def upgrade():

From e93954e9cbdf0bfa4d26d425c8d4f2cbce5ad73a Mon Sep 17 00:00:00 2001
From: N1neSun <917549681@qq.com>
Date: Mon, 10 Jan 2022 18:20:19 +0800
Subject: [PATCH 5/5] Add conf option to enable origin filename

---
 conf/web.conf               |  3 +++
 lib/cuckoo/core/database.py | 10 +++++++---
 2 files changed, 10 insertions(+), 3 deletions(-)

diff --git a/conf/web.conf b/conf/web.conf
index d54542a3fb9..ae0c8aab5e8 100644
--- a/conf/web.conf
+++ b/conf/web.conf
@@ -224,3 +224,6 @@ zip_pwd = infected
 
 [evtx_download]
 enabled = no
+
+[display_origin_filename]
+enabled = no
\ No newline at end of file
diff --git a/lib/cuckoo/core/database.py b/lib/cuckoo/core/database.py
index 8d6c33d979b..102a691f6fc 100644
--- a/lib/cuckoo/core/database.py
+++ b/lib/cuckoo/core/database.py
@@ -82,6 +82,7 @@
 repconf = Config("reporting")
 web_conf = Config("web")
 LINUX_ENABLED = web_conf.linux.enabled
+ORIGIN_FILENAME_ENABLED = 
web_conf.display_origin_filename.enabled if repconf.mongodb.enabled: import pymongo @@ -1309,10 +1310,13 @@ def add( elif isinstance(obj, URL): task = Task(obj.url) - if isinstance(filename, bytes): - task.filename = filename.decode() + if ORIGIN_FILENAME_ENABLED: + if isinstance(filename, bytes): + task.filename = filename.decode() + else: + task.filename = filename else: - task.filename = filename + task.filename = os.path.basename(task.target) task.category = obj.__class__.__name__.lower() task.timeout = timeout task.package = package
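
The sketch below is illustrative only and is not part of the patches: it shows the calling convention this series introduces, where demux_sample() takes the original file name next to the on-disk path and returns a list of {"file_path", "filename"} dicts, and demux_sample_and_add_to_db() forwards that name into the new tasks.filename column (subject to the [display_origin_filename] option in conf/web.conf, read as web_conf.display_origin_filename.enabled in lib/cuckoo/core/database.py). The sample path, file name, and empty option strings are invented for the example.

    # Illustrative sketch of the API after this series; paths and names are made up.
    from lib.cuckoo.common.demux import demux_sample
    from lib.cuckoo.core.database import Database

    db = Database()

    # demux now keeps the browser-supplied name alongside the temporary path
    # and returns dicts instead of bare paths.
    for entry in demux_sample(b"/tmp/cuckoo-tmp/upload_ab12/sample.zip", "invoice_2022.zip", package="", options=""):
        print(entry["file_path"], entry["filename"])

    # The web layer passes the same original name through, so the Task row can
    # record it instead of the temporary file name on disk.
    task_ids, extra_details = db.demux_sample_and_add_to_db(
        file_path="/tmp/cuckoo-tmp/upload_ab12/sample.zip",
        filename="invoice_2022.zip",
        options="",
    )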