Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[VIRTS-1970] Move files aside on --fresh instead of deleting them #2101

Merged
merged 11 commits into from
Apr 6, 2021
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@ data/sources/*
!data/sources/.gitkeep
data/objectives/*
!data/objectives/.gitkeep
data/backup/*
!data/backup/.gitkeep
.tox/

# coverage reports
Expand Down
64 changes: 53 additions & 11 deletions app/service/data_svc.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
import asyncio
import copy
import datetime
import glob
import os.path
import os
import pickle
import tarfile
import shutil
import warnings
from base64 import b64encode
Expand All @@ -22,6 +24,18 @@

MIN_MODULE_LEN = 1

# Directory in which DataService.destroy() writes its timestamped
# 'backup-<timestamp>.tar.gz' archives.
DATA_BACKUP_DIR = "data/backup"
# Glob patterns expanded by DataService._iter_data_files() to enumerate the
# data files that destroy() backs up and then deletes. The last entry has no
# wildcard, so it matches only that exact path (when it exists).
DATA_FILE_GLOBS = (
'data/abilities/*',
'data/adversaries/*',
'data/facts/*',
'data/objectives/*',
'data/payloads/*',
'data/results/*',
'data/sources/*',
'data/object_store',
)


class DataService(DataServiceInterface, BaseService):

Expand All @@ -31,19 +45,47 @@ def __init__(self):
schedules=[], plugins=[], obfuscators=[], objectives=[])
self.ram = copy.deepcopy(self.schema)

@staticmethod
def _iter_data_files():
    """Yield the paths of the data files managed by caldera.

    Every pattern in DATA_FILE_GLOBS is expanded in order with
    ``glob.glob`` and the matches are yielded one by one. The paths are
    relative to the root caldera folder, so they begin with "data/".

    Note:
        The wildcard patterns never match names starting with '.'
        (e.g., '.gitkeep'), so such files are not yielded.
    """
    for pattern in DATA_FILE_GLOBS:
        yield from glob.glob(pattern)

@staticmethod
def _delete_file(path):
    """Delete the file, symbolic link, or directory tree at ``path``.

    Nothing happens when nothing exists at ``path``.

    Symbolic links are unlinked instead of being followed:
    ``os.path.exists`` reports a dangling link as missing (so it would be
    left behind), and ``shutil.rmtree`` raises ``OSError`` when handed a
    link that points at a directory.

    Args:
        path: Path of the entry to remove.
    """
    if os.path.islink(path):
        # Remove the link itself; never touch whatever it points to.
        os.remove(path)
    elif os.path.isdir(path):
        shutil.rmtree(path)
    elif os.path.exists(path):
        os.remove(path)

@staticmethod
async def destroy():
    """Reset the caldera data directory and server state.

    This creates a gzipped tarball backup of the data files tracked by caldera
    (see DATA_FILE_GLOBS) inside DATA_BACKUP_DIR, named
    ``backup-<UTC timestamp>.tar.gz``. Paths are preserved within the tarball,
    with all files having "data/" as the root.

    The data files are deleted only after the tarball has been completely
    written and closed, so a failure while archiving leaves the original
    data in place instead of a partial backup and missing files.
    """
    # exist_ok avoids the check-then-create race and also creates a missing
    # parent directory.
    os.makedirs(DATA_BACKUP_DIR, exist_ok=True)

    timestamp = datetime.datetime.now(datetime.timezone.utc).strftime('%Y%m%d%H%M%S')
    tarball_path = os.path.join(DATA_BACKUP_DIR, f'backup-{timestamp}.tar.gz')

    # Snapshot the listing once so that the archive pass and the deletion
    # pass operate on exactly the same set of paths.
    data_files = list(DataService._iter_data_files())

    with tarfile.open(tarball_path, 'w:gz') as tarball:
        for file_path in data_files:
            tarball.add(file_path)

    # The archive is flushed and closed at this point; removal is now safe.
    for file_path in data_files:
        DataService._delete_file(file_path)

async def save_state(self):
await self._prune_non_critical_data()
Expand Down
Empty file added data/backup/.gitkeep
Empty file.
3 changes: 2 additions & 1 deletion server.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
from app.service.app_svc import AppService
from app.service.auth_svc import AuthService
from app.service.contact_svc import ContactService
from app.service.data_svc import DataService
from app.service.data_svc import DataService, DATA_BACKUP_DIR
from app.service.event_svc import EventService
from app.service.file_svc import FileSvc
from app.service.learning_svc import LearningService
Expand Down Expand Up @@ -122,6 +122,7 @@ def list_str(values):
init_swagger_documentation(app_svc.application)

if args.fresh:
logging.info("Fresh startup: resetting server data. See %s directory for data backups.", DATA_BACKUP_DIR)
asyncio.get_event_loop().run_until_complete(data_svc.destroy())

run_tasks(services=app_svc.get_services())