Skip to content

Commit

Permalink
[Feature] Delete orphan files (#1575)
Browse files Browse the repository at this point in the history
* Add button for orphan files deletion + API routes and views

* Fix flake8 formatting

---------

Co-authored-by: OhMaley <[email protected]>
  • Loading branch information
Didayolo and OhMaley committed Sep 3, 2024
1 parent d24f565 commit fba9140
Show file tree
Hide file tree
Showing 5 changed files with 209 additions and 1 deletion.
2 changes: 1 addition & 1 deletion src/apps/analytics/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -475,7 +475,7 @@ def create_storage_analytics_snapshot():

# Log the results
log_file = (
"/app/logs/" +
"/app/var/logs/" +
"db_storage_inconsistency_" +
current_datetime.strftime("%Y%m%d-%H%M%S") +
".log"
Expand Down
2 changes: 2 additions & 0 deletions src/apps/api/urls.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,8 @@
path('analytics/storage_usage_history/', analytics.storage_usage_history, name='storage_usage_history'),
path('analytics/competitions_usage/', analytics.competitions_usage, name='competitions_usage'),
path('analytics/users_usage/', analytics.users_usage, name='users_usage'),
path('analytics/delete_orphan_files/', analytics.delete_orphan_files, name="delete_orphan_files"),
path('analytics/get_orphan_files/', analytics.get_orphan_files, name="get_orphan_files"),

# API Docs
re_path(r'docs(?P<format>\.json|\.yaml)$', schema_view.without_ui(cache_timeout=0), name='schema-json'),
Expand Down
115 changes: 115 additions & 0 deletions src/apps/api/views/analytics.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,13 @@
from competitions.models import Competition, Submission
from analytics.models import StorageUsageHistory, CompetitionStorageDataPoint, UserStorageDataPoint
from api.serializers.analytics import AnalyticsSerializer
from utils.storage import BundleStorage

import os
import datetime
import coreapi
import pytz
import logging


User = get_user_model()
Expand Down Expand Up @@ -286,3 +289,115 @@ def users_usage(request):
}

return Response(response, status=status.HTTP_200_OK)


@api_view(["GET"])
def get_orphan_files(request):
    """
    Return the orphan file paths listed in the latest storage inconsistency log.

    Only superusers may call this endpoint; anyone else gets a
    ``PermissionDenied``. When no inconsistency log can be found, a 404
    response carrying a ``message`` is returned. Otherwise the response is a
    200 whose ``data`` entry is the list of paths parsed from the log.
    """
    if not request.user.is_superuser:
        raise PermissionDenied(detail="Admin only")

    log = logging.getLogger(__name__)

    latest_log_name = get_most_recent_storage_inconsistency_log_file()
    if latest_log_name:
        # The helper returns a bare file name, so rebuild the full path here.
        latest_log_path = os.path.join("/app/logs/", latest_log_name)
        return Response(
            {"data": get_files_path_from_orphan_log_file(latest_log_path)},
            status=status.HTTP_200_OK,
        )

    log.warning("No storage inconsistency log file found.")
    return Response(
        {"message": "No storage inconsistency log file found."},
        status=status.HTTP_404_NOT_FOUND,
    )


@api_view(["DELETE"])
def delete_orphan_files(request):
    """
    Delete all orphan files from the storage based on the last storage analytics.

    The analytics task generates a db_storage_inconsistency_<date>-<time>.log
    file that lists, among other things, the orphan files. The most recent of
    those logs is used as the list of objects to delete.

    Only superusers may call this endpoint (``PermissionDenied`` otherwise).

    Responses:
        404 -- no storage inconsistency log file was found; nothing is removed.
        500 -- the storage reported at least one object it could not delete.
        200 -- every delete request completed without reported errors.
    """
    if not request.user.is_superuser:
        raise PermissionDenied(detail="Admin only")

    logger = logging.getLogger(__name__)
    logger.info("Delete orphan files started")

    # Find most recent file
    most_recent_log_file = get_most_recent_storage_inconsistency_log_file()
    if not most_recent_log_file:
        logger.warning("No storage inconsistency log file found. Nothing will be removed")
        return Response({"message": "No storage inconsistency log file found. Nothing will be removed"}, status=status.HTTP_404_NOT_FOUND)

    # Get the list of orphan files from the content of the most recent log file
    log_folder = "/app/logs/"
    orphan_files_path = get_files_path_from_orphan_log_file(os.path.join(log_folder, most_recent_log_file))

    # Delete those files in batch (max 1000 element at once)
    batch_size = 1000
    failed_deletions = []
    for i in range(0, len(orphan_files_path), batch_size):
        batch = orphan_files_path[i:i + batch_size]
        objects_formatted = [{'Key': path} for path in batch]
        result = BundleStorage.bucket.delete_objects(Delete={'Objects': objects_formatted})
        # A bulk delete can succeed as a request while individual keys fail;
        # those failures are only visible in the "Errors" entry of the result.
        failed_deletions.extend((result or {}).get('Errors', []))

    if failed_deletions:
        for failure in failed_deletions:
            logger.error(
                "Could not delete orphan file '%s': %s (%s)",
                failure.get('Key'), failure.get('Message'), failure.get('Code'),
            )
        return Response(
            {"message": f"{len(failed_deletions)} orphan file(s) could not be deleted"},
            status=status.HTTP_500_INTERNAL_SERVER_ERROR,
        )

    logger.info("Delete orphan files finished")
    return Response({"message": "done"}, status=status.HTTP_200_OK)


def get_most_recent_storage_inconsistency_log_file(log_folder="/app/logs/"):
    """
    Return the name of the newest storage inconsistency log in ``log_folder``.

    Files are expected to be named
    ``db_storage_inconsistency_<YYYYmmdd>-<HHMMSS>.log``; "newest" is decided
    by the timestamp embedded in the name, not by the file's mtime. Entries
    that are not regular files are skipped, and files whose name does not
    follow the pattern are ignored with a warning.

    NOTE(review): the storage analytics task changed alongside these views
    shows both "/app/logs/" and "/app/var/logs/" for its output path -- confirm
    that the task writes to the folder read here.

    Args:
        log_folder: Directory to scan. Defaults to the application log folder.

    Returns:
        The bare file name (no directory) of the most recent log, or ``None``
        when the folder does not exist or holds no matching file.
    """
    logger = logging.getLogger(__name__)

    prefix = "db_storage_inconsistency_"
    suffix = ".log"
    datetime_format = "%Y%m%d-%H%M%S"

    try:
        log_files = [f for f in os.listdir(log_folder) if os.path.isfile(os.path.join(log_folder, f))]
    except FileNotFoundError:
        logger.info(f"Folder '{log_folder}' does not exist.")
        return None

    most_recent_log_file = None
    most_recent_datetime = None
    for file in log_files:
        file_datetime = None
        # Check the prefix and suffix explicitly: slicing by length alone would
        # accept any file whose name merely has a timestamp in the right place.
        if file.startswith(prefix) and file.endswith(suffix):
            try:
                file_datetime = datetime.datetime.strptime(file[len(prefix):-len(suffix)], datetime_format)
            except ValueError:
                file_datetime = None

        if file_datetime is None:
            logger.warning(f"Filename '{file}' does not match the expected format and will be ignored.")
            continue

        if most_recent_datetime is None or file_datetime > most_recent_datetime:
            most_recent_datetime = file_datetime
            most_recent_log_file = file

    return most_recent_log_file


def get_files_path_from_orphan_log_file(log_file_path):
    """
    Extract the orphan file paths listed in a storage inconsistency log.

    Everything after the first line containing "Orphaned files" is treated as
    the orphan section. For each line of that section the first
    whitespace-delimited token is taken as the file path. Blank or
    whitespace-only lines are skipped.

    Args:
        log_file_path: Path of the log file to read.

    Returns:
        A list of path strings. The list is empty when the file has no
        "Orphaned files" marker or could not be read; read failures are
        logged rather than raised.
    """
    logger = logging.getLogger(__name__)

    lines = []
    try:
        with open(log_file_path) as log_file:
            lines = log_file.readlines()
    except FileNotFoundError:
        logger.error(f"File '{log_file_path}' does not exist.")
    except PermissionError:
        logger.error(f"Permission denied for reading the file '{log_file_path}'.")
    except IOError as e:
        logger.error(f"An I/O error occurred while accessing the file at {log_file_path}: {e}")

    # Index of the first line after the marker; len(lines) means "no marker".
    section_start = next(
        (i + 1 for i, line in enumerate(lines) if "Orphaned files" in line),
        len(lines),
    )

    files_path = []
    for orphan_files_line in lines[section_start:]:
        tokens = orphan_files_line.split(maxsplit=1)
        # An empty line splits into no tokens; indexing it would raise IndexError.
        if tokens:
            files_path.append(tokens[0])

    return files_path
6 changes: 6 additions & 0 deletions src/static/js/ours/client.js
Original file line number Diff line number Diff line change
Expand Up @@ -346,6 +346,12 @@ CODALAB.api = {
get_users_usage: (filters) => {
return CODALAB.api.request('GET', `${URLS.API}analytics/users_usage/`, filters);
},
// Send a DELETE request to the analytics/delete_orphan_files/ API route and return the request object
delete_orphan_files: () => {
return CODALAB.api.request('DELETE', `${URLS.API}analytics/delete_orphan_files/`)
},
// Send a GET request to the analytics/get_orphan_files/ API route and return the request object
get_orphan_files: () => {
return CODALAB.api.request('GET', `${URLS.API}analytics/get_orphan_files/`)
},
/*---------------------------------------------------------------------
User Quota and Cleanup
---------------------------------------------------------------------*/
Expand Down
85 changes: 85 additions & 0 deletions src/static/riot/analytics/analytics.tag
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,11 @@
<a class="item" data-tab="usage-history">Usage history</a>
<a class="item" data-tab="competitions-usage">Competitions usage</a>
<a class="item" data-tab="users-usage">Users usage</a>
<div class="delete-oprhans-container">
<button class="ui red button" onclick="{showConfirmationModal}">
<i class="icon warning"></i>Delete orphan files
</button>
</div>
</div>

<div class="ui bottom attached tab segment" data-tab="usage-history">
Expand All @@ -145,6 +150,27 @@
<div class="ui bottom attached tab segment" data-tab="users-usage">
<analytics-storage-users-usage start_date={start_date_string} end_date={end_date_string} resolution={time_unit} is_visible={current_view=="users-usage"}></analytics-storage-users-usage>
</div>

<!-- Orphan Deletion Modal -->
<div ref="confirmation_modal" class="ui small modal">
<div class="header">
Delete orphan files
</div>
<div class="content">
<h4>You are about to delete {nb_orphan_files} orphan files.</h4>
<h5><i>Note: The number of orphan files displayed is based on the most recent storage inconsistency analytics. Its value will be updated during the next storage analytics task.</i></h5>
<h3>This operation is irreversible!</h3>
<h3>Do you want to proceed ?</h3>
</div>
<div class="actions">
<button class="ui icon button {delete_button_color} { loading: delete_button_loading } { disabled: delete_button_disabled }" onclick="{deleteOrphanFiles}">
<i if={delete_button_color=="green"} class="check icon"></i>
{delete_button_text}
</button>
<button class="ui cancel button">Close</button>
</div>
</div>

</div>

<script>
Expand Down Expand Up @@ -185,6 +211,12 @@

/****** Storage *****/

// State backing the orphan deletion modal: the count shown in its message
// and the colour / spinner / disabled flag / label of its confirm button.
self.nb_orphan_files = 0
self.delete_button_color = "red"
self.delete_button_loading = false
self.delete_button_disabled = false
self.delete_button_text = "Yes, delete all orphan files"

self.one("mount", function () {
// Semantic UI
$('.tabular.menu .item', self.root).tab();
Expand Down Expand Up @@ -294,6 +326,7 @@
self.update_analytics(self.start_date, null, self.time_unit);
self.time_range_shortcut("month");
self.update_chart_resolution("day");
self.getOrphanFiles();
})

/*---------------------------------------------------------------------
Expand Down Expand Up @@ -482,6 +515,53 @@
}
}

// Orphan related
self.showConfirmationModal = function() {
    // Open the confirmation modal, then put the confirm button back into its
    // initial "red, enabled, not loading" state so that a previous attempt
    // (successful or failed) does not leak into this one.
    var $modal = $(self.refs.confirmation_modal);
    $modal.modal('show');

    Object.assign(self, {
        delete_button_color: "red",
        delete_button_loading: false,
        delete_button_disabled: false,
        delete_button_text: "Yes, delete all orphan files"
    });
    self.update();
}

self.deleteOrphanFiles = function() {
    // Apply the outcome of the request to the confirm button.
    var showOutcome = function (color, disabled, text) {
        self.delete_button_color = color;
        self.delete_button_disabled = disabled;
        self.delete_button_text = text;
    }

    // Show the spinner and block further clicks while the request is running
    self.delete_button_loading = true
    self.delete_button_disabled = true
    self.update()

    CODALAB.api.delete_orphan_files()
        .done(function (result) {
            console.log("done", result);
            // Success: the button turns green and stays disabled
            showOutcome("green", true, "Deletion Successful");
        })
        .fail(function (xhr) {
            console.log("fail response=", xhr);
            toastr.error("Deletion failed, error occurred")
            // Failure: the button is re-enabled so the user can retry
            showOutcome("red", false, "Deletion Failed");
        })
        .always(function () {
            // Whatever the outcome, stop the spinner and re-render once
            self.delete_button_loading = false
            self.update()
        });
}

self.getOrphanFiles = function() {
    // Fetch the list of orphan files from the API and store how many there
    // are in self.nb_orphan_files, which the confirmation modal displays.
    CODALAB.api.get_orphan_files()
        .done(function (data) {
            console.log("get_orphan_files success. Response", data);
            // Tolerate a payload without a "data" list instead of throwing
            // inside the callback.
            var orphan_files = (data && data.data) || []
            self.nb_orphan_files = orphan_files.length
            self.update();
        })
        .fail(function (response) {
            console.log("get_orphan_files failed. Response=", response);
            toastr.error("Get orphan files failed, error occurred")
        });
}

</script>
<style>
analytics {
Expand Down Expand Up @@ -533,5 +613,10 @@
.chart-container {
min-height: 450px;
}

.delete-oprhans-container {
margin-bottom: 5px;
margin-left: auto;
}
</style>
</analytics>

0 comments on commit fba9140

Please sign in to comment.