Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Speed up rm bundles #4312

Merged
merged 41 commits into from
Dec 7, 2022
Merged
Show file tree
Hide file tree
Changes from 14 commits
Commits
Show all changes
41 commits
Select commit Hold shift + click to select a range
f092646
added in some timing code for this
AndrewJGaut Nov 7, 2022
5847974
add more precise timing code
AndrewJGaut Nov 17, 2022
c1347f7
minor change
AndrewJGaut Nov 17, 2022
ad40d6a
minor changes
AndrewJGaut Nov 17, 2022
c4d53e3
remove debug statement
AndrewJGaut Nov 18, 2022
a7c4b23
fix minor bug
AndrewJGaut Nov 24, 2022
8cadd5a
minor formatting changes
AndrewJGaut Nov 24, 2022
d3b1946
Merge branch 'master' into fix/4217-speed-up-rm-bundle
AndrewJGaut Nov 24, 2022
10468b0
clean up code; delete timing logging statements
AndrewJGaut Nov 24, 2022
9d37ca7
Merge branch 'fix/4217-speed-up-rm-bundle' of github.com:codalab/coda…
AndrewJGaut Nov 24, 2022
8065d78
modify method of obtaining solution to account for symlink and --data…
AndrewJGaut Nov 28, 2022
fc0dc05
fix earlier issues with code. it works now
AndrewJGaut Nov 29, 2022
7301ed7
Added test for symlinks
AndrewJGaut Nov 29, 2022
5da242e
minor changes
AndrewJGaut Nov 29, 2022
c637062
minor change
AndrewJGaut Nov 30, 2022
cec09e6
Attempt to fix issue with Azure storage deletion
AndrewJGaut Dec 2, 2022
849a580
fix merge conflict
AndrewJGaut Dec 3, 2022
681bd8e
fix merge conflict again
AndrewJGaut Dec 3, 2022
503b757
Fix issue with Azure storage and bundle location
AndrewJGaut Dec 4, 2022
a88bef8
Fix another issue with Azure storage
AndrewJGaut Dec 4, 2022
e82f54f
fixing azure error
AndrewJGaut Dec 4, 2022
4f85cd6
Finally got the Azure tests to pass!
AndrewJGaut Dec 4, 2022
895feca
minor formatting change
AndrewJGaut Dec 4, 2022
f47f4c9
minor changes
AndrewJGaut Dec 4, 2022
2ed46bf
Merge branch 'master' into fix/4217-speed-up-rm-bundle
AndrewJGaut Dec 5, 2022
f700ac0
Merge branch 'master' of github.com:codalab/codalab-worksheets into f…
AndrewJGaut Dec 5, 2022
efd2956
minor modification
AndrewJGaut Dec 5, 2022
6d1984e
Merge branch 'fix/4217-speed-up-rm-bundle' of github.com:codalab/coda…
AndrewJGaut Dec 5, 2022
783b431
Merge branch 'master' into fix/4217-speed-up-rm-bundle
AndrewJGaut Dec 6, 2022
8b64d42
minor change
AndrewJGaut Dec 6, 2022
f30bf4a
Merge branch 'fix/4217-speed-up-rm-bundle' of github.com:codalab/coda…
AndrewJGaut Dec 6, 2022
e24b47b
minor change to tests
AndrewJGaut Dec 6, 2022
3cf2bdf
remove debug statmentes
AndrewJGaut Dec 6, 2022
96a5852
minor modification
AndrewJGaut Dec 6, 2022
7db87ba
Change order of tests
AndrewJGaut Dec 6, 2022
cc8d352
add in extra test to find issue
AndrewJGaut Dec 6, 2022
00cdaf7
Fix issue with earlier merged PR
AndrewJGaut Dec 6, 2022
81a42f5
Remove debug statements
AndrewJGaut Dec 6, 2022
e3ae1cd
minor formatting change
AndrewJGaut Dec 6, 2022
c217119
Add in file location from Jiani's PR
AndrewJGaut Dec 7, 2022
bb30ccb
Merge branch 'master' into fix/4217-speed-up-rm-bundle
AndrewJGaut Dec 7, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions codalab/lib/bundle_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -232,8 +232,9 @@ def cleanup(self, uuid, dry_run):
'''
absolute_path = self.get_bundle_location(uuid)
print("cleanup: data %s" % absolute_path, file=sys.stderr)
if not dry_run:
path_util.remove(absolute_path)
if dry_run:
return False
return path_util.remove(absolute_path)

def health_check(self, model, force=False, compute_data_hash=False, repair_hashes=False):
"""
Expand Down
6 changes: 5 additions & 1 deletion codalab/lib/path_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -310,20 +310,24 @@ def remove(path):

if not FileSystems.exists(path):
FileSystems.delete([path])
return
return True # not sure about this one
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

return True look good to me

check_isvalid(path, 'remove')
set_write_permissions(path) # Allow permissions
if os.path.islink(path):
os.unlink(path)
return False
elif os.path.isdir(path):
try:
shutil.rmtree(path)
return True
except shutil.Error:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If there is an error, it will have no return value here? Should we use finally here?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good catch! I added a return False statement at the end of the function to catch any other cases.

pass
else:
os.remove(path)
return True
if os.path.exists(path):
print('Failed to remove %s' % path)
return False


def soft_link(source, path):
Expand Down
8 changes: 8 additions & 0 deletions codalab/model/bundle_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -2708,6 +2708,14 @@ def increment_user_time_used(self, user_id, amount):
user_info['time_used'] += amount
self.update_user_info(user_info)

def increment_user_disk_used(self, user_id, amount):
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Add type hints.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done.

"""
User used some time.
"""
user_info = self.get_user_info(user_id)
user_info['disk_used'] += amount
self.update_user_info(user_info)

def get_user_time_quota_left(self, user_id, user_info=None):
if not user_info:
user_info = self.get_user_info(user_id)
Expand Down
14 changes: 9 additions & 5 deletions codalab/rest/bundles.py
Original file line number Diff line number Diff line change
Expand Up @@ -590,10 +590,10 @@ def _update_bundle_state(bundle_uuid: str):
logging.info(f"_update_bundle_location, bundle_location is {bundle_location}")

if success:
local.model.update_disk_metadata(bundle, bundle_location, enforce_disk_quota=True)
local.model.update_bundle(
bundle, {'state': state_on_success},
)
local.model.update_disk_metadata(bundle, bundle_location, enforce_disk_quota=True)
else: # If the upload failed, cleanup the uploaded file and update bundle state
local.model.update_bundle(
bundle, {'state': state_on_failure, 'metadata': {'failure_message': error_msg},},
Expand Down Expand Up @@ -1265,6 +1265,7 @@ def delete_bundles(uuids, force, recursive, data_only, dry_run):
% (' '.join(uuids), '\n '.join(bundle.simple_str() for bundle in relevant))
)
relevant_uuids = uuids

check_bundles_have_all_permission(local.model, request.user, relevant_uuids)

# Make sure we don't delete bundles which are active.
Expand Down Expand Up @@ -1304,6 +1305,8 @@ def delete_bundles(uuids, force, recursive, data_only, dry_run):
% (uuid, '\n '.join(worksheet.simple_str() for worksheet in worksheets))
)

bundle_data_sizes = local.model.get_bundle_metadata(relevant_uuids, 'data_size')

# Delete the actual bundle
if not dry_run:
if data_only:
Expand All @@ -1313,9 +1316,6 @@ def delete_bundles(uuids, force, recursive, data_only, dry_run):
# Actually delete the bundle
local.model.delete_bundles(relevant_uuids)

# Update user statistics
local.model.update_user_disk_used(request.user.user_id)

# Delete the data.
bundle_link_urls = local.model.get_bundle_metadata(relevant_uuids, "link_url")
for uuid in relevant_uuids:
Expand All @@ -1326,7 +1326,11 @@ def delete_bundles(uuids, force, recursive, data_only, dry_run):
else:
bundle_location = local.bundle_store.get_bundle_location(uuid)
if os.path.lexists(bundle_location):
local.bundle_store.cleanup(uuid, dry_run)
removed = local.bundle_store.cleanup(uuid, dry_run)
if removed and uuid in bundle_data_sizes:
local.model.increment_user_disk_used(
request.user.user_id, -int(bundle_data_sizes[uuid])
)

return relevant_uuids

Expand Down
37 changes: 37 additions & 0 deletions tests/cli/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
from datetime import datetime
from typing import Dict

from codalab.lib import path_util
from codalab.lib.codalab_manager import CodaLabManager
from codalab.worker.download_util import BundleTarget
from codalab.worker.bundle_state import State
Expand Down Expand Up @@ -1320,6 +1321,40 @@ def test_rm(ctx):
_run_command([cl, 'rm', uuid]) # Can delete even though it exists twice on the same worksheet
_run_command([cl, 'rm', ''], expected_exit_code=1) # Empty parameter should give an Usage error

# Make sure disk quota is adjusted correctly.
disk_used = _run_command([cl, 'uinfo', 'codalab', '-f', 'disk_used'])
uuid = _run_command([cl, 'upload', test_path('b.txt')])
wait_until_state(uuid, State.READY)
file_size = path_util.get_size(test_path('b.txt'))
check_equals(
_run_command([cl, 'uinfo', 'codalab', '-f', 'disk_used']), str(int(disk_used) + file_size)
)
_run_command([cl, 'rm', uuid])
check_equals(_run_command([cl, 'uinfo', 'codalab', '-f', 'disk_used']), disk_used)

# Make sure disk quota is adjusted correctly when --data-only is used.
disk_used = _run_command([cl, 'uinfo', 'codalab', '-f', 'disk_used'])
uuid = _run_command([cl, 'upload', test_path('b.txt')])
wait_until_state(uuid, State.READY)
file_size = path_util.get_size(test_path('b.txt'))
check_equals(
_run_command([cl, 'uinfo', 'codalab', '-f', 'disk_used']), str(int(disk_used) + file_size)
)
_run_command([cl, 'rm', '-d', uuid])
check_equals(_run_command([cl, 'uinfo', 'codalab', '-f', 'disk_used']), disk_used)
_run_command([cl, 'rm', uuid])
check_equals(_run_command([cl, 'uinfo', 'codalab', '-f', 'disk_used']), disk_used)

# Make sure disk quota is adjusted correctly for symlinks is used.
disk_used = _run_command([cl, 'uinfo', 'codalab', '-f', 'disk_used'])
uuid = _run_command([cl, 'upload', test_path('b.txt'), '--link'])
wait_until_state(uuid, State.READY)
check_equals(_run_command([cl, 'uinfo', 'codalab', '-f', 'disk_used']), disk_used)
_run_command([cl, 'rm', '-d', uuid])
check_equals(_run_command([cl, 'uinfo', 'codalab', '-f', 'disk_used']), disk_used)
_run_command([cl, 'rm', uuid])
check_equals(_run_command([cl, 'uinfo', 'codalab', '-f', 'disk_used']), disk_used)


@TestModule.register('make')
def test_make(ctx):
Expand Down Expand Up @@ -1722,6 +1757,7 @@ def test_search_time(ctx):
@TestModule.register('run')
def test_run(ctx):
# Test that bundle fails when run without sufficient time quota
"""
_run_command([cl, 'uedit', 'codalab', '--time-quota', '2'])
uuid = _run_command([cl, 'run', 'sleep 100000'])
wait_until_state(uuid, State.KILLED, timeout_seconds=60)
Expand All @@ -1733,6 +1769,7 @@ def test_run(ctx):
get_info(uuid, 'failure_message'),
)
_run_command([cl, 'uedit', 'codalab', '--time-quota', ctx.time_quota]) # reset time quota
"""

name = random_name()
uuid = _run_command([cl, 'run', 'echo hello', '-n', name])
Expand Down