-
Notifications
You must be signed in to change notification settings - Fork 504
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Showing
3 changed files
with
109 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,34 @@ | ||
# Stress test for testing memory utilization mounted storage | ||
# | ||
# Lists or reads the files in the mounted storage (depending on --list flag) | ||
# Manually watch memory util using top to benchmark | ||
# | ||
# Usage: | ||
# cd tests/stress/mountedstorage | ||
# sky launch -c stress mount_stress.yaml | ||
|
||
name: stress | ||
|
||
resources: | ||
cloud: aws | ||
|
||
workdir: . | ||
|
||
file_mounts: | ||
/covid: | ||
source: s3://fah-public-data-covid19-cryptic-pockets | ||
mode: MOUNT | ||
|
||
setup: | | ||
# Install jupyter for playing around with | ||
pip install --upgrade pip | ||
conda init bash | ||
conda activate jupyter | ||
conda create -n jupyter python=3.9 -y | ||
conda activate jupyter | ||
pip install jupyter | ||
run: | | ||
python -u read_parallel.py /covid/ --list | ||
# conda activate jupyter | ||
# jupyter notebook --port 8888 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,74 @@ | ||
# Read all files in a directory recursively in parallel | ||
|
||
import os | ||
from concurrent.futures import ThreadPoolExecutor | ||
from concurrent.futures import as_completed | ||
|
||
|
||
def parse_args(): | ||
import argparse | ||
parser = argparse.ArgumentParser(description='Recursively read in parallel') | ||
parser.add_argument('path', help='Path to directory to read files;') | ||
parser.add_argument('--list', | ||
action='store_true', | ||
help='List files before reading') | ||
args = parser.parse_args() | ||
return args | ||
|
||
|
||
def sizeof_fmt(num, suffix="B"): | ||
for unit in ["", "Ki", "Mi", "Gi", "Ti", "Pi", "Ei", "Zi"]: | ||
if abs(num) < 1024.0: | ||
return f"{num:3.1f}{unit}{suffix}" | ||
num /= 1024.0 | ||
return f"{num:.1f}Yi{suffix}" | ||
|
||
|
||
def read_file(file): | ||
print(file) | ||
with open(file, 'rb') as f: | ||
x = f.read() | ||
# print("File: " + file) | ||
# print("Size of file is :", sizeof_fmt(int(f.tell()))) | ||
|
||
|
||
def list_files(path): | ||
# Returns the number of ls ops done and files ls'd | ||
ls_done = 0 | ||
files_lsed = 0 | ||
for root, dirs, files in os.walk(path): | ||
files = [os.path.join(root, file) for file in files] | ||
ls_done += 1 | ||
files_lsed += len(files) | ||
if ls_done % 1000 == 0: | ||
print("ls done: ", ls_done) | ||
print("files ls'd: ", files_lsed) | ||
print("Got files: ", len(files)) | ||
return ls_done, files_lsed | ||
|
||
|
||
def read_parallel(path): | ||
print(f"Reading files in parallel {path}") | ||
done = 0 | ||
for root, dirs, files in os.walk(path): # Uses a generator | ||
with ThreadPoolExecutor(max_workers=64) as executor: | ||
futures = [ | ||
executor.submit(read_file, os.path.join(root, file)) | ||
for file in files | ||
] | ||
for future in as_completed(futures): | ||
done += 1 | ||
if done % 10 == 0: | ||
print("Reads done: ", done) | ||
print(future.result()) | ||
|
||
|
||
if __name__ == '__main__': | ||
args = parse_args() | ||
# s3://fah-public-data-covid19-cryptic-pockets mounted at /covid | ||
if args.list: | ||
ls_done, files_lsed = list_files(args.path) | ||
print("ls done: ", ls_done) | ||
print("files ls'd: ", files_lsed) | ||
else: | ||
read_parallel(args.path) |