Skip to content

Commit

Permalink
Merge pull request #4489 from codalab/rc1.7.0
Browse files Browse the repository at this point in the history
Bump version to 1.7.0
  • Loading branch information
epicfaace authored Aug 28, 2023
2 parents 5be8cb3 + 23c2d3a commit 2b2a31c
Show file tree
Hide file tree
Showing 8 changed files with 50 additions and 25 deletions.
2 changes: 1 addition & 1 deletion codalab/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@

# Increment this on master when ready to cut a release.
# http://semver.org/
CODALAB_VERSION = '1.6.3'
CODALAB_VERSION = '1.7.0'
BINARY_PLACEHOLDER = '<binary>'
URLOPEN_TIMEOUT_SECONDS = int(os.environ.get('CODALAB_URLOPEN_TIMEOUT_SECONDS', 5 * 60))

Expand Down
27 changes: 15 additions & 12 deletions codalab/migration.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,7 @@ def upload_to_azure_blob(self, bundle_uuid, bundle_location, is_dir=False):
file_name = "contents.tar.gz" if is_dir else "contents.gz"
target_location = f"{self.target_store_url}/{bundle_uuid}/{file_name}"

# TODO: This step may re-upload a bundle that was already uploaded. We cannot detect that by comparing sizes, because the contents in Azure Blob Storage are compressed.
if FileSystems.exists(target_location):
path_util.remove(target_location)

Expand Down Expand Up @@ -202,33 +203,35 @@ def delete_original_bundle(self, bundle_uuid, bundle_location):
description='Manages your local CodaLab Worksheets service deployment'
)
parser.add_argument(
'-w', '--worksheet', type=str, help='The worksheet uuid that needs migration'
'-a', '--all', help='Run migration on all worksheets and all bundles', action='store_true',
)
parser.add_argument('--target_store_name', type=str, help='The destination bundle store name')
parser.add_argument(
'-d',
'--dry-run',
help='Only upload the bundle to Azure, does not modify database',
action='store_true',
'-w', '--worksheet', type=str, help='The worksheet uuid that needs migration'
)
parser.add_argument('--target_store_name', type=str, help='The destination bundle store name')
parser.add_argument(
'-k', '--keep', help='Keep bundle content in origin bundle store', action='store_true'
'-c', '--change_db', help='Change the bundle location in the database', action='store_true',
)
parser.add_argument('-d', '--delete', help='Delete the original database', action='store_true')

args = parser.parse_args()

worksheet_uuid = args.worksheet
target_store_name = (
"azure-store-default" if args.target_store_name is None else args.target_store_name
)
if worksheet_uuid is not None and not spec_util.UUID_REGEX.match(worksheet_uuid):
raise Exception("Input worksheet uuid has wrong format. ")

# TODO: write output to log / log files
migration = Migration(target_store_name)
migration.setUp()

bundle_uuids = migration.get_bundle_uuids(worksheet_uuid)
if args.all:
bundle_uuids = migration.get_bundle_uuids(worksheet_uuid=None)
else:
# Must specify worksheet uuid
if worksheet_uuid is not None and not spec_util.UUID_REGEX.match(worksheet_uuid):
raise Exception("Input worksheet uuid has wrong format. ")
bundle_uuids = migration.get_bundle_uuids(worksheet_uuid)

for bundle_uuid in bundle_uuids:
logging.info(bundle_uuid)
Expand Down Expand Up @@ -256,8 +259,8 @@ def delete_original_bundle(self, bundle_uuid, bundle_location):
is_dir = bundle_info['type'] == 'directory'
migration.upload_to_azure_blob(bundle_uuid, bundle_location, is_dir)

if not args.dry_run: # If dry_run, only upload to new bundle location
if args.change_db: # If need to change the database, continue to run
migration.modify_bundle_data(bundle, bundle_uuid, is_dir)
migration.sanity_check(bundle_uuid, bundle_location, bundle_info, is_dir)
if not args.keep:
if args.delete:
migration.delete_original_bundle(bundle_uuid, bundle_location)
21 changes: 14 additions & 7 deletions codalab/worker/docker_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -152,7 +152,7 @@ def get_nvidia_devices(self, use_docker=True):
docker.errors.ImageNotFound if the CUDA image cannot be pulled
docker.errors.APIError if another server error occurs
"""
cuda_image = 'nvidia/cuda:12.2.0-devel-ubuntu22.04'
cuda_image = 'sulfurheron/nvidia-cuda:9.0-cudnn7-devel-ubuntu16.04-2018-06-08'
nvidia_command = 'nvidia-smi --query-gpu=index,uuid --format=csv,noheader'
if use_docker:
self.client.images.pull(cuda_image)
Expand All @@ -164,20 +164,27 @@ def get_nvidia_devices(self, use_docker=True):
stdout=True,
remove=True,
)
gpus = output.decode()
gpu_info = output.decode()
GPU_REGEX = r"(\d+), ((?:GPU-)[a-fA-F0-9-]+)"
gpus = {}
for line in gpu_info.splitlines():
match = re.match(GPU_REGEX, line)
if match:
idx = match.group(1)
uuid = match.group(2)
gpus[idx] = uuid

else:
# use the singularity runtime to run nvidia-smi
# img = Client.pull('docker://' + cuda_image, pull_folder='/tmp')
# output = Client.execute(img, nvidia_command, options=['--nv'])
# if output['return_code'] != 0:
# raise SingularityError
# gpus = output['message']
gpus = ""
gpus = {}
# Get newline delimited gpu-index, gpu-uuid list
logger.info("GPUs: " + str(gpus.split('\n')[:-1]))
return {
gpu.split(',')[0].strip(): gpu.split(',')[1].strip() for gpu in gpus.split('\n')[:-1]
}
logger.info("GPUs: " + str(gpus))
return gpus

@wrap_exception('Unable to fetch Docker container ip')
def get_container_ip(self, network_name: str, container_id: str):
Expand Down
5 changes: 4 additions & 1 deletion codalab/worker/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -326,8 +326,11 @@ def main():
# Create temp file to store kubernetes cert, as we need to pass in a file path.
# TODO: Delete the file afterwards (upon CodaLab service stop?)
with tempfile.NamedTemporaryFile(mode="w", delete=False) as f:
f.write(args.kubernetes_cert)
f.write(
args.kubernetes_cert.replace(r'\n', '\n')
) # Properly add newlines, which appear as "\n" if specified in the environment variable.
kubernetes_cert_path = f.name
logger.info('Temporarily writing kubernetes cert to: %s', kubernetes_cert_path)
else:
kubernetes_cert_path = args.kubernetes_cert_path
bundle_runtime_class = KubernetesRuntime(
Expand Down
14 changes: 13 additions & 1 deletion codalab/worker_manager/kubernetes_worker_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
from argparse import ArgumentParser
from typing import Any, Dict, List, Optional
from codalab.common import BundleRuntime
import tempfile

from urllib3.exceptions import MaxRetryError, NewConnectionError # type: ignore

Expand Down Expand Up @@ -89,7 +90,18 @@ def __init__(self, args):
configuration.api_key_prefix['authorization'] = 'Bearer'
configuration.api_key['authorization'] = args.auth_token
configuration.host = args.cluster_host
configuration.ssl_ca_cert = args.cert_path
if args.cert_path == "/dev/null" and args.cert != "/dev/null":
# Create temp file to store kubernetes cert, as we need to pass in a file path.
# TODO: Delete the file afterwards (upon CodaLab service stop?)
with tempfile.NamedTemporaryFile(mode="w", delete=False) as f:
f.write(
args.cert.replace(r'\n', '\n')
) # Properly add newlines, which appear as "\n" if specified in the environment variable.
cert_path = f.name
logger.info('Temporarily writing kubernetes cert to: %s', cert_path)
else:
cert_path = args.cert_path
configuration.ssl_ca_cert = cert_path
if configuration.host == "https://codalab-control-plane:6443":
# Don't verify SSL if we are connecting to a local cluster for testing / development.
configuration.verify_ssl = False
Expand Down
2 changes: 1 addition & 1 deletion docs/REST-API-Reference.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# REST API Reference

_version 1.6.3_
_version 1.7.0_

This reference and the REST API itself is still under heavy development and is
subject to change at any time. Feedback through our GitHub issues is appreciated!
Expand Down
2 changes: 1 addition & 1 deletion frontend/src/constants.js
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
// Should match codalab/common.py#CODALAB_VERSION
export const CODALAB_VERSION = '1.6.3';
export const CODALAB_VERSION = '1.7.0';

// Name Regex to match the backend in spec_utils.py
export const NAME_REGEX = /^[a-zA-Z_][a-zA-Z0-9_.-]*$/i;
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@


# should match codalab/common.py#CODALAB_VERSION
CODALAB_VERSION = "1.6.3"
CODALAB_VERSION = "1.7.0"


class Install(install):
Expand Down

0 comments on commit 2b2a31c

Please sign in to comment.