From b032a729e5e9fec52e32bb2a22cf53354ac1c602 Mon Sep 17 00:00:00 2001
From: Joshua
Date: Wed, 5 Jul 2023 12:43:44 -0500
Subject: [PATCH 1/7] Support PEP 658, without hashes

---
 manywheel/upload.sh      |  9 +++++++--
 s3_management/manage.py  | 24 +++++++++++++++++-------
 wheel/upload.sh          |  8 ++++++--
 windows/upload/upload.sh |  7 +++++--
 4 files changed, 35 insertions(+), 13 deletions(-)

diff --git a/manywheel/upload.sh b/manywheel/upload.sh
index a0c7b5b85..e43d0030b 100755
--- a/manywheel/upload.sh
+++ b/manywheel/upload.sh
@@ -33,12 +33,17 @@ for cuda_ver in "${CUDA_VERSIONS[@]}"; do
 
     # Upload the wheels to s3
     if [[ -d "$wheel_dir" ]]; then
+        pushd "$wheel_dir"
+        find . -type f -exec sh -c 'unzip -j {} -d . "*.dist-info/METADATA" && mv METADATA {}.metadata' \;
         echo "Uploading all of: $(ls $wheel_dir) to $s3_wheel_dir"
-        ls "$wheel_dir" | xargs -I {} aws s3 cp "$wheel_dir"/{} "$s3_wheel_dir" --acl public-read
+        ls . | xargs -I {} aws s3 cp {} "$s3_wheel_dir" --acl public-read
+        popd
     fi
 
     if [[ -d "$libtorch_dir" ]]; then
+        pushd "$libtorch_dir"
         echo "Uploading all of: $(ls $libtorch_dir) to $s3_libtorch_dir"
-        ls "$libtorch_dir" | xargs -I {} aws s3 cp "$libtorch_dir"/{} "$s3_libtorch_dir" --acl public-read
+        ls . | xargs -I {} aws s3 cp {} "$s3_libtorch_dir" --acl public-read
+        popd
     fi
 done
diff --git a/s3_management/manage.py b/s3_management/manage.py
index 655f7de40..897a59409 100644
--- a/s3_management/manage.py
+++ b/s3_management/manage.py
@@ -14,7 +14,6 @@
 
 
 S3 = boto3.resource('s3')
-CLIENT = boto3.client('s3')
 BUCKET = S3.Bucket('pytorch')
 
 ACCEPTED_FILE_EXTENSIONS = ("whl", "zip", "tar.gz")
@@ -121,8 +120,9 @@ def between_bad_dates(package_build_time: datetime):
 
 
 class S3Index:
-    def __init__(self: S3IndexType, objects: List[str], prefix: str) -> None:
+    def __init__(self: S3IndexType, objects: List[str], whls_with_metadata: Set[str], prefix: str) -> None:
         self.objects = objects
+        self.whls_with_metadata = set(whls_with_metadata)
         self.prefix = prefix.rstrip("/")
         self.html_name = PREFIXES_WITH_HTML[self.prefix]
         # should dynamically grab subdirectories like whl/test/cu101
@@ -255,7 +255,13 @@ def to_simple_package_html(
         out.append('  <body>')
         out.append('    <h1>Links for {}</h1>'.format(package_name.lower().replace("_","-")))
         for obj in sorted(self.gen_file_list(subdir, package_name)):
-            out.append(f'    <a href="/{obj}">{path.basename(obj).replace("%2B","+")}</a><br/>')
+            attributes = []
+            if obj in self.whls_with_metadata:
+                # Serve the PEP 658 metadata attributes.
+                # For extra juiciness, we should expose the sha256, instead of "true".
+                attributes += 'data-dist-info-metadata="true"'
+            attributes = " ".join(attributes)
+            out.append(f'    <a href="/{obj}">{path.basename(obj).replace("%2B","+")}</a> {attributes}<br/>')
         # Adding html footer
         out.append('  </body>')
         out.append('</html>')
@@ -338,6 +344,7 @@ def save_pep503_htmls(self) -> None:
     @classmethod
     def from_S3(cls: Type[S3IndexType], prefix: str) -> S3IndexType:
         objects = []
+        whls_with_metadata = {}
         prefix = prefix.rstrip("/")
         for obj in BUCKET.objects.filter(Prefix=prefix):
             is_acceptable = any([path.dirname(obj.key) == prefix] + [
@@ -346,11 +353,14 @@ def from_S3(cls: Type[S3IndexType], prefix: str) -> S3IndexType:
                     path.dirname(obj.key)
                 )
                 for pattern in ACCEPTED_SUBDIR_PATTERNS
-            ]) and obj.key.endswith(ACCEPTED_FILE_EXTENSIONS)
-            if is_acceptable:
-                sanitized_key = obj.key.replace("+", "%2B")
+            ])
+            sanitized_key = obj.key.replace("+", "%2B")
+            if obj.key.endswith(ACCEPTED_FILE_EXTENSIONS) and is_acceptable:
                 objects.append(sanitized_key)
-        return cls(objects, prefix)
+            if obj.key.endswith(".whl.metadata"):
+                whls_with_metadata.append(sanitized_key[:-9])
+
+        return cls(objects, whls_with_metadata, prefix)
 
 def create_parser() -> argparse.ArgumentParser:
     parser = argparse.ArgumentParser("Manage S3 HTML indices for PyTorch")
diff --git a/wheel/upload.sh b/wheel/upload.sh
index 21e8ba120..1c986f11b 100755
--- a/wheel/upload.sh
+++ b/wheel/upload.sh
@@ -18,14 +18,18 @@ fi
 
 # Upload wheels to s3
 if [[ -d "$MAC_WHEEL_FINAL_FOLDER" ]]; then
+    pushd "$MAC_WHEEL_FINAL_FOLDER"
     s3_dir="s3://pytorch/whl/${PIP_UPLOAD_FOLDER}cpu/"
+    find . -type f -exec sh -c 'unzip -j {} -d . "*.dist-info/METADATA" && mv METADATA {}.metadata' \;
     echo "Uploading all of: $(ls $MAC_WHEEL_FINAL_FOLDER) to $s3_dir"
-    ls "$MAC_WHEEL_FINAL_FOLDER" | xargs -I {} aws s3 cp "$MAC_WHEEL_FINAL_FOLDER"/{} "$s3_dir" --acl public-read
+    ls . | xargs -I {} aws s3 cp {} "$s3_dir" --acl public-read
 fi
 
 # Upload libtorch packages to s3
 if [[ -d "$MAC_LIBTORCH_FINAL_FOLDER" ]]; then
+    pushd "$MAC_LIBTORCH_FINAL_FOLDER"
     s3_dir="s3://pytorch/libtorch/${PIP_UPLOAD_FOLDER}cpu/"
     echo "Uploading all of: $(ls $MAC_LIBTORCH_FINAL_FOLDER) to $s3_dir"
-    ls "$MAC_LIBTORCH_FINAL_FOLDER" | xargs -I {} aws s3 cp "$MAC_LIBTORCH_FINAL_FOLDER"/{} "$s3_dir" --acl public-read
+    ls . | xargs -I {} aws s3 cp {} "$s3_dir" --acl public-read
+    popd
 fi
diff --git a/windows/upload/upload.sh b/windows/upload/upload.sh
index dfc71cd4c..77f71f153 100755
--- a/windows/upload/upload.sh
+++ b/windows/upload/upload.sh
@@ -24,16 +24,19 @@
 popd
 
 pushd winwheels/whl
 if [[ "$package_name" == pytorch ]]; then
-    find . -name "*torch-*.whl" | cut -f 2- -d'/' | xargs -I {} aws s3 cp {} s3://pytorch/whl/{} --acl public-read
+    whl_name="*torch-*.whl*"
 elif [[ "$package_name" == torchvision ]]; then
-    find . -name "*torchvision*.whl" | cut -f 2- -d'/' | xargs -I {} aws s3 cp {} s3://pytorch/whl/{} --acl public-read
+    whl_name="*torchvision*.whl*"
 fi
+find . -type f -name "$whl_name" -exec sh -c 'unzip -j {} -d . "*.dist-info/METADATA" && mv METADATA {}.metadata' \;
+find . -name "$whl_name" | cut -f 2- -d'/' | xargs -I {} aws s3 cp {} s3://pytorch/whl/{} --acl public-read
 popd
 
 if [[ "$package_name" == pytorch ]]; then
     pushd winwheels/libtorch
     find . -name "*.zip" | cut -f 2- -d'/' | xargs -I {} aws s3 cp {} s3://pytorch/libtorch/{} --acl public-read
-name "*.zip" | cut -f 2- -d'/' | xargs -I {} aws s3 cp {} s3://pytorch/libtorch/{} --acl public-read + popd fi # then run From 4f622d42f759df142d9c4005b50743f4fbf3bedb Mon Sep 17 00:00:00 2001 From: Josh Cannon Date: Wed, 12 Jul 2023 11:56:15 -0500 Subject: [PATCH 2/7] Update s3_management/manage.py --- s3_management/manage.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/s3_management/manage.py b/s3_management/manage.py index 897a59409..c0b8fa0e1 100644 --- a/s3_management/manage.py +++ b/s3_management/manage.py @@ -257,9 +257,9 @@ def to_simple_package_html( for obj in sorted(self.gen_file_list(subdir, package_name)): attributes = [] if obj in self.whls_with_metadata: - # Serve the PEP 658 metadata attributes. - # For extra juiciness, we should expose the sha256, instead of "true". - attributes += 'data-dist-info-metadata="true"' + # Serve the PEP 658 and PEP 714 metadata attributes + attributes += 'data-dist-info-metadata=true' + attributes += 'data-core-metadata=true' attributes = " ".join(attributes) out.append(f' {path.basename(obj).replace("%2B","+")} {attributes}
') # Adding html footer From 5759ca8da7c88e42c07dd8ed21376cc18089f1e6 Mon Sep 17 00:00:00 2001 From: Joshua Date: Fri, 27 Oct 2023 09:15:40 -0500 Subject: [PATCH 3/7] .key --- s3_management/manage.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/s3_management/manage.py b/s3_management/manage.py index a01f0131d..a1dde89a0 100644 --- a/s3_management/manage.py +++ b/s3_management/manage.py @@ -281,13 +281,13 @@ def to_simple_package_html( out.append('

Links for {}

'.format(package_name.lower().replace("_","-"))) for obj in sorted(self.gen_file_list(subdir, package_name)): attributes = [] - if obj in self.whls_with_metadata: + if obj.key in self.whls_with_metadata: # Serve the PEP 658 and PEP 714 metadata attributes attributes += 'data-dist-info-metadata=true' attributes += 'data-core-metadata=true' attributes = " ".join(attributes) maybe_fragment = f"#sha256={obj.checksum}" if obj.checksum else "" - out.append(f' {path.basename(obj).replace("%2B","+")} {attributes}
') + out.append(f' {path.basename(obj.key).replace("%2B","+")} {attributes}
         # Adding html footer
         out.append('  </body>')
         out.append('</html>')

From bd690c480be5d6382e0177296faafcad2dfd2728 Mon Sep 17 00:00:00 2001
From: Joshua
Date: Fri, 27 Oct 2023 09:16:27 -0500
Subject: [PATCH 4/7] its a set

---
 s3_management/manage.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/s3_management/manage.py b/s3_management/manage.py
index a1dde89a0..91271771e 100644
--- a/s3_management/manage.py
+++ b/s3_management/manage.py
@@ -396,7 +396,7 @@ def grant_public_read(cls: Type[S3IndexType], key: str) -> None:
     @classmethod
     def fetch_object_names(cls: Type[S3IndexType], prefix: str) -> List[str]:
         obj_names = []
-        whls_with_metadata = {}
+        whls_with_metadata = set()
         for obj in BUCKET.objects.filter(Prefix=prefix):
             is_acceptable = any([path.dirname(obj.key) == prefix] + [
                 match(
@@ -411,7 +411,7 @@ def fetch_object_names(cls: Type[S3IndexType], prefix: str) -> List[str]:
 
             sanitized_key = obj.key.replace("+", "%2B")
             if obj.key.endswith(".whl.metadata"):
-                whls_with_metadata.append(sanitized_key[:-9])
+                whls_with_metadata.add(sanitized_key[:-9])
         return obj_names
 
     @classmethod

From 9b542c130f81d216b7fbca322c37a592cbd5a9e8 Mon Sep 17 00:00:00 2001
From: Joshua
Date: Fri, 27 Oct 2023 09:17:42 -0500
Subject: [PATCH 5/7] clearer

---
 s3_management/manage.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/s3_management/manage.py b/s3_management/manage.py
index 91271771e..14aa69cfe 100644
--- a/s3_management/manage.py
+++ b/s3_management/manage.py
@@ -411,7 +411,7 @@ def fetch_object_names(cls: Type[S3IndexType], prefix: str) -> List[str]:
 
             sanitized_key = obj.key.replace("+", "%2B")
             if obj.key.endswith(".whl.metadata"):
-                whls_with_metadata.add(sanitized_key[:-9])
+                whls_with_metadata.add(sanitized_key[:-len(".metadata")])
         return obj_names
 
     @classmethod

From 142e60062523e27b469908636ec09ec598d07473 Mon Sep 17 00:00:00 2001
From: Joshua
Date: Fri, 27 Oct 2023 09:41:16 -0500
Subject: [PATCH 6/7] ok get it working with hashes, and post-rebase

---
 s3_management/manage.py | 44 +++++++++++++++++++++++++------------------
 1 file changed, 25 insertions(+), 19 deletions(-)

diff --git a/s3_management/manage.py b/s3_management/manage.py
index 14aa69cfe..b56444859 100644
--- a/s3_management/manage.py
+++ b/s3_management/manage.py
@@ -21,7 +21,7 @@
 CLIENT = boto3.client('s3')
 BUCKET = S3.Bucket('pytorch')
 
-ACCEPTED_FILE_EXTENSIONS = ("whl", "zip", "tar.gz")
+ACCEPTED_FILE_EXTENSIONS = ("whl", "zip", "tar.gz", ".whl.metadata")
 ACCEPTED_SUBDIR_PATTERNS = [
     r"cu[0-9]+",  # for cuda
     r"rocm[0-9]+\.[0-9]+",  # for rocm
@@ -148,9 +148,15 @@
 
 
 class S3Index:
-    def __init__(self: S3IndexType, objects: List[S3Object], whls_with_metadata: Set[str], prefix: str) -> None:
+    def __init__(
+        self: S3IndexType,
+        objects: List[S3Object],
+        # Maps an object's key to the sha256 of the relevant .metadata (if it exists)
+        object_metadatas: Dict[str, str],
+        prefix: str,
+    ) -> None:
         self.objects = objects
-        self.whls_with_metadata = set(whls_with_metadata)
+        self.object_metadatas = object_metadatas
         self.prefix = prefix.rstrip("/")
         self.html_name = PREFIXES_WITH_HTML[self.prefix]
         # should dynamically grab subdirectories like whl/test/cu101
@@ -281,10 +287,10 @@ def to_simple_package_html(
         out.append('    <h1>Links for {}</h1>'.format(package_name.lower().replace("_","-")))
         for obj in sorted(self.gen_file_list(subdir, package_name)):
             attributes = []
-            if obj.key in self.whls_with_metadata:
+            if metadata_sha256 := self.object_metadatas.get(obj.key):
                 # Serve the PEP 658 and PEP 714 metadata attributes
-                attributes += 'data-dist-info-metadata=true'
-                attributes += 'data-core-metadata=true'
+                attributes += f'data-dist-info-metadata={metadata_sha256}'
+                attributes += f'data-core-metadata={metadata_sha256}'
             attributes = " ".join(attributes)
             maybe_fragment = f"#sha256={obj.checksum}" if obj.checksum else ""
             out.append(f'    <a href="/{obj.key}{maybe_fragment}">{path.basename(obj.key).replace("%2B","+")}</a> {attributes}<br/>')
@@ -396,7 +402,6 @@ def grant_public_read(cls: Type[S3IndexType], key: str) -> None:
     @classmethod
     def fetch_object_names(cls: Type[S3IndexType], prefix: str) -> List[str]:
         obj_names = []
-        whls_with_metadata = set()
         for obj in BUCKET.objects.filter(Prefix=prefix):
             is_acceptable = any([path.dirname(obj.key) == prefix] + [
                 match(
@@ -408,10 +413,6 @@ def fetch_object_names(cls: Type[S3IndexType], prefix: str) -> List[str]:
             if not is_acceptable:
                 continue
             obj_names.append(obj.key)
-
-            sanitized_key = obj.key.replace("+", "%2B")
-            if obj.key.endswith(".whl.metadata"):
-                whls_with_metadata.add(sanitized_key[:-len(".metadata")])
         return obj_names
 
     @classmethod
@@ -419,6 +420,8 @@ def from_S3(cls: Type[S3IndexType], prefix: str) -> S3IndexType:
         prefix = prefix.rstrip("/")
         obj_names = cls.fetch_object_names(prefix)
         objects = []
+        object_metadatas = {}
+
         def fetch_metadata(key: str) :
             return CLIENT.head_object(Bucket=BUCKET.name, Key=key, ChecksumMode="Enabled")
 
@@ -432,14 +435,17 @@ def fetch_metadata(key: str) :
             sha256 = response.get("Metadata", {}).get("checksum-sha256")
             sanitized_key = obj_key.replace("+", "%2B")
             size = response.get("ContentLength")
-            s3_object = S3Object(
-                key=sanitized_key,
-                orig_key=obj_key,
-                checksum=sha256,
-                size=int(size) if size else size,
-            )
-            objects.append(s3_object)
-        return cls(objects, prefix)
+            if sanitized_key.endswith(".metadata"):
+                object_metadatas[sanitized_key[:-len(".metadata")]] = sha256
+            else:
+                s3_object = S3Object(
+                    key=sanitized_key,
+                    orig_key=obj_key,
+                    checksum=sha256,
+                    size=int(size) if size else size,
+                )
+                objects.append(s3_object)
+        return cls(objects, object_metadatas, prefix)
 
     @classmethod
     def undelete_prefix(cls: Type[S3IndexType], prefix: str) -> None:

From 9736eee34b248fc8c2462fe171656fb8ee0c1463 Mon Sep 17 00:00:00 2001
From: Joshua
Date: Fri, 27 Oct 2023 09:43:14 -0500
Subject: [PATCH 7/7] quotes

---
 s3_management/manage.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/s3_management/manage.py b/s3_management/manage.py
index b56444859..2740f5287 100644
--- a/s3_management/manage.py
+++ b/s3_management/manage.py
@@ -289,8 +289,8 @@ def to_simple_package_html(
             attributes = []
             if metadata_sha256 := self.object_metadatas.get(obj.key):
                 # Serve the PEP 658 and PEP 714 metadata attributes
-                attributes += f'data-dist-info-metadata={metadata_sha256}'
-                attributes += f'data-core-metadata={metadata_sha256}'
+                attributes += f'data-dist-info-metadata="{metadata_sha256}"'
+                attributes += f'data-core-metadata="{metadata_sha256}"'
             attributes = " ".join(attributes)
             maybe_fragment = f"#sha256={obj.checksum}" if obj.checksum else ""
             out.append(f'    <a href="/{obj.key}{maybe_fragment}">{path.basename(obj.key).replace("%2B","+")}</a> {attributes}<br/>')