From 80088beb212c67badc0f94bc004d188a5a2d1e1a Mon Sep 17 00:00:00 2001 From: haixiw Date: Mon, 20 May 2024 16:57:24 +0000 Subject: [PATCH 1/5] Add tei cpu image --- src/sagemaker/huggingface/llm_utils.py | 7 +++ .../image_uri_config/huggingface-tei-cpu.json | 59 +++++++++++++++++++ src/sagemaker/image_uris.py | 6 +- .../image_uris/test_huggingface_llm.py | 7 ++- 4 files changed, 74 insertions(+), 5 deletions(-) create mode 100644 src/sagemaker/image_uri_config/huggingface-tei-cpu.json diff --git a/src/sagemaker/huggingface/llm_utils.py b/src/sagemaker/huggingface/llm_utils.py index 974cffcddf..8b790e5e88 100644 --- a/src/sagemaker/huggingface/llm_utils.py +++ b/src/sagemaker/huggingface/llm_utils.py @@ -72,6 +72,13 @@ def get_huggingface_llm_image_uri( version=version, image_scope="inference", ) + if backend == "huggingface-tei-cpu": + return image_uris.retrieve( + "huggingface-tei", + region=region, + version=version, + image_scope="inference", + ) if backend == "lmi": version = version or "0.24.0" return image_uris.retrieve(framework="djl-deepspeed", region=region, version=version) diff --git a/src/sagemaker/image_uri_config/huggingface-tei-cpu.json b/src/sagemaker/image_uri_config/huggingface-tei-cpu.json new file mode 100644 index 0000000000..c891328948 --- /dev/null +++ b/src/sagemaker/image_uri_config/huggingface-tei-cpu.json @@ -0,0 +1,59 @@ +{ + "inference": { + "processors": [ + "cpu" + ], + "version_aliases": { + "1.2": "1.2.3" + }, + "versions": { + "1.2.3": { + "py_versions": [ + "py310" + ], + "registries": { + "af-south-1": "510948584623", + "ap-east-1": "651117190479", + "ap-northeast-1": "354813040037", + "ap-northeast-2": "366743142698", + "ap-northeast-3": "867004704886", + "ap-south-1": "720646828776", + "ap-south-2": "628508329040", + "ap-southeast-1": "121021644041", + "ap-southeast-2": "783357654285", + "ap-southeast-3": "951798379941", + "ap-southeast-4": "106583098589", + "ca-central-1": "341280168497", + "ca-west-1": "190319476487", + 
"cn-north-1": "450853457545", + "cn-northwest-1": "451049120500", + "eu-central-1": "492215442770", + "eu-central-2": "680994064768", + "eu-north-1": "662702820516", + "eu-south-1": "978288397137", + "eu-south-2": "104374241257", + "eu-west-1": "141502667606", + "eu-west-2": "764974769150", + "eu-west-3": "659782779980", + "il-central-1": "898809789911", + "me-central-1": "272398656194", + "me-south-1": "801668240914", + "sa-east-1": "737474898029", + "us-east-1": "683313688378", + "us-east-2": "257758044811", + "us-gov-east-1": "237065988967", + "us-gov-west-1": "414596584902", + "us-iso-east-1": "833128469047", + "us-isob-east-1": "281123927165", + "us-west-1": "746614075791", + "us-west-2": "246618743249" + }, + "tag_prefix": "2.0.1-tei1.2.3", + "repository": "tei-cpu", + "container_version": { + "gpu": "ubuntu22.04" + } + } + } + } +} \ No newline at end of file diff --git a/src/sagemaker/image_uris.py b/src/sagemaker/image_uris.py index 1ca73d0af9..be5167dcc7 100644 --- a/src/sagemaker/image_uris.py +++ b/src/sagemaker/image_uris.py @@ -37,7 +37,8 @@ ECR_URI_TEMPLATE = "{registry}.dkr.{hostname}/{repository}" HUGGING_FACE_FRAMEWORK = "huggingface" HUGGING_FACE_LLM_FRAMEWORK = "huggingface-llm" -HUGGING_FACE_TEI_FRAMEWORK = "huggingface-tei" +HUGGING_FACE_TEI_GPU_FRAMEWORK = "huggingface-tei" +HUGGING_FACE_TEI_CPU_FRAMEWORK = "huggingface-tei-cpu" HUGGING_FACE_LLM_NEURONX_FRAMEWORK = "huggingface-llm-neuronx" XGBOOST_FRAMEWORK = "xgboost" SKLEARN_FRAMEWORK = "sklearn" @@ -478,7 +479,8 @@ def _validate_version_and_set_if_needed(version, config, framework): if version is None and framework in [ DATA_WRANGLER_FRAMEWORK, HUGGING_FACE_LLM_FRAMEWORK, - HUGGING_FACE_TEI_FRAMEWORK, + HUGGING_FACE_TEI_GPU_FRAMEWORK, + HUGGING_FACE_TEI_CPU_FRAMEWORK, HUGGING_FACE_LLM_NEURONX_FRAMEWORK, STABILITYAI_FRAMEWORK, ]: diff --git a/tests/unit/sagemaker/image_uris/test_huggingface_llm.py b/tests/unit/sagemaker/image_uris/test_huggingface_llm.py index b1e8e8253e..43d2614568 100644 
--- a/tests/unit/sagemaker/image_uris/test_huggingface_llm.py +++ b/tests/unit/sagemaker/image_uris/test_huggingface_llm.py @@ -73,17 +73,18 @@ def test_huggingface_uris(load_config): assert expected == uri -@pytest.mark.parametrize("load_config", ["huggingface-tei.json"], indirect=True) +@pytest.mark.parametrize("load_config", ["huggingface-tei.json", "huggingface-tei-cpu.json"], indirect=True) def test_huggingface_tei_uris(load_config): VERSIONS = load_config["inference"]["versions"] device = load_config["inference"]["processors"][0] - backend = "huggingface-tei" + backend = "huggingface-tei" if device == "gpu" else "huggingface-tei-cpu" + repo = "tei" if device == "gpu" else "tei-cpu" for version in VERSIONS: ACCOUNTS = load_config["inference"]["versions"][version]["registries"] for region in ACCOUNTS.keys(): uri = get_huggingface_llm_image_uri(backend, region=region, version=version) expected = expected_uris.huggingface_llm_framework_uri( - "tei", + repo, ACCOUNTS[region], version, TEI_VERSIONS_MAPPING[device][version], From e4090c9fbd240170ef2363c326db4b7ee711585a Mon Sep 17 00:00:00 2001 From: haixiw Date: Mon, 20 May 2024 17:21:50 +0000 Subject: [PATCH 2/5] fix format issue --- tests/unit/sagemaker/image_uris/test_huggingface_llm.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/unit/sagemaker/image_uris/test_huggingface_llm.py b/tests/unit/sagemaker/image_uris/test_huggingface_llm.py index 43d2614568..fc4b6e31dd 100644 --- a/tests/unit/sagemaker/image_uris/test_huggingface_llm.py +++ b/tests/unit/sagemaker/image_uris/test_huggingface_llm.py @@ -73,7 +73,9 @@ def test_huggingface_uris(load_config): assert expected == uri -@pytest.mark.parametrize("load_config", ["huggingface-tei.json", "huggingface-tei-cpu.json"], indirect=True) +@pytest.mark.parametrize( + "load_config", ["huggingface-tei.json", "huggingface-tei-cpu.json"], indirect=True +) def test_huggingface_tei_uris(load_config): VERSIONS = 
load_config["inference"]["versions"] device = load_config["inference"]["processors"][0] From 815ae086cdf7e9da63ad4993325c89eef515dffb Mon Sep 17 00:00:00 2001 From: haixiw Date: Mon, 20 May 2024 17:52:17 +0000 Subject: [PATCH 3/5] fix unit tests --- tests/unit/sagemaker/image_uris/test_huggingface_llm.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/unit/sagemaker/image_uris/test_huggingface_llm.py b/tests/unit/sagemaker/image_uris/test_huggingface_llm.py index fc4b6e31dd..fa10fd24fe 100644 --- a/tests/unit/sagemaker/image_uris/test_huggingface_llm.py +++ b/tests/unit/sagemaker/image_uris/test_huggingface_llm.py @@ -22,6 +22,9 @@ "gpu": { "1.2.3": "2.0.1-tei1.2.3-gpu-py310-cu122-ubuntu22.04", }, + "cpu": { + "1.2.3": "2.0.1-tei1.2.3-cpu-py310-ubuntu22.04", + }, } HF_VERSIONS_MAPPING = { "gpu": { From 92af5692bd63c4e6a4facbdec87f240fb75b34d3 Mon Sep 17 00:00:00 2001 From: haixiw Date: Mon, 20 May 2024 19:14:05 +0000 Subject: [PATCH 4/5] Retrieve the huggingface-tei-cpu image for the huggingface-tei-cpu backend --- src/sagemaker/huggingface/llm_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/sagemaker/huggingface/llm_utils.py b/src/sagemaker/huggingface/llm_utils.py index 8b790e5e88..9927d1d293 100644 --- a/src/sagemaker/huggingface/llm_utils.py +++ b/src/sagemaker/huggingface/llm_utils.py @@ -74,7 +74,7 @@ def get_huggingface_llm_image_uri( ) if backend == "huggingface-tei-cpu": return image_uris.retrieve( - "huggingface-tei", + "huggingface-tei-cpu", region=region, version=version, image_scope="inference", From adbe929dcaefa7effc8d0f2d08e4a3bcf3cb37e1 Mon Sep 17 00:00:00 2001 From: haixiw Date: Mon, 20 May 2024 21:17:26 +0000 Subject: [PATCH 5/5] Key huggingface-tei-cpu container_version by cpu instead of gpu --- src/sagemaker/image_uri_config/huggingface-tei-cpu.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/sagemaker/image_uri_config/huggingface-tei-cpu.json b/src/sagemaker/image_uri_config/huggingface-tei-cpu.json index c891328948..d68b0d6307 100644 --- a/src/sagemaker/image_uri_config/huggingface-tei-cpu.json 
+++ b/src/sagemaker/image_uri_config/huggingface-tei-cpu.json @@ -51,7 +51,7 @@ "tag_prefix": "2.0.1-tei1.2.3", "repository": "tei-cpu", "container_version": { - "gpu": "ubuntu22.04" + "cpu": "ubuntu22.04" } } }