[ci] Add models in text embedding integration #2320

Merged: 1 commit, Aug 21, 2024
tests/integration/llm/client.py (45 additions, 34 deletions)
@@ -840,15 +840,45 @@ def get_model_name():

 text_embedding_model_spec = {
     "bge-base": {
-        "max_memory_per_gpu": [2.0, 2.0, 2.0, 2.0],
-        "batch_size": [1, 2, 4, 8],
-    }
-}
-
-reranking_model_spec = {
+        "max_memory_per_gpu": [2.0, 2.0],
+        "batch_size": [1, 8],
+    },
+    "e5-base-v2": {
+        "max_memory_per_gpu": [2.0, 2.0],
+        "batch_size": [1, 8],
+    },
+    "sentence-camembert-large": {
+        "max_memory_per_gpu": [3.0, 3.0],
+        "batch_size": [1, 8],
+    },
+    "roberta-base": {
+        "max_memory_per_gpu": [2.0, 2.0],
+        "batch_size": [1, 8],
+    },
+    "msmarco-distilbert-base-v4": {
+        "max_memory_per_gpu": [2.0, 2.0],
+        "batch_size": [1, 8],
+    },
     "bge-reranker": {
-        "max_memory_per_gpu": [5.0, 5.0, 5.0, 5.0],
-        "batch_size": [1, 2, 4, 8],
+        "max_memory_per_gpu": [3.0, 3.0],
+        "batch_size": [1, 8],
+        "reranking": True,
     },
+    "e5-mistral-7b": {
+        "max_memory_per_gpu": [18.0, 18.0],
+        "batch_size": [1, 8],
+    },
+    "gte-qwen2-7b": {
+        "max_memory_per_gpu": [18.0, 18.0],
+        "batch_size": [1, 8],
+    },
+    "gte-large": {
+        "max_memory_per_gpu": [3.0, 3.0],
+        "batch_size": [1, 8],
+    },
+    "bge-multilingual-gemma2": {
+        "max_memory_per_gpu": [20.0, 20.0],
+        "batch_size": [1, 8],
     }
 }
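
Note the positional pairing in these specs: batch_size[i] is validated against max_memory_per_gpu[i]. The test loop in test_text_embedding_model (further down in this diff) consumes the two lists index by index, roughly as sketched below; the unit is assumed to be GB, and the values are copied from the "bge-reranker" entry above.

    # Illustration of how the paired spec lists are consumed
    # (see the test loop in test_text_embedding_model below).
    spec = {"max_memory_per_gpu": [3.0, 3.0], "batch_size": [1, 8]}

    for i, batch_size in enumerate(spec["batch_size"]):
        budget = spec["max_memory_per_gpu"][i]
        print(f"batch_size={batch_size} must stay under {budget} GB per GPU")
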

@@ -952,7 +982,7 @@ def send_json(data):

     if resp.status_code >= 300:
         LOGGER.exception(f"HTTP error: {resp}")
-        raise ValueError("Failed to send reqeust to model server")
+        raise ValueError("Failed to send request to model server")
     return resp


@@ -1025,7 +1055,7 @@ def send_image_json(img_url, data):

     if resp.status_code >= 300:
         LOGGER.exception(f"HTTP error: {resp}")
-        raise ValueError("Failed to send reqeust to model server")
+        raise ValueError("Failed to send request to model server")
     return resp

@@ -1708,8 +1738,12 @@ def test_text_embedding_model(model, model_spec):
     spec = model_spec[args.model]
     if "worker" in spec:
         check_worker_number(spec["worker"])
+    reranking = spec.get("reranking", False)
     for i, batch_size in enumerate(spec["batch_size"]):
-        req = {"inputs": batch_generation(batch_size)}
+        if reranking:
+            req = batch_generation_pair(batch_size)
+        else:
+            req = {"inputs": batch_generation(batch_size)}
         logging.info(f"req {req}")
         res = send_json(req).json()
         logging.info(f"res: {res}")
@@ -1722,27 +1756,6 @@ def test_text_embedding_model(model, model_spec):
         awscurl_run(req, spec.get("tokenizer"), batch_size)


-def test_reranking_model(model, model_spec):
-    if model not in model_spec:
-        raise ValueError(
-            f"{args.model} is not one of the supporting models {list(model_spec.keys())}"
-        )
-    spec = model_spec[args.model]
-    if "worker" in spec:
-        check_worker_number(spec["worker"])
-    for i, batch_size in enumerate(spec["batch_size"]):
-        req = batch_generation_pair(batch_size)
-        logging.info(f"req {req}")
-        res = send_json(req).json()
-        logging.info(f"res: {res}")
-        if "max_memory_per_gpu" in spec:
-            validate_memory_usage(spec["max_memory_per_gpu"][i])
-
-        # awscurl little benchmark phase
-        logging.info(f"Little benchmark: concurrency {batch_size}")
-        awscurl_run(req, spec.get("tokenizer"), batch_size)
-
-
 def run(raw_args):
     parser = argparse.ArgumentParser(description="Build the LLM configs")
     parser.add_argument("handler", help="the handler used in the model")
@@ -1833,8 +1846,6 @@ def run(raw_args):
         test_multimodal(args.model, multi_modal_spec)
     elif args.handler == "text_embedding":
         test_text_embedding_model(args.model, text_embedding_model_spec)
-    elif args.handler == "reranking":
-        test_reranking_model(args.model, reranking_model_spec)

     else:
         raise ValueError(
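
With the separate reranking handler removed, reranking models are exercised through the text_embedding path end to end. tests/integration/tests.py drives this same entry point directly, for example:

    # How the integration tests invoke this client after the change
    # (mirrors test_bge_reranker in tests/integration/tests.py).
    import client  # tests/integration/llm/client.py

    client.run("text_embedding bge-reranker".split())
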
tests/integration/llm/prepare.py (43 additions, 4 deletions)
@@ -1065,14 +1065,53 @@

 text_embedding_model_list = {
     "bge-base": {
         "option.model_id": "BAAI/bge-base-en-v1.5",
-        "batch_size": 32,
+        "batch_size": 8,
+    },
+    "e5-base-v2": {
+        "option.model_id": "intfloat/e5-base-v2",
+        "pooling": "cls",
+        "batch_size": 8,
+    },
+    "sentence-camembert-large": {
+        "option.model_id": "dangvantuan/sentence-camembert-large",
+        "pooling": "cls",
+        "batch_size": 8,
+    },
+    "roberta-base": {
+        "option.model_id": "relbert/relbert-roberta-base-nce-conceptnet",
+        "pooling": "cls",
+        "batch_size": 8,
+    },
+    "msmarco-distilbert-base-v4": {
+        "option.model_id": "sentence-transformers/msmarco-distilbert-base-v4",
+        "pooling": "cls",
+        "batch_size": 8,
+    },
     "bge-reranker": {
         "option.model_id": "BAAI/bge-reranker-base",
+        "reranking": True,
         "includeTokenTypes": True,
         "sigmoid": False,
-        "batch_size": 32,
+        "batch_size": 8,
     },
+    "e5-mistral-7b": {
+        "option.model_id": "intfloat/e5-mistral-7b-instruct",
+        "pooling": "cls",
+        "batch_size": 8,
+    },
+    "gte-qwen2-7b": {
+        "option.model_id": "Alibaba-NLP/gte-Qwen2-7B-instruct",
+        "pooling": "cls",
+        "batch_size": 8,
+    },
+    "gte-large": {
+        "option.model_id": "Alibaba-NLP/gte-large-en-v1.5",
+        "option.trust_remote_code": "true",
+        "pooling": "cls",
+        "batch_size": 8,
+    },
+    "bge-multilingual-gemma2": {
+        "option.model_id": "BAAI/bge-multilingual-gemma2",
+        "pooling": "cls",
+        "batch_size": 8,
     }
 }
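
Each entry above is keyed by the short model name used in the tests; the option.* keys follow DJL Serving's serving.properties convention, while the remaining keys (pooling, reranking, includeTokenTypes, sigmoid, batch_size) are consumed by the serving handler or the test harness. The sketch below is a hypothetical illustration of how build_text_embedding_model might materialize one entry as a serving.properties file; the actual implementation lives elsewhere in prepare.py and may differ.

    # Hypothetical sketch only: write one model entry out as serving.properties.
    import os

    text_embedding_model_list = {
        "gte-large": {
            "option.model_id": "Alibaba-NLP/gte-large-en-v1.5",
            "option.trust_remote_code": "true",
            "pooling": "cls",
            "batch_size": 8,
        }
    }

    def write_serving_properties(model_name, model_dir="models"):
        spec = text_embedding_model_list[model_name]
        path = os.path.join(model_dir, model_name)
        os.makedirs(path, exist_ok=True)
        with open(os.path.join(path, "serving.properties"), "w") as f:
            for key, value in spec.items():
                # Booleans are lowered to the Java-properties convention.
                if isinstance(value, bool):
                    value = str(value).lower()
                f.write(f"{key}={value}\n")

    write_serving_properties("gte-large")
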
tests/integration/tests.py (49 additions, 1 deletion)
@@ -933,11 +933,59 @@ def test_bge_base(self):
             r.launch()
             client.run("text_embedding bge-base".split())

+    def test_e5_base_v2(self):
+        with Runner('lmi', 'e5-base-v2') as r:
+            prepare.build_text_embedding_model("e5-base-v2")
+            r.launch()
+            client.run("text_embedding e5-base-v2".split())
+
+    def test_sentence_camembert_large(self):
+        with Runner('lmi', 'sentence-camembert-large') as r:
+            prepare.build_text_embedding_model("sentence-camembert-large")
+            r.launch()
+            client.run("text_embedding sentence-camembert-large".split())
+
+    def test_roberta_base(self):
+        with Runner('lmi', 'roberta-base') as r:
+            prepare.build_text_embedding_model("roberta-base")
+            r.launch()
+            client.run("text_embedding roberta-base".split())
+
+    def test_msmarco_distilbert_base_v4(self):
+        with Runner('lmi', 'msmarco-distilbert-base-v4') as r:
+            prepare.build_text_embedding_model("msmarco-distilbert-base-v4")
+            r.launch()
+            client.run("text_embedding msmarco-distilbert-base-v4".split())
+
     def test_bge_reranker(self):
         with Runner('lmi', 'bge-reranker') as r:
             prepare.build_text_embedding_model("bge-reranker")
             r.launch()
-            client.run("reranking bge-reranker".split())
+            client.run("text_embedding bge-reranker".split())
+
+    def test_e5_mistral_7b(self):
+        with Runner('lmi', 'e5-mistral-7b') as r:
+            prepare.build_text_embedding_model("e5-mistral-7b")
+            r.launch()
+            client.run("text_embedding e5-mistral-7b".split())
+
+    def test_gte_qwen2_7b(self):
+        with Runner('lmi', 'gte-qwen2-7b') as r:
+            prepare.build_text_embedding_model("gte-qwen2-7b")
+            r.launch()
+            client.run("text_embedding gte-qwen2-7b".split())
+
+    def test_gte_large(self):
+        with Runner('lmi', 'gte-large') as r:
+            prepare.build_text_embedding_model("gte-large")
+            r.launch()
+            client.run("text_embedding gte-large".split())
+
+    def test_bge_multilingual_gemma2(self):
+        with Runner('lmi', 'bge-multilingual-gemma2') as r:
+            prepare.build_text_embedding_model("bge-multilingual-gemma2")
+            r.launch()
+            client.run("text_embedding bge-multilingual-gemma2".split())


 @pytest.mark.gpu
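
Each new case follows the established pattern: build the model config with prepare.build_text_embedding_model, launch the container through the Runner context manager, then drive client.run with the text_embedding handler. Adding a further model would look like the hypothetical sketch below, where "my-embedder" is a made-up name that would also need entries in the client.py and prepare.py model lists.

    def test_my_embedder(self):
        # Hypothetical example of extending the suite; not part of this PR.
        with Runner('lmi', 'my-embedder') as r:
            prepare.build_text_embedding_model("my-embedder")
            r.launch()
            client.run("text_embedding my-embedder".split())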