[ci] Add models in text embedding integration
xyang16 committed Aug 21, 2024
1 parent db7bf29 commit 78b5ce0
Showing 3 changed files with 137 additions and 39 deletions.
79 changes: 45 additions & 34 deletions tests/integration/llm/client.py
@@ -840,15 +840,45 @@ def get_model_name():

 text_embedding_model_spec = {
     "bge-base": {
-        "max_memory_per_gpu": [2.0, 2.0, 2.0, 2.0],
-        "batch_size": [1, 2, 4, 8],
-    }
-}
-
-reranking_model_spec = {
+        "max_memory_per_gpu": [2.0, 2.0],
+        "batch_size": [1, 8],
+    },
+    "e5-base-v2": {
+        "max_memory_per_gpu": [2.0, 2.0],
+        "batch_size": [1, 8],
+    },
+    "sentence-camembert-large": {
+        "max_memory_per_gpu": [3.0, 3.0],
+        "batch_size": [1, 8],
+    },
+    "roberta-base": {
+        "max_memory_per_gpu": [2.0, 2.0],
+        "batch_size": [1, 8],
+    },
+    "msmarco-distilbert-base-v4": {
+        "max_memory_per_gpu": [2.0, 2.0],
+        "batch_size": [1, 8],
+    },
     "bge-reranker": {
-        "max_memory_per_gpu": [5.0, 5.0, 5.0, 5.0],
-        "batch_size": [1, 2, 4, 8],
+        "max_memory_per_gpu": [3.0, 3.0],
+        "batch_size": [1, 8],
+        "reranking": True,
+    },
+    "e5-mistral-7b": {
+        "max_memory_per_gpu": [18.0, 18.0],
+        "batch_size": [1, 8],
+    },
+    "gte-qwen2-7b": {
+        "max_memory_per_gpu": [18.0, 18.0],
+        "batch_size": [1, 8],
+    },
+    "gte-large": {
+        "max_memory_per_gpu": [3.0, 3.0],
+        "batch_size": [1, 8],
+    },
+    "bge-multilingual-gemma2": {
+        "max_memory_per_gpu": [20.0, 20.0],
+        "batch_size": [1, 8],
     }
 }

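Note: reranking models are now folded into text_embedding_model_spec and flagged with "reranking": True; the client uses that flag to pick the request shape (see the test_text_embedding_model hunk further down). A minimal sketch of the two payload shapes, with illustrative literals, since batch_generation and batch_generation_pair are defined elsewhere in client.py:

# Sketch only: the real builders (batch_generation, batch_generation_pair)
# live elsewhere in client.py; the literal strings here are illustrative
# assumptions, not the repo's exact test data.
def build_request(spec: dict, batch_size: int) -> dict:
    if spec.get("reranking", False):
        # Rerankers score query/text pairs instead of embedding raw inputs.
        return {
            "query": "what is panda?",
            "texts": ["The giant panda is a bear native to China."] * batch_size,
        }
    # Embedding models take a flat list of inputs.
    return {"inputs": ["The quick brown fox jumps over the lazy dog"] * batch_size}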
@@ -952,7 +982,7 @@ def send_json(data):

     if resp.status_code >= 300:
         LOGGER.exception(f"HTTP error: {resp}")
-        raise ValueError("Failed to send reqeust to model server")
+        raise ValueError("Failed to send request to model server")
     return resp


@@ -1025,7 +1055,7 @@ def send_image_json(img_url, data):

     if resp.status_code >= 300:
         LOGGER.exception(f"HTTP error: {resp}")
-        raise ValueError("Failed to send reqeust to model server")
+        raise ValueError("Failed to send request to model server")
     return resp


@@ -1708,8 +1738,12 @@ def test_text_embedding_model(model, model_spec):
     spec = model_spec[args.model]
     if "worker" in spec:
         check_worker_number(spec["worker"])
+    reranking = spec.get("reranking", False)
     for i, batch_size in enumerate(spec["batch_size"]):
-        req = {"inputs": batch_generation(batch_size)}
+        if reranking:
+            req = batch_generation_pair(batch_size)
+        else:
+            req = {"inputs": batch_generation(batch_size)}
         logging.info(f"req {req}")
         res = send_json(req).json()
         logging.info(f"res: {res}")
@@ -1722,27 +1756,6 @@ def test_text_embedding_model(model, model_spec):
         awscurl_run(req, spec.get("tokenizer"), batch_size)
 
 
-def test_reranking_model(model, model_spec):
-    if model not in model_spec:
-        raise ValueError(
-            f"{args.model} is not one of the supporting models {list(model_spec.keys())}"
-        )
-    spec = model_spec[args.model]
-    if "worker" in spec:
-        check_worker_number(spec["worker"])
-    for i, batch_size in enumerate(spec["batch_size"]):
-        req = batch_generation_pair(batch_size)
-        logging.info(f"req {req}")
-        res = send_json(req).json()
-        logging.info(f"res: {res}")
-        if "max_memory_per_gpu" in spec:
-            validate_memory_usage(spec["max_memory_per_gpu"][i])
-
-        # awscurl little benchmark phase
-        logging.info(f"Little benchmark: concurrency {batch_size}")
-        awscurl_run(req, spec.get("tokenizer"), batch_size)
-
-
 def run(raw_args):
     parser = argparse.ArgumentParser(description="Build the LLM configs")
     parser.add_argument("handler", help="the handler used in the model")
@@ -1833,8 +1846,6 @@ def run(raw_args):
         test_multimodal(args.model, multi_modal_spec)
     elif args.handler == "text_embedding":
         test_text_embedding_model(args.model, text_embedding_model_spec)
-    elif args.handler == "reranking":
-        test_reranking_model(args.model, reranking_model_spec)
 
     else:
         raise ValueError(
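With the separate reranking handler removed, rerankers go through the same text_embedding entry point. A quick usage sketch, mirroring how tests.py (below) drives the client, and assuming client.py is importable from the same directory:

# Both model families now use the "text_embedding" handler; the
# "reranking" handler no longer exists.
import client  # tests/integration/llm/client.py

client.run("text_embedding bge-base".split())      # embedding model
client.run("text_embedding bge-reranker".split())  # reranker, selected via the spec flag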
47 changes: 43 additions & 4 deletions tests/integration/llm/prepare.py
@@ -1065,14 +1065,53 @@
 text_embedding_model_list = {
     "bge-base": {
         "option.model_id": "BAAI/bge-base-en-v1.5",
-        "batch_size": 32,
+        "batch_size": 8,
     },
+    "e5-base-v2": {
+        "option.model_id": "intfloat/e5-base-v2",
+        "pooling": "cls",
+        "batch_size": 8,
+    },
+    "sentence-camembert-large": {
+        "option.model_id": "dangvantuan/sentence-camembert-large",
+        "pooling": "cls",
+        "batch_size": 8,
+    },
+    "roberta-base": {
+        "option.model_id": "relbert/relbert-roberta-base-nce-conceptnet",
+        "pooling": "cls",
+        "batch_size": 8,
+    },
+    "msmarco-distilbert-base-v4": {
+        "option.model_id": "sentence-transformers/msmarco-distilbert-base-v4",
+        "pooling": "cls",
+        "batch_size": 8,
+    },
     "bge-reranker": {
         "option.model_id": "BAAI/bge-reranker-base",
         "reranking": True,
         "includeTokenTypes": True,
         "sigmoid": False,
-        "batch_size": 32,
+        "batch_size": 8,
+    },
+    "e5-mistral-7b": {
+        "option.model_id": "intfloat/e5-mistral-7b-instruct",
+        "pooling": "cls",
+        "batch_size": 8,
+    },
+    "gte-qwen2-7b": {
+        "option.model_id": "Alibaba-NLP/gte-Qwen2-7B-instruct",
+        "pooling": "cls",
+        "batch_size": 8,
+    },
+    "gte-large": {
+        "option.model_id": "Alibaba-NLP/gte-large-en-v1.5",
+        "option.trust_remote_code": "true",
+        "pooling": "cls",
+        "batch_size": 8,
+    },
+    "bge-multilingual-gemma2": {
+        "option.model_id": "BAAI/bge-multilingual-gemma2",
+        "pooling": "cls",
+        "batch_size": 8,
     }
 }

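build_text_embedding_model itself is not part of this diff. As a rough sketch of what these entries configure, assuming (hypothetically; the real logic lives in prepare.py) that option.* keys are written into the model's serving.properties while the remaining keys parameterize the test:

# Hypothetical sketch, not the repo's implementation: write option.* keys
# to serving.properties; keys like "pooling" and "batch_size" stay test-side.
def write_serving_properties(spec: dict, path: str = "serving.properties") -> None:
    with open(path, "w") as f:
        for key, value in spec.items():
            if key.startswith("option."):
                f.write(f"{key}={value}\n")

write_serving_properties({
    "option.model_id": "Alibaba-NLP/gte-large-en-v1.5",
    "option.trust_remote_code": "true",
    "pooling": "cls",
    "batch_size": 8,
})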
50 changes: 49 additions & 1 deletion tests/integration/tests.py
@@ -933,11 +933,59 @@ def test_bge_base(self):
             r.launch()
             client.run("text_embedding bge-base".split())
 
+    def test_e5_base_v2(self):
+        with Runner('lmi', 'e5-base-v2') as r:
+            prepare.build_text_embedding_model("e5-base-v2")
+            r.launch()
+            client.run("text_embedding e5-base-v2".split())
+
+    def test_sentence_camembert_large(self):
+        with Runner('lmi', 'sentence-camembert-large') as r:
+            prepare.build_text_embedding_model("sentence-camembert-large")
+            r.launch()
+            client.run("text_embedding sentence-camembert-large".split())
+
+    def test_roberta_base(self):
+        with Runner('lmi', 'roberta-base') as r:
+            prepare.build_text_embedding_model("roberta-base")
+            r.launch()
+            client.run("text_embedding roberta-base".split())
+
+    def test_msmarco_distilbert_base_v4(self):
+        with Runner('lmi', 'msmarco-distilbert-base-v4') as r:
+            prepare.build_text_embedding_model("msmarco-distilbert-base-v4")
+            r.launch()
+            client.run("text_embedding msmarco-distilbert-base-v4".split())
+
     def test_bge_reranker(self):
         with Runner('lmi', 'bge-reranker') as r:
             prepare.build_text_embedding_model("bge-reranker")
             r.launch()
-            client.run("reranking bge-reranker".split())
+            client.run("text_embedding bge-reranker".split())
+
+    def test_e5_mistral_7b(self):
+        with Runner('lmi', 'e5-mistral-7b') as r:
+            prepare.build_text_embedding_model("e5-mistral-7b")
+            r.launch()
+            client.run("text_embedding e5-mistral-7b".split())
+
+    def test_gte_qwen2_7b(self):
+        with Runner('lmi', 'gte-qwen2-7b') as r:
+            prepare.build_text_embedding_model("gte-qwen2-7b")
+            r.launch()
+            client.run("text_embedding gte-qwen2-7b".split())
+
+    def test_gte_large(self):
+        with Runner('lmi', 'gte-large') as r:
+            prepare.build_text_embedding_model("gte-large")
+            r.launch()
+            client.run("text_embedding gte-large".split())
+
+    def test_bge_multilingual_gemma2(self):
+        with Runner('lmi', 'bge-multilingual-gemma2') as r:
+            prepare.build_text_embedding_model("bge-multilingual-gemma2")
+            r.launch()
+            client.run("text_embedding bge-multilingual-gemma2".split())
 
 
 @pytest.mark.gpu
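The new tests are deliberately uniform. If the model list keeps growing, a parametrized variant would give the same coverage more compactly; a sketch, assuming the same Runner, prepare, and client helpers (and dropping the test class for brevity):

# Sketch, not part of this commit: equivalent coverage via parametrization.
import pytest

TEXT_EMBEDDING_MODELS = [
    "bge-base", "e5-base-v2", "sentence-camembert-large", "roberta-base",
    "msmarco-distilbert-base-v4", "bge-reranker", "e5-mistral-7b",
    "gte-qwen2-7b", "gte-large", "bge-multilingual-gemma2",
]

@pytest.mark.parametrize("model", TEXT_EMBEDDING_MODELS)
def test_text_embedding(model):
    with Runner('lmi', model) as r:
        prepare.build_text_embedding_model(model)
        r.launch()
        client.run(f"text_embedding {model}".split())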