-
Notifications
You must be signed in to change notification settings - Fork 111
/
test_optimum_embedding.py
68 lines (54 loc) · 1.78 KB
/
test_optimum_embedding.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
import pytest
from asgi_lifespan import LifespanManager
from httpx import AsyncClient
from sentence_transformers import SentenceTransformer # type: ignore
from infinity_emb import create_server
from infinity_emb.args import EngineArgs
from infinity_emb.primitives import Device, InferenceEngine
# Route prefix and model under test for the optimum-engine server.
PREFIX = "/v1_optimum"
# NOTE(review): DEFAULT_BERT_MODEL is injected onto the pytest module by the
# project's conftest — confirm against conftest.py.
MODEL: str = pytest.DEFAULT_BERT_MODEL  # type: ignore
batch_size = 8

# Single optimum-engine deployment, forced onto CPU for test determinism.
_engine_args = EngineArgs(
    model_name_or_path=MODEL,
    batch_size=batch_size,
    engine=InferenceEngine.optimum,
    device=Device.cpu,
)
app = create_server(url_prefix=PREFIX, engine_args_list=[_engine_args])
@pytest.fixture
def model_base() -> SentenceTransformer:
    """Local SentenceTransformer used as the reference for embedding checks."""
    reference = SentenceTransformer(MODEL)
    return reference
@pytest.fixture()
async def client():
    """Yield an httpx client bound to the ASGI app, with its lifespan running.

    The client is entered first and the lifespan second, matching the
    original combined ``async with``; requests only happen after both are up.
    NOTE(review): ``AsyncClient(app=...)`` is deprecated in newer httpx in
    favor of ``transport=ASGITransport(app=app)`` — confirm the pinned
    httpx version before upgrading.
    """
    http_client = AsyncClient(app=app, base_url="http://test", timeout=20)
    async with http_client as active_client:
        async with LifespanManager(app):
            yield active_client
@pytest.mark.anyio
async def test_model_route(client):
    """The /models endpoint lists the served model id and a stats dict."""
    response = await client.get(f"{PREFIX}/models")
    assert response.status_code == 200
    payload = response.json()
    assert "data" in payload
    first_entry = payload["data"][0]
    assert first_entry.get("id", "") == MODEL
    assert isinstance(first_entry.get("stats"), dict)
@pytest.mark.anyio
async def test_embedding(client, model_base, helpers):
    """Server embeddings must match the local reference model to 2 decimals."""
    await helpers.embedding_verify(
        client,
        model_base,
        prefix=PREFIX,
        model_name=MODEL,
        decimal=2,
    )
@pytest.mark.performance
@pytest.mark.anyio
async def test_batch_embedding(client, get_sts_bechmark_dataset, model_base, helpers):
    """Batched embeddings over a downsampled STS benchmark dataset.

    The ``sts_bechmark_dataset`` keyword (sic) matches the helper's declared
    parameter name elsewhere in the project and must not be "corrected" here.
    """
    run_args = dict(
        client=client,
        sts_bechmark_dataset=get_sts_bechmark_dataset,
        model_base=model_base,
        prefix=PREFIX,
        model_name=MODEL,
        batch_size=batch_size,
        downsample=16,
        decimal=1,
    )
    await helpers.util_batch_embedding(**run_args)