poetry unlock (#430)
* poetry unlock

* remove embed lock
michaelfeil authored Oct 19, 2024
1 parent a41b1ec commit 0a80dff
Showing 55 changed files with 1,271 additions and 1,409 deletions.
7 changes: 3 additions & 4 deletions libs/embed_package/Makefile
@@ -32,13 +32,12 @@ lint format: PYTHON_FILES=.
 lint_diff format_diff: PYTHON_FILES=$(shell git diff --relative=libs/infinity_emb --name-only --diff-filter=d master | grep -E '\.py$$|\.ipynb$$')
 
 lint lint_diff:
-	poetry run ruff .
-	[ "$(PYTHON_FILES)" = "" ] || poetry run black $(PYTHON_FILES) --check
+	poetry run ruff check .
+
 	[ "$(PYTHON_FILES)" = "" ] || poetry run mypy $(PYTHON_FILES)
 
 format format_diff:
-	[ "$(PYTHON_FILES)" = "" ] || poetry run black $(PYTHON_FILES)
-	[ "$(PYTHON_FILES)" = "" ] || poetry run ruff --select I --fix $(PYTHON_FILES)
+	[ "$(PYTHON_FILES)" = "" ] || poetry run ruff format $(PYTHON_FILES)
 
 poetry_check:
 	poetry check
475 changes: 237 additions & 238 deletions libs/embed_package/poetry.lock

Large diffs are not rendered by default.

6 changes: 3 additions & 3 deletions libs/embed_package/pyproject.toml
@@ -18,9 +18,9 @@ python = ">=3.9,<4"
 # infinity_emb = {path = "../infinity_emb", extras = ["optimum","vision","torch","audio"]}
 infinity_emb = {version = "0.0.58", extras = ["optimum","vision","torch","audio"]}
 [tool.poetry.group.test.dependencies]
-pytest = "^7.0.0"
-coverage = {extras = ["toml"], version = "^7.3.2"}
-mypy = "^1.5.1"
+pytest = "^8.0.0"
+coverage = {extras = ["toml"], version = "^7.6.3"}
+mypy = "^1.12.0"
 
 [tool.poetry.group.codespell.dependencies]
 codespell = "^2.2.0"
8 changes: 3 additions & 5 deletions libs/infinity_emb/Makefile
@@ -30,17 +30,15 @@ openapi:
 
 # Define a variable for Python and notebook files.
 PYTHON_FILES=.
-lint format: PYTHON_FILES=.
+lint: PYTHON_FILES=./infinity_emb
 lint_diff format_diff: PYTHON_FILES=$(shell git diff --relative=libs/infinity_emb --name-only --diff-filter=d master | grep -E '\.py$$|\.ipynb$$')
 
 lint lint_diff:
-	poetry run ruff .
-	[ "$(PYTHON_FILES)" = "" ] || poetry run black $(PYTHON_FILES) --check
+	poetry run ruff check .
 	[ "$(PYTHON_FILES)" = "" ] || poetry run mypy $(PYTHON_FILES)
 
 format format_diff:
-	[ "$(PYTHON_FILES)" = "" ] || poetry run black $(PYTHON_FILES)
-	[ "$(PYTHON_FILES)" = "" ] || poetry run ruff --select I --fix $(PYTHON_FILES)
+	[ "$(PYTHON_FILES)" = "" ] || poetry run ruff format $(PYTHON_FILES)
 
 template_docker:
 	jinja2 Dockerfile.jinja2 Docker.template.yaml --format=yaml -s amd > Dockerfile.amd_auto
8 changes: 2 additions & 6 deletions libs/infinity_emb/infinity_emb/args.py
@@ -74,13 +74,9 @@ def __post_init__(self):
         if not isinstance(self.dtype, Dtype):
             object.__setattr__(self, "dtype", Dtype[self.dtype])
         if not isinstance(self.pooling_method, PoolingMethod):
-            object.__setattr__(
-                self, "pooling_method", PoolingMethod[self.pooling_method]
-            )
+            object.__setattr__(self, "pooling_method", PoolingMethod[self.pooling_method])
         if not isinstance(self.embedding_dtype, EmbeddingDtype):
-            object.__setattr__(
-                self, "embedding_dtype", EmbeddingDtype[self.embedding_dtype]
-            )
+            object.__setattr__(self, "embedding_dtype", EmbeddingDtype[self.embedding_dtype])
         if not self.served_model_name:
             object.__setattr__(
                 self,
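Context for the hunk above (illustrative, not part of this commit): the change only reflows a frozen-dataclass idiom onto single lines. A minimal sketch of that idiom, with hypothetical stand-in names (`Args`, `Dtype`):

from dataclasses import dataclass
from enum import Enum
from typing import Union


class Dtype(Enum):
    float32 = "float32"
    float16 = "float16"


@dataclass(frozen=True)
class Args:
    dtype: Union[Dtype, str] = "float32"

    def __post_init__(self):
        # a frozen dataclass forbids plain assignment (FrozenInstanceError),
        # so coercing the string to its enum member goes through object.__setattr__
        if not isinstance(self.dtype, Dtype):
            object.__setattr__(self, "dtype", Dtype[self.dtype])


print(Args(dtype="float16").dtype)  # Dtype.float16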
16 changes: 4 additions & 12 deletions libs/infinity_emb/infinity_emb/engine.py
@@ -52,9 +52,7 @@ def __init__(
 
         self.running = False
         self._running_sepamore: Optional[Semaphore] = None
-        self._model, self._min_inference_t, self._max_inference_t = select_model(
-            self._engine_args
-        )
+        self._model, self._min_inference_t, self._max_inference_t = select_model(self._engine_args)
 
     @classmethod
     def from_args(
@@ -130,9 +128,7 @@ def capabilities(self) -> set[ModelCapabilites]:
     def engine_args(self) -> EngineArgs:
         return self._engine_args
 
-    async def embed(
-        self, sentences: list[str]
-    ) -> tuple[list["EmbeddingReturnType"], int]:
+    async def embed(self, sentences: list[str]) -> tuple[list["EmbeddingReturnType"], int]:
         """embed multiple sentences
         Kwargs:
@@ -279,9 +275,7 @@ def __init__(self, engines: Iterable["AsyncEmbeddingEngine"]):
             set(engine.engine_args.served_model_name for engine in engines)
         ):
             raise ValueError("Engines must have unique model names")
-        self.engines_dict = {
-            engine.engine_args.served_model_name: engine for engine in engines
-        }
+        self.engines_dict = {engine.engine_args.served_model_name: engine for engine in engines}
 
     @classmethod
     def from_args(cls, engine_args_array: Iterable[EngineArgs]) -> "AsyncEngineArray":
@@ -358,9 +352,7 @@ async def rerank(
             list[float]: list of scores
             int: token usage
         """
-        return await self[model].rerank(
-            query=query, docs=docs, raw_scores=raw_scores, top_n=top_n
-        )
+        return await self[model].rerank(query=query, docs=docs, raw_scores=raw_scores, top_n=top_n)
 
     async def classify(
         self, *, model: str, sentences: list[str], raw_scores: bool = False
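Context for the `AsyncEngineArray.__init__` hunk above (illustrative, not part of this commit): it compacts a dict comprehension that keys engines by their served model name. A rough sketch of that registry pattern, with hypothetical names:

from typing import Iterable


class Engine:
    def __init__(self, served_model_name: str):
        self.served_model_name = served_model_name


class EngineArray:
    def __init__(self, engines: Iterable[Engine]):
        engines = list(engines)
        names = [e.served_model_name for e in engines]
        if len(names) != len(set(names)):
            raise ValueError("Engines must have unique model names")
        # dict comprehension keyed by model name, as in the diff above
        self.engines_dict = {e.served_model_name: e for e in engines}

    def __getitem__(self, model: str) -> Engine:
        return self.engines_dict[model]


array = EngineArray([Engine("bge-small"), Engine("clip-vit")])
assert array["bge-small"].served_model_name == "bge-small"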
24 changes: 6 additions & 18 deletions libs/infinity_emb/infinity_emb/env.py
@@ -61,9 +61,7 @@ def _optional_infinity_var(self, name: str, default: str = ""):
         self._debug(f"{name}=`{value}`")
         return value
 
-    def _optional_infinity_var_multiple(
-        self, name: str, default: list[str]
-    ) -> list[str]:
+    def _optional_infinity_var_multiple(self, name: str, default: list[str]) -> list[str]:
         self.__pre_fetch_env_manager()
         name = self.to_name(name)
         value = os.getenv(name)
@@ -133,9 +131,7 @@ def vector_disk_cache(self):
     @cached_property
     def lengths_via_tokenize(self):
         return self._to_bool_multiple(
-            self._optional_infinity_var_multiple(
-                "lengths_via_tokenize", default=["false"]
-            )
+            self._optional_infinity_var_multiple("lengths_via_tokenize", default=["false"])
         )
 
     @cached_property
@@ -152,9 +148,7 @@ def bettertransformer(self):
 
     @cached_property
     def preload_only(self):
-        return self._to_bool(
-            self._optional_infinity_var("preload_only", default="false")
-        )
+        return self._to_bool(self._optional_infinity_var("preload_only", default="false"))
 
     @cached_property
     def calibration_dataset_url(self):
@@ -198,9 +192,7 @@ def queue_size(self) -> int:
 
     @cached_property
     def permissive_cors(self):
-        return self._to_bool(
-            self._optional_infinity_var("permissive_cors", default="false")
-        )
+        return self._to_bool(self._optional_infinity_var("permissive_cors", default="false"))
 
     @cached_property
     def url_prefix(self):
@@ -223,19 +215,15 @@ def host(self):
     @cached_property
     def redirect_slash(self):
         route = self._optional_infinity_var("redirect_slash", default="/docs")
-        assert not route or route.startswith(
-            "/"
-        ), "INFINITY_REDIRECT_SLASH must start with /"
+        assert not route or route.startswith("/"), "INFINITY_REDIRECT_SLASH must start with /"
         return route
 
     @cached_property
     def log_level(self):
         return self._optional_infinity_var("log_level", default="info")
 
     def _typed_multiple(self, name: str, cls: type["EnumTypeLike"]) -> list["str"]:
-        result = self._optional_infinity_var_multiple(
-            name, default=[cls.default_value()]
-        )
+        result = self._optional_infinity_var_multiple(name, default=[cls.default_value()])
         assert all(cls(v) for v in result)
         return result
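Context for the env.py hunks above (illustrative, not part of this commit): every change reflows the same pattern, a `cached_property` that reads an `INFINITY_*` environment variable once and coerces it. A minimal sketch of that pattern, with hypothetical class and helper names:

import os
from functools import cached_property


class EnvSettings:
    prefix = "INFINITY_"

    def _optional_var(self, name: str, default: str = "") -> str:
        # e.g. "permissive_cors" -> INFINITY_PERMISSIVE_CORS
        return os.getenv(f"{self.prefix}{name.upper()}", default)

    @staticmethod
    def _to_bool(value: str) -> bool:
        return value.strip().lower() in ("true", "1", "yes")

    @cached_property
    def permissive_cors(self) -> bool:
        # computed on first access, then cached on the instance
        return self._to_bool(self._optional_var("permissive_cors", default="false"))


os.environ["INFINITY_PERMISSIVE_CORS"] = "true"
print(EnvSettings().permissive_cors)  # True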
8 changes: 2 additions & 6 deletions libs/infinity_emb/infinity_emb/fastapi_schemas/data_uri.py
@@ -192,9 +192,7 @@ def __get_pydantic_core_schema__(cls, source_type: Any, handler: Any) -> Any:
         from pydantic_core import core_schema
 
         # return core_schema.no_info_after_validator_function(cls, handler(str))
-        return core_schema.no_info_after_validator_function(
-            cls.validate, core_schema.str_schema()
-        )
+        return core_schema.no_info_after_validator_function(cls.validate, core_schema.str_schema())
 
     @classmethod
     def validate(
@@ -214,9 +212,7 @@ def validate(
         return m
 
     @classmethod
-    def __get_pydantic_json_schema__(
-        cls, schema: MutableMapping[str, Any], handler: Any
-    ) -> Any:
+    def __get_pydantic_json_schema__(cls, schema: MutableMapping[str, Any], handler: Any) -> Any:
         json_schema = handler(schema)
         json_schema.update(
             pattern=DATA_URI_REGEX,
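Context for the data_uri.py hunks above (illustrative, not part of this commit): both reformat pydantic v2 plumbing. A minimal sketch of how `core_schema.no_info_after_validator_function` attaches a classmethod validator to a custom `str` subclass; assumes pydantic v2, and `LowerStr`/`Payload` are hypothetical names:

from typing import Any

from pydantic import BaseModel
from pydantic_core import core_schema


class LowerStr(str):
    @classmethod
    def validate(cls, value: str) -> "LowerStr":
        return cls(value.lower())

    @classmethod
    def __get_pydantic_core_schema__(cls, source_type: Any, handler: Any) -> Any:
        # validate the input as a str first, then run cls.validate on the result
        return core_schema.no_info_after_validator_function(cls.validate, core_schema.str_schema())


class Payload(BaseModel):
    tag: LowerStr


print(Payload(tag="HeLLo").tag)  # "hello"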
7 changes: 4 additions & 3 deletions libs/infinity_emb/infinity_emb/fastapi_schemas/pymodels.py
@@ -197,9 +197,10 @@ def to_embeddings_response(
             raise ValueError(
                 f"model {engine_args.served_model_name} does not support base64 encoding, as it uses uint8-bitpacking with {engine_args.embedding_dtype}"
             )
-        embeddings = [base64.b64encode(np.frombuffer(emb.astype(np.float32), dtype=np.float32)) for emb in embeddings]  # type: ignore
-    elif isinstance(embeddings, np.ndarray):
-        embeddings = embeddings.tolist()
+        embeddings = [
+            base64.b64encode(np.frombuffer(emb.astype(np.float32), dtype=np.float32))  # type: ignore
+            for emb in embeddings
+        ]  # type: ignore
     else:
         embeddings = [e.tolist() for e in embeddings]
     return dict(
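Context for the hunk above (illustrative, not part of this commit): it splits the long comprehension across lines and drops the `np.ndarray` branch. The encoding itself is raw little-endian float32 bytes wrapped in base64; a round-trip sketch with a hypothetical vector:

import base64

import numpy as np

emb = np.array([0.1, -0.2, 0.3], dtype=np.float32)

# server side: raw float32 bytes, base64-encoded
encoded = base64.b64encode(emb.tobytes())

# client side: decode back into the same vector
decoded = np.frombuffer(base64.b64decode(encoded), dtype=np.float32)
assert np.array_equal(emb, decoded)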
18 changes: 5 additions & 13 deletions libs/infinity_emb/infinity_emb/inference/batch_handler.py
@@ -121,9 +121,7 @@ def __init__(
                 " Consider increasing queue size"
             )
 
-    async def embed(
-        self, sentences: list[str]
-    ) -> tuple[list["EmbeddingReturnType"], int]:
+    async def embed(self, sentences: list[str]) -> tuple[list["EmbeddingReturnType"], int]:
         """Schedule a sentence to be embedded. Awaits until embedded.
         Args:
@@ -286,9 +284,7 @@ async def audio_embed(
         embeddings, usage = await self._schedule(items)
         return embeddings, usage
 
-    async def _schedule(
-        self, list_queueitem: Sequence[AbstractSingle]
-    ) -> tuple[list[Any], int]:
+    async def _schedule(self, list_queueitem: Sequence[AbstractSingle]) -> tuple[list[Any], int]:
         """adds list of items to the queue and awaits until these are completed."""
         prios, usage = await self._get_prios_usage(list_queueitem)
         new_prioqueue: list[PrioritizedQueueItem] = []
@@ -331,9 +327,7 @@ def overload_status(self) -> OverloadStatus:
             results_absolute=len(self._result_store),
         )
 
-    async def _get_prios_usage(
-        self, items: Sequence[AbstractSingle]
-    ) -> tuple[list[int], int]:
+    async def _get_prios_usage(self, items: Sequence[AbstractSingle]) -> tuple[list[int], int]:
         """get priorities and usage
         Args:
@@ -465,8 +459,7 @@ def _preprocess_batch(self):
         # - or if many items are queued anyhow, so that a good batch
         #   may be popped already.
         if not self._feature_queue.empty() and (
-            self._feature_queue.full()
-            or (len(self._queue_prio) < self._max_batch_size * 4)
+            self._feature_queue.full() or (len(self._queue_prio) < self._max_batch_size * 4)
         ):
             # add some stochastic delay
             time.sleep(self._batch_delay)
@@ -550,8 +543,7 @@ def _postprocess_batch(self):
 
         if (
             self._postprocess_queue.empty()
-            and self._last_inference
-            < time.perf_counter() + self._batch_delay * 2
+            and self._last_inference < time.perf_counter() + self._batch_delay * 2
         ):
             # 5 ms, assuming this is below
             # 3-50ms for inference on avg.
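Context for the reflowed `embed`/`_schedule` signatures above (illustrative, not part of this commit): they belong to a producer-consumer batching loop where callers enqueue items and await a result. A heavily simplified, hypothetical sketch of that schedule-then-await shape; the real handler adds priorities, batch assembly, and worker threads:

import asyncio


async def main():
    queue: asyncio.Queue = asyncio.Queue()

    async def schedule(sentence: str) -> str:
        # enqueue the item together with a future the batch loop will resolve
        fut: asyncio.Future = asyncio.get_running_loop().create_future()
        await queue.put((sentence, fut))
        return await fut

    async def batch_loop():
        while True:
            sentence, fut = await queue.get()
            fut.set_result(f"embedded:{sentence}")  # stand-in for model inference

    asyncio.create_task(batch_loop())
    print(await asyncio.gather(schedule("hello"), schedule("world")))


asyncio.run(main())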
1 change: 1 addition & 0 deletions libs/infinity_emb/infinity_emb/inference/caching_layer.py
@@ -4,6 +4,7 @@
 This file contains the experimental code for retrieving and storing result of embeddings to diskcache
 which may reduce latency.
 """
+
 import asyncio
 import queue
 import threading
12 changes: 3 additions & 9 deletions libs/infinity_emb/infinity_emb/inference/select_model.py
@@ -22,9 +22,7 @@
 
 def get_engine_type_from_config(
     engine_args: EngineArgs,
-) -> Union[
-    EmbedderEngine, RerankEngine, PredictEngine, ImageEmbedEngine, AudioEmbedEngine
-]:
+) -> Union[EmbedderEngine, RerankEngine, PredictEngine, ImageEmbedEngine, AudioEmbedEngine]:
     """resolved the class of inference engine path from config.json of the repo."""
     if engine_args.engine in [InferenceEngine.debugengine]:
         return EmbedderEngine.from_inference_engine(engine_args.engine)
@@ -44,9 +42,7 @@ def get_engine_type_from_config(
     with open(config_path, "r") as f:
         config = json.load(f)
 
-    if any(
-        "SequenceClassification" in arch for arch in config.get("architectures", [])
-    ):
+    if any("SequenceClassification" in arch for arch in config.get("architectures", [])):
         id2label = config.get("id2label", {"0": "dummy"})
         if len(id2label) < 2:
             return RerankEngine.from_inference_engine(engine_args.engine)
@@ -81,9 +77,7 @@ def select_model(
     # size one, warm up warm start timings.
     loaded_engine.warmup(batch_size=engine_args.batch_size, n_tokens=1)
     # size one token
-    min_inference_t = min(
-        loaded_engine.warmup(batch_size=1, n_tokens=1)[1] for _ in range(10)
-    )
+    min_inference_t = min(loaded_engine.warmup(batch_size=1, n_tokens=1)[1] for _ in range(10))
     loaded_engine.warmup(batch_size=engine_args.batch_size, n_tokens=1)
     emb_per_sec_short, max_inference_t, log_msg = loaded_engine.warmup(
         batch_size=engine_args.batch_size, n_tokens=1
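Context for the `get_engine_type_from_config` hunk above (illustrative, not part of this commit): the reflowed condition sniffs the model's config.json for a `*SequenceClassification` architecture. A small sketch of that check with an inline, hypothetical config:

import json

config = json.loads(
    '{"architectures": ["BertForSequenceClassification"], "id2label": {"0": "dummy"}}'
)

if any("SequenceClassification" in arch for arch in config.get("architectures", [])):
    # a single label means a reranker; two or more means a classifier
    id2label = config.get("id2label", {"0": "dummy"})
    kind = "rerank" if len(id2label) < 2 else "classify"
else:
    kind = "embed"
print(kind)  # "rerank"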
31 changes: 11 additions & 20 deletions libs/infinity_emb/infinity_emb/infinity_server.py
@@ -112,10 +112,7 @@ async def kill_later(seconds: int):
                 await asyncio.sleep(seconds)
                 os.kill(os.getpid(), signal.SIGINT)
 
-            logger.info(
-                f"Preloaded configuration successfully. {engine_args_list} "
-                " -> exit ."
-            )
+            logger.info(f"Preloaded configuration successfully. {engine_args_list} " " -> exit .")
             asyncio.create_task(kill_later(3))
 
         yield
@@ -235,7 +232,7 @@ def _resolve_engine(model: str) -> "AsyncEmbeddingEngine":
         return engine
 
     def _resolve_mixed_input(
-        inputs: Union[DataURIorURL, list[DataURIorURL]]
+        inputs: Union[DataURIorURL, list[DataURIorURL]],
    ) -> list[Union[str, bytes]]:
         if hasattr(inputs, "host"):
             # if it is a single url
@@ -245,7 +242,8 @@ def _resolve_mixed_input(
         else:
             # is list, resolve to bytes or url
             urls_or_bytes: list[Union[str, bytes]] = [  # type: ignore
-                str(d) if hasattr(d, "host") else d.data for d in inputs  # type: ignore
+                str(d) if hasattr(d, "host") else d.data
+                for d in inputs  # type: ignore
             ]
             return urls_or_bytes
 
@@ -650,10 +648,7 @@ def typer_option_resolve(*args):
         if hasattr(args[0], "default") and hasattr(args[0], "envvar")
         else args[0]
     )
-    return (
-        a.default if (hasattr(a, "default") and hasattr(a, "envvar")) else a
-        for a in args
-    )
+    return (a.default if (hasattr(a, "default") and hasattr(a, "envvar")) else a for a in args)
 
 
 # CLI
@@ -694,9 +689,7 @@ def v1(
 ):
     """Infinity API ♾️ cli v1 - deprecated, consider use cli v2 via `infinity_emb v2`."""
     if api_key:
-        raise ValueError(
-            "api_key is not supported in `v1`. Please migrate to `v2`."
-        )
+        raise ValueError("api_key is not supported in `v1`. Please migrate to `v2`.")
     if not (
         embedding_dtype == EmbeddingDtype.float32
         or embedding_dtype == EmbeddingDtype.default_value()
@@ -820,12 +813,8 @@ def v2(
         **_construct("preload_only"),
         help="If true, only downloads models and verifies setup, then exit. Recommended for pre-caching the download in a Dockerfile.",
     ),
-    host: str = typer.Option(
-        **_construct("host"), help="host for the FastAPI uvicorn server"
-    ),
-    port: int = typer.Option(
-        **_construct("port"), help="port for the FastAPI uvicorn server"
-    ),
+    host: str = typer.Option(**_construct("host"), help="host for the FastAPI uvicorn server"),
+    port: int = typer.Option(**_construct("port"), help="port for the FastAPI uvicorn server"),
     url_prefix: str = typer.Option(
         **_construct("url_prefix"),
         callback=validate_url,
@@ -834,7 +823,9 @@ def v2(
     redirect_slash: str = typer.Option(
         **_construct("redirect_slash"), help="where to redirect `/` requests to."
     ),
-    log_level: UVICORN_LOG_LEVELS = typer.Option(**_construct("log_level"), help="console log level."),  # type: ignore
+    log_level: UVICORN_LOG_LEVELS = typer.Option(
+        **_construct("log_level"), help="console log level."
+    ),  # type: ignore
     permissive_cors: bool = typer.Option(
         **_construct("permissive_cors"), help="whether to allow permissive cors."
    ),
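Context for the `v2()` hunks above (illustrative, not part of this commit): they reformat typer option declarations onto single lines. A minimal sketch of the one-line `typer.Option(default, envvar=..., help=...)` shape; assumes typer is installed, and the defaults here are illustrative:

import typer

app = typer.Typer()


@app.command()
def serve(
    host: str = typer.Option("0.0.0.0", envvar="INFINITY_HOST", help="host for the server"),
    port: int = typer.Option(7997, envvar="INFINITY_PORT", help="port for the server"),
):
    # typer resolves the value from the CLI flag, then the env var, then the default
    typer.echo(f"serving on {host}:{port}")


if __name__ == "__main__":
    app()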

0 comments on commit 0a80dff
