diff --git a/docs/assets/openapi.json b/docs/assets/openapi.json index 0ffa50e4..521de4cd 100644 --- a/docs/assets/openapi.json +++ b/docs/assets/openapi.json @@ -1 +1 @@ -{"openapi":"3.1.0","info":{"title":"♾️ Infinity - Embedding Inference Server","summary":"Infinity is a high-throughput, low-latency REST API for serving text-embeddings, reranking models and clip. Infinity is developed under MIT License at https://github.com/michaelfeil/infinity.","contact":{"name":"Michael Feil"},"license":{"name":"MIT License","identifier":"MIT"},"version":"0.0.62"},"paths":{"/health":{"get":{"summary":" Health","description":"health check endpoint\n\nReturns:\n dict(unix=float): dict with unix time stamp","operationId":"health","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"additionalProperties":{"type":"number"},"type":"object","title":"Response Health"}}}}}}},"/":{"get":{"summary":"Redirect","operationId":"redirect__get","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}}}}},"/models":{"get":{"summary":" Models","description":"get models endpoint","operationId":"models","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/OpenAIModelInfo"}}}}}}},"/embeddings":{"post":{"summary":" Embeddings","description":"Encode Embeddings. Supports with multimodal inputs. Aligned with OpenAI Embeddings API.\n\n## Running Text Embeddings\n```python\nimport requests, base64\nrequests.post(\"http://..:7997/embeddings\",\n json={\"model\":\"openai/clip-vit-base-patch32\",\"input\":[\"Two cute cats.\"]})\n```\n\n## Running Image Embeddings\n```python\nrequests.post(\"http://..:7997/embeddings\",\n json={\n \"model\": \"openai/clip-vit-base-patch32\",\n \"encoding_format\": \"base64\",\n \"input\": [\n http://images.cocodataset.org/val2017/000000039769.jpg\",\n # can also be base64 encoded\n ],\n # set extra modality to image to process as image\n \"modality\": \"image\"\n)\n```\n\n## Running Audio Embeddings\n```python\nimport requests, base64\nurl = \"https://github.com/michaelfeil/infinity/raw/3b72eb7c14bae06e68ddd07c1f23fe0bf403f220/libs/infinity_emb/tests/data/audio/beep.wav\"\n\ndef url_to_base64(url, modality = \"image\"):\n '''small helper to convert url to base64 without server requiring access to the url'''\n response = requests.get(url)\n response.raise_for_status()\n base64_encoded = base64.b64encode(response.content).decode('utf-8')\n mimetype = f\"{modality}/{url.split('.')[-1]}\"\n return f\"data:{mimetype};base64,{base64_encoded}\"\n\nrequests.post(\"http://localhost:7997/embeddings\",\n json={\n \"model\": \"laion/larger_clap_general\",\n \"encoding_format\": \"float\",\n \"input\": [\n url, url_to_base64(url, \"audio\")\n ],\n # set extra modality to audio to process as audio\n \"modality\": \"audio\"\n }\n)\n```\n\n## Running via OpenAI Client\n```python\nfrom openai import OpenAI # pip install openai==1.51.0\nclient = OpenAI(base_url=\"http://localhost:7997/\")\nclient.embeddings.create(\n model=\"laion/larger_clap_general\",\n input=[url_to_base64(url, \"audio\")],\n encoding_format= \"base64\",\n extra_body={\n \"modality\": \"audio\"\n }\n)\n\nclient.embeddings.create(\n model=\"laion/larger_clap_general\",\n input=[\"the sound of a beep\", \"the sound of a cat\"],\n encoding_format= \"base64\",\n extra_body={\n \"modality\": \"text\"\n }\n)\n```\n\n### Hint: Run all the above models on one server:\n```bash\ninfinity_emb v2 --model-id BAAI/bge-small-en-v1.5 --model-id openai/clip-vit-base-patch32 --model-id laion/larger_clap_general\n```","operationId":"embeddings","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/MultiModalOpenAIEmbedding"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/OpenAIEmbeddingResult"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/rerank":{"post":{"summary":" Rerank","description":"Rerank documents. Aligned with Cohere API (https://docs.cohere.com/reference/rerank)\n\n```python\nimport requests\nrequests.post(\"http://..:7997/rerank\",\n json={\n \"model\":\"mixedbread-ai/mxbai-rerank-xsmall-v1\",\n \"query\":\"Where is Munich?\",\n \"documents\":[\"Munich is in Germany.\", \"The sky is blue.\"]\n })\n```","operationId":"rerank","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/RerankInput"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ReRankResult"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/classify":{"post":{"summary":" Classify","description":"Score or Classify Sentiments\n\n```python\nimport requests\nrequests.post(\"http://..:7997/classify\",\n json={\"model\":\"SamLowe/roberta-base-go_emotions\",\"input\":[\"I am not having a great day.\"]})\n```","operationId":"classify","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ClassifyInput"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ClassifyResult"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/embeddings_image":{"post":{"summary":"Deprecated: Use `embeddings` with `modality` set to `image`","description":"Encode Embeddings from Image files\n\nSupports URLs of Images and Base64-encoded Images\n\n```python\nimport requests\nrequests.post(\"http://..:7997/embeddings_image\",\n json={\n \"model\":\"openai/clip-vit-base-patch32\",\n \"input\": [\n \"http://images.cocodataset.org/val2017/000000039769.jpg\",\n \"data:image/png;base64,iVBORw0KGgoDEMOoSAMPLEoENCODEDIMAGE\"\n ]\n })\n```","operationId":"embeddings_image","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ImageEmbeddingInput"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/OpenAIEmbeddingResult"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"deprecated":true}},"/embeddings_audio":{"post":{"summary":"Deprecated: Use `embeddings` with `modality` set to `audio`","description":"Encode Embeddings from Audio files\n\nSupports URLs of Audios and Base64-encoded Audios\n\n```python\nimport requests\nrequests.post(\"http://..:7997/embeddings_audio\",\n json={\n \"model\":\"laion/larger_clap_general\",\n \"input\": [\n \"https://github.com/michaelfeil/infinity/raw/3b72eb7c14bae06e68ddd07c1f23fe0bf403f220/libs/infinity_emb/tests/data/audio/beep.wav\",\n \"data:audio/wav;base64,iVBORw0KGgoDEMOoSAMPLEoENCODEDAUDIO\"\n ]\n })\n```","operationId":"embeddings_audio","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/AudioEmbeddingInput"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/OpenAIEmbeddingResult"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"deprecated":true}},"/metrics":{"get":{"summary":"Metrics","description":"Endpoint that serves Prometheus metrics.","operationId":"metrics_metrics_get","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}}}}}},"components":{"schemas":{"AudioEmbeddingInput":{"properties":{"input":{"anyOf":[{"items":{"anyOf":[{"type":"string","pattern":"data:(?P[\\w]+\\/[\\w\\-\\+\\.]+)?(?:\\;name\\=(?P[\\w\\.\\-%!*'~\\(\\)]+))?(?:\\;charset\\=(?P[\\w\\-\\+\\.]+))?(?P\\;base64)?,(?P.*)","examples":["data:text/plain;charset=utf-8;base64,VGhlIHF1aWNrIGJyb3duIGZveCBqdW1wZWQgb3ZlciB0aGUgbGF6eSBkb2cu"]},{"type":"string","maxLength":2083,"minLength":1,"format":"uri"}]},"type":"array","maxItems":32,"minItems":1},{"type":"string","pattern":"data:(?P[\\w]+\\/[\\w\\-\\+\\.]+)?(?:\\;name\\=(?P[\\w\\.\\-%!*'~\\(\\)]+))?(?:\\;charset\\=(?P[\\w\\-\\+\\.]+))?(?P\\;base64)?,(?P.*)","examples":["data:text/plain;charset=utf-8;base64,VGhlIHF1aWNrIGJyb3duIGZveCBqdW1wZWQgb3ZlciB0aGUgbGF6eSBkb2cu"]},{"type":"string","maxLength":2083,"minLength":1,"format":"uri"}],"title":"Input"},"model":{"type":"string","title":"Model","default":"default/not-specified"},"encoding_format":{"$ref":"#/components/schemas/EmbeddingEncodingFormat","default":"float"},"user":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"User"}},"type":"object","required":["input"],"title":"AudioEmbeddingInput","description":"LEGACY, DO NO LONGER UPDATE"},"ClassifyInput":{"properties":{"input":{"items":{"type":"string","maxLength":122880},"type":"array","maxItems":2048,"minItems":1,"title":"Input"},"model":{"type":"string","title":"Model","default":"default/not-specified"},"raw_scores":{"type":"boolean","title":"Raw Scores","default":false}},"type":"object","required":["input"],"title":"ClassifyInput"},"ClassifyResult":{"properties":{"object":{"type":"string","enum":["classify"],"const":"classify","title":"Object","default":"classify"},"data":{"items":{"items":{"$ref":"#/components/schemas/_ClassifyObject"},"type":"array"},"type":"array","title":"Data"},"model":{"type":"string","title":"Model"},"usage":{"$ref":"#/components/schemas/_Usage"},"id":{"type":"string","title":"Id"},"created":{"type":"integer","title":"Created"}},"type":"object","required":["data","model","usage"],"title":"ClassifyResult","description":"Result of classification."},"EmbeddingEncodingFormat":{"type":"string","enum":["float","base64"],"title":"EmbeddingEncodingFormat"},"HTTPValidationError":{"properties":{"detail":{"items":{"$ref":"#/components/schemas/ValidationError"},"type":"array","title":"Detail"}},"type":"object","title":"HTTPValidationError"},"ImageEmbeddingInput":{"properties":{"input":{"anyOf":[{"items":{"anyOf":[{"type":"string","pattern":"data:(?P[\\w]+\\/[\\w\\-\\+\\.]+)?(?:\\;name\\=(?P[\\w\\.\\-%!*'~\\(\\)]+))?(?:\\;charset\\=(?P[\\w\\-\\+\\.]+))?(?P\\;base64)?,(?P.*)","examples":["data:text/plain;charset=utf-8;base64,VGhlIHF1aWNrIGJyb3duIGZveCBqdW1wZWQgb3ZlciB0aGUgbGF6eSBkb2cu"]},{"type":"string","maxLength":2083,"minLength":1,"format":"uri"}]},"type":"array","maxItems":32,"minItems":1},{"type":"string","pattern":"data:(?P[\\w]+\\/[\\w\\-\\+\\.]+)?(?:\\;name\\=(?P[\\w\\.\\-%!*'~\\(\\)]+))?(?:\\;charset\\=(?P[\\w\\-\\+\\.]+))?(?P\\;base64)?,(?P.*)","examples":["data:text/plain;charset=utf-8;base64,VGhlIHF1aWNrIGJyb3duIGZveCBqdW1wZWQgb3ZlciB0aGUgbGF6eSBkb2cu"]},{"type":"string","maxLength":2083,"minLength":1,"format":"uri"}],"title":"Input"},"model":{"type":"string","title":"Model","default":"default/not-specified"},"encoding_format":{"$ref":"#/components/schemas/EmbeddingEncodingFormat","default":"float"},"user":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"User"}},"type":"object","required":["input"],"title":"ImageEmbeddingInput","description":"LEGACY, DO NO LONGER UPDATE"},"ModelInfo":{"properties":{"id":{"type":"string","title":"Id"},"stats":{"type":"object","title":"Stats"},"object":{"type":"string","enum":["model"],"const":"model","title":"Object","default":"model"},"owned_by":{"type":"string","enum":["infinity"],"const":"infinity","title":"Owned By","default":"infinity"},"created":{"type":"integer","title":"Created"},"backend":{"type":"string","title":"Backend","default":""},"capabilities":{"items":{"type":"string"},"type":"array","uniqueItems":true,"title":"Capabilities","default":[]}},"type":"object","required":["id","stats"],"title":"ModelInfo"},"MultiModalOpenAIEmbedding":{"oneOf":[{"$ref":"#/components/schemas/_OpenAIEmbeddingInput_Text"},{"$ref":"#/components/schemas/OpenAIEmbeddingInput_Audio"},{"$ref":"#/components/schemas/OpenAIEmbeddingInput_Image"}],"title":"MultiModalOpenAIEmbedding"},"OpenAIEmbeddingInput_Audio":{"properties":{"model":{"type":"string","title":"Model","default":"default/not-specified"},"encoding_format":{"$ref":"#/components/schemas/EmbeddingEncodingFormat","default":"float"},"user":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"User"},"input":{"anyOf":[{"items":{"anyOf":[{"type":"string","pattern":"data:(?P[\\w]+\\/[\\w\\-\\+\\.]+)?(?:\\;name\\=(?P[\\w\\.\\-%!*'~\\(\\)]+))?(?:\\;charset\\=(?P[\\w\\-\\+\\.]+))?(?P\\;base64)?,(?P.*)","examples":["data:text/plain;charset=utf-8;base64,VGhlIHF1aWNrIGJyb3duIGZveCBqdW1wZWQgb3ZlciB0aGUgbGF6eSBkb2cu"]},{"type":"string","maxLength":2083,"minLength":1,"format":"uri"}]},"type":"array","maxItems":32,"minItems":1},{"type":"string","pattern":"data:(?P[\\w]+\\/[\\w\\-\\+\\.]+)?(?:\\;name\\=(?P[\\w\\.\\-%!*'~\\(\\)]+))?(?:\\;charset\\=(?P[\\w\\-\\+\\.]+))?(?P\\;base64)?,(?P.*)","examples":["data:text/plain;charset=utf-8;base64,VGhlIHF1aWNrIGJyb3duIGZveCBqdW1wZWQgb3ZlciB0aGUgbGF6eSBkb2cu"]},{"type":"string","maxLength":2083,"minLength":1,"format":"uri"}],"title":"Input"},"modality":{"type":"string","enum":["audio"],"const":"audio","title":"Modality","default":"audio"}},"type":"object","required":["input"],"title":"OpenAIEmbeddingInput_Audio"},"OpenAIEmbeddingInput_Image":{"properties":{"model":{"type":"string","title":"Model","default":"default/not-specified"},"encoding_format":{"$ref":"#/components/schemas/EmbeddingEncodingFormat","default":"float"},"user":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"User"},"input":{"anyOf":[{"items":{"anyOf":[{"type":"string","pattern":"data:(?P[\\w]+\\/[\\w\\-\\+\\.]+)?(?:\\;name\\=(?P[\\w\\.\\-%!*'~\\(\\)]+))?(?:\\;charset\\=(?P[\\w\\-\\+\\.]+))?(?P\\;base64)?,(?P.*)","examples":["data:text/plain;charset=utf-8;base64,VGhlIHF1aWNrIGJyb3duIGZveCBqdW1wZWQgb3ZlciB0aGUgbGF6eSBkb2cu"]},{"type":"string","maxLength":2083,"minLength":1,"format":"uri"}]},"type":"array","maxItems":32,"minItems":1},{"type":"string","pattern":"data:(?P[\\w]+\\/[\\w\\-\\+\\.]+)?(?:\\;name\\=(?P[\\w\\.\\-%!*'~\\(\\)]+))?(?:\\;charset\\=(?P[\\w\\-\\+\\.]+))?(?P\\;base64)?,(?P.*)","examples":["data:text/plain;charset=utf-8;base64,VGhlIHF1aWNrIGJyb3duIGZveCBqdW1wZWQgb3ZlciB0aGUgbGF6eSBkb2cu"]},{"type":"string","maxLength":2083,"minLength":1,"format":"uri"}],"title":"Input"},"modality":{"type":"string","enum":["image"],"const":"image","title":"Modality","default":"image"}},"type":"object","required":["input"],"title":"OpenAIEmbeddingInput_Image"},"OpenAIEmbeddingResult":{"properties":{"object":{"type":"string","enum":["embedding"],"const":"embedding","title":"Object","default":"embedding"},"data":{"items":{"$ref":"#/components/schemas/_EmbeddingObject"},"type":"array","title":"Data"},"model":{"type":"string","title":"Model"},"usage":{"$ref":"#/components/schemas/_Usage"},"id":{"type":"string","title":"Id"},"created":{"type":"integer","title":"Created"}},"type":"object","required":["data","model","usage"],"title":"OpenAIEmbeddingResult"},"OpenAIModelInfo":{"properties":{"data":{"items":{"$ref":"#/components/schemas/ModelInfo"},"type":"array","title":"Data"},"object":{"type":"string","title":"Object","default":"list"}},"type":"object","required":["data"],"title":"OpenAIModelInfo"},"ReRankResult":{"properties":{"object":{"type":"string","enum":["rerank"],"const":"rerank","title":"Object","default":"rerank"},"results":{"items":{"$ref":"#/components/schemas/_ReRankObject"},"type":"array","title":"Results"},"model":{"type":"string","title":"Model"},"usage":{"$ref":"#/components/schemas/_Usage"},"id":{"type":"string","title":"Id"},"created":{"type":"integer","title":"Created"}},"type":"object","required":["results","model","usage"],"title":"ReRankResult","description":"Following the Cohere protocol for Rerankers."},"RerankInput":{"properties":{"query":{"type":"string","maxLength":122880,"title":"Query"},"documents":{"items":{"type":"string","maxLength":122880},"type":"array","maxItems":2048,"minItems":1,"title":"Documents"},"return_documents":{"type":"boolean","title":"Return Documents","default":false},"raw_scores":{"type":"boolean","title":"Raw Scores","default":false},"model":{"type":"string","title":"Model","default":"default/not-specified"},"top_n":{"anyOf":[{"type":"integer","exclusiveMinimum":0.0},{"type":"null"}],"title":"Top N"}},"type":"object","required":["query","documents"],"title":"RerankInput","description":"Input for reranking"},"ValidationError":{"properties":{"loc":{"items":{"anyOf":[{"type":"string"},{"type":"integer"}]},"type":"array","title":"Location"},"msg":{"type":"string","title":"Message"},"type":{"type":"string","title":"Error Type"}},"type":"object","required":["loc","msg","type"],"title":"ValidationError"},"_ClassifyObject":{"properties":{"score":{"type":"number","title":"Score"},"label":{"type":"string","title":"Label"}},"type":"object","required":["score","label"],"title":"_ClassifyObject"},"_EmbeddingObject":{"properties":{"object":{"type":"string","enum":["embedding"],"const":"embedding","title":"Object","default":"embedding"},"embedding":{"anyOf":[{"items":{"type":"number"},"type":"array"},{"type":"string","format":"binary"}],"title":"Embedding"},"index":{"type":"integer","title":"Index"}},"type":"object","required":["embedding","index"],"title":"_EmbeddingObject"},"_OpenAIEmbeddingInput_Text":{"properties":{"model":{"type":"string","title":"Model","default":"default/not-specified"},"encoding_format":{"$ref":"#/components/schemas/EmbeddingEncodingFormat","default":"float"},"user":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"User"},"input":{"anyOf":[{"items":{"type":"string","maxLength":122880},"type":"array","maxItems":2048,"minItems":1},{"type":"string","maxLength":122880}],"title":"Input"},"modality":{"type":"string","enum":["text"],"const":"text","title":"Modality","default":"text"}},"type":"object","required":["input"],"title":"_OpenAIEmbeddingInput_Text","description":"helper"},"_ReRankObject":{"properties":{"relevance_score":{"type":"number","title":"Relevance Score"},"index":{"type":"integer","title":"Index"},"document":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Document"}},"type":"object","required":["relevance_score","index"],"title":"_ReRankObject"},"_Usage":{"properties":{"prompt_tokens":{"type":"integer","title":"Prompt Tokens"},"total_tokens":{"type":"integer","title":"Total Tokens"}},"type":"object","required":["prompt_tokens","total_tokens"],"title":"_Usage"}}}} \ No newline at end of file +{"openapi":"3.1.0","info":{"title":"♾️ Infinity - Embedding Inference Server","summary":"Infinity is a high-throughput, low-latency REST API for serving text-embeddings, reranking models and clip. Infinity is developed under MIT License at https://github.com/michaelfeil/infinity.","contact":{"name":"Michael Feil"},"license":{"name":"MIT License","identifier":"MIT"},"version":"0.0.63"},"paths":{"/health":{"get":{"summary":" Health","description":"health check endpoint\n\nReturns:\n dict(unix=float): dict with unix time stamp","operationId":"health","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"additionalProperties":{"type":"number"},"type":"object","title":"Response Health"}}}}}}},"/":{"get":{"summary":"Redirect","operationId":"redirect__get","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}}}}},"/models":{"get":{"summary":" Models","description":"get models endpoint","operationId":"models","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/OpenAIModelInfo"}}}}}}},"/embeddings":{"post":{"summary":" Embeddings","description":"Encode Embeddings. Supports with multimodal inputs. Aligned with OpenAI Embeddings API.\n\n## Running Text Embeddings\n```python\nimport requests, base64\nrequests.post(\"http://..:7997/embeddings\",\n json={\"model\":\"openai/clip-vit-base-patch32\",\"input\":[\"Two cute cats.\"]})\n```\n\n## Running Image Embeddings\n```python\nrequests.post(\"http://..:7997/embeddings\",\n json={\n \"model\": \"openai/clip-vit-base-patch32\",\n \"encoding_format\": \"base64\",\n \"input\": [\n http://images.cocodataset.org/val2017/000000039769.jpg\",\n # can also be base64 encoded\n ],\n # set extra modality to image to process as image\n \"modality\": \"image\"\n)\n```\n\n## Running Audio Embeddings\n```python\nimport requests, base64\nurl = \"https://github.com/michaelfeil/infinity/raw/3b72eb7c14bae06e68ddd07c1f23fe0bf403f220/libs/infinity_emb/tests/data/audio/beep.wav\"\n\ndef url_to_base64(url, modality = \"image\"):\n '''small helper to convert url to base64 without server requiring access to the url'''\n response = requests.get(url)\n response.raise_for_status()\n base64_encoded = base64.b64encode(response.content).decode('utf-8')\n mimetype = f\"{modality}/{url.split('.')[-1]}\"\n return f\"data:{mimetype};base64,{base64_encoded}\"\n\nrequests.post(\"http://localhost:7997/embeddings\",\n json={\n \"model\": \"laion/larger_clap_general\",\n \"encoding_format\": \"float\",\n \"input\": [\n url, url_to_base64(url, \"audio\")\n ],\n # set extra modality to audio to process as audio\n \"modality\": \"audio\"\n }\n)\n```\n\n## Running via OpenAI Client\n```python\nfrom openai import OpenAI # pip install openai==1.51.0\nclient = OpenAI(base_url=\"http://localhost:7997/\")\nclient.embeddings.create(\n model=\"laion/larger_clap_general\",\n input=[url_to_base64(url, \"audio\")],\n encoding_format= \"base64\",\n extra_body={\n \"modality\": \"audio\"\n }\n)\n\nclient.embeddings.create(\n model=\"laion/larger_clap_general\",\n input=[\"the sound of a beep\", \"the sound of a cat\"],\n encoding_format= \"base64\",\n extra_body={\n \"modality\": \"text\"\n }\n)\n```\n\n### Hint: Run all the above models on one server:\n```bash\ninfinity_emb v2 --model-id BAAI/bge-small-en-v1.5 --model-id openai/clip-vit-base-patch32 --model-id laion/larger_clap_general\n```","operationId":"embeddings","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/MultiModalOpenAIEmbedding"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/OpenAIEmbeddingResult"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/rerank":{"post":{"summary":" Rerank","description":"Rerank documents. Aligned with Cohere API (https://docs.cohere.com/reference/rerank)\n\n```python\nimport requests\nrequests.post(\"http://..:7997/rerank\",\n json={\n \"model\":\"mixedbread-ai/mxbai-rerank-xsmall-v1\",\n \"query\":\"Where is Munich?\",\n \"documents\":[\"Munich is in Germany.\", \"The sky is blue.\"]\n })\n```","operationId":"rerank","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/RerankInput"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ReRankResult"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/classify":{"post":{"summary":" Classify","description":"Score or Classify Sentiments\n\n```python\nimport requests\nrequests.post(\"http://..:7997/classify\",\n json={\"model\":\"SamLowe/roberta-base-go_emotions\",\"input\":[\"I am not having a great day.\"]})\n```","operationId":"classify","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ClassifyInput"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ClassifyResult"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/embeddings_image":{"post":{"summary":"Deprecated: Use `embeddings` with `modality` set to `image`","description":"Encode Embeddings from Image files\n\nSupports URLs of Images and Base64-encoded Images\n\n```python\nimport requests\nrequests.post(\"http://..:7997/embeddings_image\",\n json={\n \"model\":\"openai/clip-vit-base-patch32\",\n \"input\": [\n \"http://images.cocodataset.org/val2017/000000039769.jpg\",\n \"data:image/png;base64,iVBORw0KGgoDEMOoSAMPLEoENCODEDIMAGE\"\n ]\n })\n```","operationId":"embeddings_image","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ImageEmbeddingInput"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/OpenAIEmbeddingResult"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"deprecated":true}},"/embeddings_audio":{"post":{"summary":"Deprecated: Use `embeddings` with `modality` set to `audio`","description":"Encode Embeddings from Audio files\n\nSupports URLs of Audios and Base64-encoded Audios\n\n```python\nimport requests\nrequests.post(\"http://..:7997/embeddings_audio\",\n json={\n \"model\":\"laion/larger_clap_general\",\n \"input\": [\n \"https://github.com/michaelfeil/infinity/raw/3b72eb7c14bae06e68ddd07c1f23fe0bf403f220/libs/infinity_emb/tests/data/audio/beep.wav\",\n \"data:audio/wav;base64,iVBORw0KGgoDEMOoSAMPLEoENCODEDAUDIO\"\n ]\n })\n```","operationId":"embeddings_audio","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/AudioEmbeddingInput"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/OpenAIEmbeddingResult"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"deprecated":true}},"/metrics":{"get":{"summary":"Metrics","description":"Endpoint that serves Prometheus metrics.","operationId":"metrics_metrics_get","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}}}}}},"components":{"schemas":{"AudioEmbeddingInput":{"properties":{"input":{"anyOf":[{"items":{"anyOf":[{"type":"string","pattern":"data:(?P[\\w]+\\/[\\w\\-\\+\\.]+)?(?:\\;name\\=(?P[\\w\\.\\-%!*'~\\(\\)]+))?(?:\\;charset\\=(?P[\\w\\-\\+\\.]+))?(?P\\;base64)?,(?P.*)","examples":["data:text/plain;charset=utf-8;base64,VGhlIHF1aWNrIGJyb3duIGZveCBqdW1wZWQgb3ZlciB0aGUgbGF6eSBkb2cu"]},{"type":"string","maxLength":2083,"minLength":1,"format":"uri"}]},"type":"array","maxItems":32,"minItems":1},{"type":"string","pattern":"data:(?P[\\w]+\\/[\\w\\-\\+\\.]+)?(?:\\;name\\=(?P[\\w\\.\\-%!*'~\\(\\)]+))?(?:\\;charset\\=(?P[\\w\\-\\+\\.]+))?(?P\\;base64)?,(?P.*)","examples":["data:text/plain;charset=utf-8;base64,VGhlIHF1aWNrIGJyb3duIGZveCBqdW1wZWQgb3ZlciB0aGUgbGF6eSBkb2cu"]},{"type":"string","maxLength":2083,"minLength":1,"format":"uri"}],"title":"Input"},"model":{"type":"string","title":"Model","default":"default/not-specified"},"encoding_format":{"$ref":"#/components/schemas/EmbeddingEncodingFormat","default":"float"},"user":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"User"}},"type":"object","required":["input"],"title":"AudioEmbeddingInput","description":"LEGACY, DO NO LONGER UPDATE"},"ClassifyInput":{"properties":{"input":{"items":{"type":"string","maxLength":122880},"type":"array","maxItems":2048,"minItems":1,"title":"Input"},"model":{"type":"string","title":"Model","default":"default/not-specified"},"raw_scores":{"type":"boolean","title":"Raw Scores","default":false}},"type":"object","required":["input"],"title":"ClassifyInput"},"ClassifyResult":{"properties":{"object":{"type":"string","enum":["classify"],"const":"classify","title":"Object","default":"classify"},"data":{"items":{"items":{"$ref":"#/components/schemas/_ClassifyObject"},"type":"array"},"type":"array","title":"Data"},"model":{"type":"string","title":"Model"},"usage":{"$ref":"#/components/schemas/_Usage"},"id":{"type":"string","title":"Id"},"created":{"type":"integer","title":"Created"}},"type":"object","required":["data","model","usage"],"title":"ClassifyResult","description":"Result of classification."},"EmbeddingEncodingFormat":{"type":"string","enum":["float","base64"],"title":"EmbeddingEncodingFormat"},"HTTPValidationError":{"properties":{"detail":{"items":{"$ref":"#/components/schemas/ValidationError"},"type":"array","title":"Detail"}},"type":"object","title":"HTTPValidationError"},"ImageEmbeddingInput":{"properties":{"input":{"anyOf":[{"items":{"anyOf":[{"type":"string","pattern":"data:(?P[\\w]+\\/[\\w\\-\\+\\.]+)?(?:\\;name\\=(?P[\\w\\.\\-%!*'~\\(\\)]+))?(?:\\;charset\\=(?P[\\w\\-\\+\\.]+))?(?P\\;base64)?,(?P.*)","examples":["data:text/plain;charset=utf-8;base64,VGhlIHF1aWNrIGJyb3duIGZveCBqdW1wZWQgb3ZlciB0aGUgbGF6eSBkb2cu"]},{"type":"string","maxLength":2083,"minLength":1,"format":"uri"}]},"type":"array","maxItems":32,"minItems":1},{"type":"string","pattern":"data:(?P[\\w]+\\/[\\w\\-\\+\\.]+)?(?:\\;name\\=(?P[\\w\\.\\-%!*'~\\(\\)]+))?(?:\\;charset\\=(?P[\\w\\-\\+\\.]+))?(?P\\;base64)?,(?P.*)","examples":["data:text/plain;charset=utf-8;base64,VGhlIHF1aWNrIGJyb3duIGZveCBqdW1wZWQgb3ZlciB0aGUgbGF6eSBkb2cu"]},{"type":"string","maxLength":2083,"minLength":1,"format":"uri"}],"title":"Input"},"model":{"type":"string","title":"Model","default":"default/not-specified"},"encoding_format":{"$ref":"#/components/schemas/EmbeddingEncodingFormat","default":"float"},"user":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"User"}},"type":"object","required":["input"],"title":"ImageEmbeddingInput","description":"LEGACY, DO NO LONGER UPDATE"},"ModelInfo":{"properties":{"id":{"type":"string","title":"Id"},"stats":{"type":"object","title":"Stats"},"object":{"type":"string","enum":["model"],"const":"model","title":"Object","default":"model"},"owned_by":{"type":"string","enum":["infinity"],"const":"infinity","title":"Owned By","default":"infinity"},"created":{"type":"integer","title":"Created"},"backend":{"type":"string","title":"Backend","default":""},"capabilities":{"items":{"type":"string"},"type":"array","uniqueItems":true,"title":"Capabilities","default":[]}},"type":"object","required":["id","stats"],"title":"ModelInfo"},"MultiModalOpenAIEmbedding":{"oneOf":[{"$ref":"#/components/schemas/_OpenAIEmbeddingInput_Text"},{"$ref":"#/components/schemas/OpenAIEmbeddingInput_Audio"},{"$ref":"#/components/schemas/OpenAIEmbeddingInput_Image"}],"title":"MultiModalOpenAIEmbedding"},"OpenAIEmbeddingInput_Audio":{"properties":{"model":{"type":"string","title":"Model","default":"default/not-specified"},"encoding_format":{"$ref":"#/components/schemas/EmbeddingEncodingFormat","default":"float"},"user":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"User"},"input":{"anyOf":[{"items":{"anyOf":[{"type":"string","pattern":"data:(?P[\\w]+\\/[\\w\\-\\+\\.]+)?(?:\\;name\\=(?P[\\w\\.\\-%!*'~\\(\\)]+))?(?:\\;charset\\=(?P[\\w\\-\\+\\.]+))?(?P\\;base64)?,(?P.*)","examples":["data:text/plain;charset=utf-8;base64,VGhlIHF1aWNrIGJyb3duIGZveCBqdW1wZWQgb3ZlciB0aGUgbGF6eSBkb2cu"]},{"type":"string","maxLength":2083,"minLength":1,"format":"uri"}]},"type":"array","maxItems":32,"minItems":1},{"type":"string","pattern":"data:(?P[\\w]+\\/[\\w\\-\\+\\.]+)?(?:\\;name\\=(?P[\\w\\.\\-%!*'~\\(\\)]+))?(?:\\;charset\\=(?P[\\w\\-\\+\\.]+))?(?P\\;base64)?,(?P.*)","examples":["data:text/plain;charset=utf-8;base64,VGhlIHF1aWNrIGJyb3duIGZveCBqdW1wZWQgb3ZlciB0aGUgbGF6eSBkb2cu"]},{"type":"string","maxLength":2083,"minLength":1,"format":"uri"}],"title":"Input"},"modality":{"type":"string","enum":["audio"],"const":"audio","title":"Modality","default":"audio"}},"type":"object","required":["input"],"title":"OpenAIEmbeddingInput_Audio"},"OpenAIEmbeddingInput_Image":{"properties":{"model":{"type":"string","title":"Model","default":"default/not-specified"},"encoding_format":{"$ref":"#/components/schemas/EmbeddingEncodingFormat","default":"float"},"user":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"User"},"input":{"anyOf":[{"items":{"anyOf":[{"type":"string","pattern":"data:(?P[\\w]+\\/[\\w\\-\\+\\.]+)?(?:\\;name\\=(?P[\\w\\.\\-%!*'~\\(\\)]+))?(?:\\;charset\\=(?P[\\w\\-\\+\\.]+))?(?P\\;base64)?,(?P.*)","examples":["data:text/plain;charset=utf-8;base64,VGhlIHF1aWNrIGJyb3duIGZveCBqdW1wZWQgb3ZlciB0aGUgbGF6eSBkb2cu"]},{"type":"string","maxLength":2083,"minLength":1,"format":"uri"}]},"type":"array","maxItems":32,"minItems":1},{"type":"string","pattern":"data:(?P[\\w]+\\/[\\w\\-\\+\\.]+)?(?:\\;name\\=(?P[\\w\\.\\-%!*'~\\(\\)]+))?(?:\\;charset\\=(?P[\\w\\-\\+\\.]+))?(?P\\;base64)?,(?P.*)","examples":["data:text/plain;charset=utf-8;base64,VGhlIHF1aWNrIGJyb3duIGZveCBqdW1wZWQgb3ZlciB0aGUgbGF6eSBkb2cu"]},{"type":"string","maxLength":2083,"minLength":1,"format":"uri"}],"title":"Input"},"modality":{"type":"string","enum":["image"],"const":"image","title":"Modality","default":"image"}},"type":"object","required":["input"],"title":"OpenAIEmbeddingInput_Image"},"OpenAIEmbeddingResult":{"properties":{"object":{"type":"string","enum":["embedding"],"const":"embedding","title":"Object","default":"embedding"},"data":{"items":{"$ref":"#/components/schemas/_EmbeddingObject"},"type":"array","title":"Data"},"model":{"type":"string","title":"Model"},"usage":{"$ref":"#/components/schemas/_Usage"},"id":{"type":"string","title":"Id"},"created":{"type":"integer","title":"Created"}},"type":"object","required":["data","model","usage"],"title":"OpenAIEmbeddingResult"},"OpenAIModelInfo":{"properties":{"data":{"items":{"$ref":"#/components/schemas/ModelInfo"},"type":"array","title":"Data"},"object":{"type":"string","title":"Object","default":"list"}},"type":"object","required":["data"],"title":"OpenAIModelInfo"},"ReRankResult":{"properties":{"object":{"type":"string","enum":["rerank"],"const":"rerank","title":"Object","default":"rerank"},"results":{"items":{"$ref":"#/components/schemas/_ReRankObject"},"type":"array","title":"Results"},"model":{"type":"string","title":"Model"},"usage":{"$ref":"#/components/schemas/_Usage"},"id":{"type":"string","title":"Id"},"created":{"type":"integer","title":"Created"}},"type":"object","required":["results","model","usage"],"title":"ReRankResult","description":"Following the Cohere protocol for Rerankers."},"RerankInput":{"properties":{"query":{"type":"string","maxLength":122880,"title":"Query"},"documents":{"items":{"type":"string","maxLength":122880},"type":"array","maxItems":2048,"minItems":1,"title":"Documents"},"return_documents":{"type":"boolean","title":"Return Documents","default":false},"raw_scores":{"type":"boolean","title":"Raw Scores","default":false},"model":{"type":"string","title":"Model","default":"default/not-specified"},"top_n":{"anyOf":[{"type":"integer","exclusiveMinimum":0.0},{"type":"null"}],"title":"Top N"}},"type":"object","required":["query","documents"],"title":"RerankInput","description":"Input for reranking"},"ValidationError":{"properties":{"loc":{"items":{"anyOf":[{"type":"string"},{"type":"integer"}]},"type":"array","title":"Location"},"msg":{"type":"string","title":"Message"},"type":{"type":"string","title":"Error Type"}},"type":"object","required":["loc","msg","type"],"title":"ValidationError"},"_ClassifyObject":{"properties":{"score":{"type":"number","title":"Score"},"label":{"type":"string","title":"Label"}},"type":"object","required":["score","label"],"title":"_ClassifyObject"},"_EmbeddingObject":{"properties":{"object":{"type":"string","enum":["embedding"],"const":"embedding","title":"Object","default":"embedding"},"embedding":{"anyOf":[{"items":{"type":"number"},"type":"array"},{"type":"string","format":"binary"}],"title":"Embedding"},"index":{"type":"integer","title":"Index"}},"type":"object","required":["embedding","index"],"title":"_EmbeddingObject"},"_OpenAIEmbeddingInput_Text":{"properties":{"model":{"type":"string","title":"Model","default":"default/not-specified"},"encoding_format":{"$ref":"#/components/schemas/EmbeddingEncodingFormat","default":"float"},"user":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"User"},"input":{"anyOf":[{"items":{"type":"string","maxLength":122880},"type":"array","maxItems":2048,"minItems":1},{"type":"string","maxLength":122880}],"title":"Input"},"modality":{"type":"string","enum":["text"],"const":"text","title":"Modality","default":"text"}},"type":"object","required":["input"],"title":"_OpenAIEmbeddingInput_Text","description":"helper"},"_ReRankObject":{"properties":{"relevance_score":{"type":"number","title":"Relevance Score"},"index":{"type":"integer","title":"Index"},"document":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Document"}},"type":"object","required":["relevance_score","index"],"title":"_ReRankObject"},"_Usage":{"properties":{"prompt_tokens":{"type":"integer","title":"Prompt Tokens"},"total_tokens":{"type":"integer","title":"Total Tokens"}},"type":"object","required":["prompt_tokens","total_tokens"],"title":"_Usage"}}}} \ No newline at end of file diff --git a/docs/docs/cli_v2.md b/docs/docs/cli_v2.md index 1d78f73a..934c8791 100644 --- a/docs/docs/cli_v2.md +++ b/docs/docs/cli_v2.md @@ -75,8 +75,8 @@ $ infinity_emb v2 --help │ `INFINITY_LENGTHS_VIA_TOKENIZ… │ │ [default: │ │ lengths-via-tokenize] │ -│ --dtype [float32|float16|int8|fp8|aut dtype for the model weights. │ -│ o] [env var: `INFINITY_DTYPE`] │ +│ --dtype [float32|float16|bfloat16|int dtype for the model weights. │ +│ 8|fp8|auto] [env var: `INFINITY_DTYPE`] │ │ [default: auto] │ │ --embedding-dtype [float32|int8|uint8|binary|ub dtype post-forward pass. If != │ │ inary] `float32`, using Post-Forward │ diff --git a/docs/docs/telemetry.md b/docs/docs/telemetry.md new file mode 100644 index 00000000..c69c141b --- /dev/null +++ b/docs/docs/telemetry.md @@ -0,0 +1,16 @@ +# Telemetry + +All CLI arguments are currently logged, as well as system info such as os. +The feedback will be used to optimzed common models / OS support. + +## Disable Telemetry +You can disable tracking like the following: + +``` +# set +export DO_NOT_TRACK="1" +# infinity specific setting +export INFINITY_ANONYMOUS_USAGE_STATS="0" +``` + +or by CLI argument. diff --git a/libs/client_infinity/infinity_client/pyproject.toml b/libs/client_infinity/infinity_client/pyproject.toml index d327210c..1479b118 100644 --- a/libs/client_infinity/infinity_client/pyproject.toml +++ b/libs/client_infinity/infinity_client/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "infinity_client" -version = "0.0.62" +version = "0.0.63" description = "A client library for accessing ♾️ Infinity - Embedding Inference Server" authors = [] readme = "README.md" diff --git a/libs/infinity_emb/infinity_emb/__init__.py b/libs/infinity_emb/infinity_emb/__init__.py index d2a8f313..7a0b6d23 100644 --- a/libs/infinity_emb/infinity_emb/__init__.py +++ b/libs/infinity_emb/infinity_emb/__init__.py @@ -28,7 +28,7 @@ from infinity_emb.log_handler import logger # noqa: E402 from infinity_emb.sync_engine import SyncEngineArray # noqa: E402 -__version__ = importlib.metadata.version("infinity_emb") +__version__: str = importlib.metadata.version("infinity_emb") __all__ = [ "__version__", diff --git a/libs/infinity_emb/infinity_emb/_optional_imports.py b/libs/infinity_emb/infinity_emb/_optional_imports.py index 6ae4a392..a7fb17a2 100644 --- a/libs/infinity_emb/infinity_emb/_optional_imports.py +++ b/libs/infinity_emb/infinity_emb/_optional_imports.py @@ -67,6 +67,7 @@ def _raise_error(self) -> None: CHECK_TRANSFORMERS = OptionalImports("transformers", "torch") CHECK_TORCH = OptionalImports("torch.nn", "torch") # CHECK_REQUESTS = OptionalImports("requests", "server") +CHECK_POSTHOG = OptionalImports("posthog", "server") CHECK_AIOHTTP = OptionalImports("aiohttp", "server") CHECK_PIL = OptionalImports("PIL", "vision") CHECK_SOUNDFILE = OptionalImports("soundfile", "audio") diff --git a/libs/infinity_emb/infinity_emb/env.py b/libs/infinity_emb/infinity_emb/env.py index 57ba3925..42233524 100644 --- a/libs/infinity_emb/infinity_emb/env.py +++ b/libs/infinity_emb/infinity_emb/env.py @@ -68,7 +68,7 @@ def _optional_infinity_var_multiple( @staticmethod def _to_bool(value: str) -> bool: - return value.lower() in {"true", "1"} + return value.lower() in {"true", "1", "yes", "y"} @staticmethod def _to_bool_multiple(value: list[str]) -> list[bool]: @@ -153,6 +153,17 @@ def calibration_dataset_url(self): default="https://raw.githubusercontent.com/michaelfeil/infinity/2da1f32d610b8edbe4ce58d0c44fc27c963abca6/docs/assets/multilingual_calibration.utf8", ) + @cached_property + def anonymous_usage_stats(self): + tracking_allowed = self._to_bool( + self._optional_infinity_var( + "anonymous_usage_stats", + default="true", + ) + ) + tracking_allowed_2 = not self._to_bool(os.getenv("DO_NOT_TRACK", "0")) + return tracking_allowed and tracking_allowed_2 + @cached_property def cache_dir(self) -> Path: """gets the cache directory for infinity_emb.""" diff --git a/libs/infinity_emb/infinity_emb/infinity_server.py b/libs/infinity_emb/infinity_emb/infinity_server.py index 8d44a9d2..2777f115 100644 --- a/libs/infinity_emb/infinity_emb/infinity_server.py +++ b/libs/infinity_emb/infinity_emb/infinity_server.py @@ -6,6 +6,7 @@ import signal import sys import time +import uuid from contextlib import asynccontextmanager from typing import Any, Optional, Union @@ -36,9 +37,11 @@ ImageCorruption, InferenceEngine, Modality, + ModelCapabilites, ModelNotDeployedError, PoolingMethod, ) +from infinity_emb.telemetry import PostHog, StartupTelemetry def create_server( @@ -61,10 +64,32 @@ def create_server( from fastapi.security import HTTPAuthorizationCredentials, HTTPBearer from prometheus_fastapi_instrumentator import Instrumentator + def send_telemetry_start( + engine_args_list: list[EngineArgs], + capabilities_list: list[set[ModelCapabilites]], + ): + session_id = uuid.uuid4().hex + for arg, capabilities in zip(engine_args_list, capabilities_list): + PostHog.capture( + StartupTelemetry( + engine_args=arg, + num_engines=len(engine_args_list), + capabilities=capabilities, + session_id=session_id, + ) + ) + @asynccontextmanager async def lifespan(app: FastAPI): instrumentator.expose(app) # type: ignore app.engine_array = AsyncEngineArray.from_args(engine_args_list) # type: ignore + asyncio.create_task( + asyncio.to_thread( + send_telemetry_start, + engine_args_list, + [e.capabilities for e in app.engine_array], # type: ignore + ) + ) # start in a threadpool await app.engine_array.astart() # type: ignore @@ -87,6 +112,7 @@ async def kill_later(seconds: int): " -> exit ." ) asyncio.create_task(kill_later(3)) + yield await app.engine_array.astop() # type: ignore # shutdown! diff --git a/libs/infinity_emb/infinity_emb/primitives.py b/libs/infinity_emb/infinity_emb/primitives.py index e2a74639..f19ab48c 100644 --- a/libs/infinity_emb/infinity_emb/primitives.py +++ b/libs/infinity_emb/infinity_emb/primitives.py @@ -67,7 +67,7 @@ class ClassifyReturnType(TypedDict): UnionReturnType = Union[EmbeddingReturnType, ReRankReturnType, ClassifyReturnType] -class EnumType(enum.Enum): +class EnumType(str, enum.Enum): @classmethod @lru_cache def names_enum(cls) -> enum.Enum: @@ -125,6 +125,7 @@ def resolve(self) -> Optional[str]: class Dtype(EnumType): float32: str = "float32" float16: str = "float16" + bfloat16: str = "bfloat16" int8: str = "int8" fp8: str = "fp8" auto: str = "auto" diff --git a/libs/infinity_emb/infinity_emb/telemetry.py b/libs/infinity_emb/infinity_emb/telemetry.py new file mode 100644 index 00000000..975ad605 --- /dev/null +++ b/libs/infinity_emb/infinity_emb/telemetry.py @@ -0,0 +1,258 @@ +import hashlib +import logging +import os +import platform +import sys +from abc import abstractmethod +from dataclasses import asdict, dataclass, field +from functools import cache +from pathlib import Path +from typing import Any + +from infinity_emb._optional_imports import CHECK_POSTHOG, CHECK_TORCH +from infinity_emb.args import EngineArgs +from infinity_emb.env import MANAGER +from infinity_emb.log_handler import logger +from infinity_emb.primitives import ModelCapabilites + +if CHECK_POSTHOG.is_available: + import posthog + from posthog import Posthog +if CHECK_TORCH.is_available: + import torch + + +@dataclass +class ProductTelemetryEvent: + @abstractmethod + def render(self) -> dict[str, Any]: + ... + + @abstractmethod + def name(self) -> str: + ... + + +@cache +def get_system_anonymous_name(): + attributes = [] + + # Kernel version + attributes.append(platform.uname().release) + + # OS information + attributes.append(platform.uname().version) + + # Machine hardware name + attributes.append(platform.uname().machine) + + # Combine attributes and hash them + fingerprint_str = "|".join(attributes) + fingerprint_hash = hashlib.sha256(fingerprint_str.encode()).hexdigest() + return fingerprint_hash + + +@cache +def infinity_version(): + from infinity_emb import __version__ + + return __version__ + + +@cache +def get_system_properties(): + gpu_count = 0 + gpu_type = "" + gpu_memory_per_device_mb = 0 + if CHECK_TORCH.is_available: + if torch.cuda.is_available(): + device_property = torch.cuda.get_device_properties(0) + gpu_count = torch.cuda.device_count() + gpu_type = str(device_property.name) + gpu_memory_per_device_mb = ( + int(device_property.total_memory) * 1000000 / 1024**2 + ) + + return { + "gpu_count": gpu_count, + "gpu_type": gpu_type, + "gpu_memory_per_device_mb": gpu_memory_per_device_mb, + } + + +@cache +def _detect_cloud_provider() -> str: + # Try detecting through environment variables + env_to_cloud_provider = { + "RUNPOD_DC_ID": "RUNPOD", + } + for env_var, provider in env_to_cloud_provider.items(): + if os.environ.get(env_var): + return provider + + # Try detecting through vendor file + vendor_files = [ + "/sys/class/dmi/id/product_version", + "/sys/class/dmi/id/bios_vendor", + "/sys/class/dmi/id/product_name", + "/sys/class/dmi/id/chassis_asset_tag", + "/sys/class/dmi/id/sys_vendor", + ] + # Mapping of identifiable strings to cloud providers + cloud_identifiers = { + "amazon": "AWS", + "microsoft corporation": "AZURE", + "google": "GCP", + "oraclecloud": "OCI", + } + + for vendor_file in vendor_files: + path = Path(vendor_file) + if path.is_file(): + file_content = path.read_text().lower() + for identifier, provider in cloud_identifiers.items(): + if identifier in file_content: + return provider + + return "UNKNOWN" + + +@cache +def _get_cpu_info(): + try: + import cpuinfo # type: ignore + + info = cpuinfo.get_cpu_info() + except Exception: + info = {} + return { + "count": info.get("count", -1), + "cpu_type": info.get("brand_raw", ""), + "cpu_family_model_stepping": ",".join( + [ + str(info.get("family", "")), + str(info.get("model", "")), + str(info.get("stepping", "")), + ] + ), + } + + +@cache +def _get_os_info(): + try: + import psutil # type: ignore + + memory = psutil.virtual_memory().total // (1024**2) + except Exception: + memory = -1 + + return { + "os": platform.system(), + "architecture": platform.machine(), + "machine": platform.processor(), + "total_memory": memory, + } + + +@dataclass +class StartupTelemetry(ProductTelemetryEvent): + engine_args: "EngineArgs" + num_engines: int + capabilities: set["ModelCapabilites"] + session_id: str + + # auto populated fields + cloud_provider: str = field(default_factory=_detect_cloud_provider) + os: str = field(default_factory=lambda: _get_os_info()["os"]) + architecture: str = field(default_factory=lambda: _get_os_info()["architecture"]) + machine: str = field(default_factory=lambda: _get_os_info()["machine"]) + cpu_count: int = field(default_factory=lambda: _get_cpu_info()["count"]) + cpu_type: str = field(default_factory=lambda: _get_cpu_info()["cpu_type"]) + cpu_family_model_stepping: str = field( + default_factory=lambda: _get_cpu_info()["cpu_family_model_stepping"] + ) + total_memory: int = field(default_factory=lambda: _get_os_info()["total_memory"]) + gpu_count: int = field(default_factory=lambda: get_system_properties()["gpu_count"]) + gpu_type: str = field(default_factory=lambda: get_system_properties()["gpu_type"]) + gpu_memory_per_device_mb: int = field( + default_factory=lambda: get_system_properties()["gpu_memory_per_device_mb"] + ) + version: str = field(default_factory=infinity_version) + + def render(self): + """defines the message to be sent to posthog""" + return { + **asdict(self.engine_args), + "session_id": self.session_id, + "num_engines": self.num_engines, + "capabilities": self.capabilities, + "cloud_provider": self.cloud_provider, + "architecture": self.architecture, + "os": self.os, + "machine": self.machine, + "cpu_count": self.cpu_count, + "cpu_type": self.cpu_type, + "cpu_family_model_stepping": self.cpu_family_model_stepping, + "total_memory": self.total_memory, + "gpu_count": self.gpu_count, + "gpu_type": self.gpu_type, + "gpu_memory_per_device": self.gpu_memory_per_device_mb, + "version": self.version, + } + + def name(self): + return "startup_v1" + + +class _PostHogCapture: + def __init__(self): + self._posthog = None + + if not CHECK_POSTHOG.is_available or (not MANAGER.anonymous_usage_stats): + return + if "pytest" in sys.modules: + # disable posthog + posthog.disabled = True + + try: + logger.debug( + "Anonymized telemetry enabled. See \ + https://michaelfeil.github.io/infinity for more information." + ) + k = ( + "ph" # split + "c_IOq" # to avoid spam on project + "2AjB200yaxV2qtYTmhAacFE4x42RKOW0K0G5v5uh" + ) + self._posthog = Posthog( + project_api_key=k, + host="https://eu.i.posthog.com", + ) + + posthog_logger = logging.getLogger("posthog") + # Silence posthog's logging + posthog_logger.disabled = True + + except Exception: + logger.debug("Failed to startup posthog") + + @property + @cache + def anonymous_user_id(self): + return get_system_anonymous_name() + + def capture(self, event: ProductTelemetryEvent) -> None: + if self._posthog is None: + return + try: + self._posthog.capture( + distinct_id=self.anonymous_user_id, + event=event.name(), + properties=event.render(), + ) + except Exception as e: + logger.debug(f"Failed to send telemetry event {event}: {e}") + + +PostHog = _PostHogCapture() diff --git a/libs/infinity_emb/infinity_emb/transformer/embedder/sentence_transformer.py b/libs/infinity_emb/infinity_emb/transformer/embedder/sentence_transformer.py index f166211b..b86ae5a1 100644 --- a/libs/infinity_emb/infinity_emb/transformer/embedder/sentence_transformer.py +++ b/libs/infinity_emb/infinity_emb/transformer/embedder/sentence_transformer.py @@ -83,6 +83,10 @@ def __init__(self, *, engine_args=EngineArgs): ]: logger.info("Switching to half() precision (cuda: fp16). ") self.half() + elif self.device.type == "cuda" and engine_args.dtype in [ + Dtype.bfloat16, + ]: + fm.auto_model.to(torch.bfloat16) if engine_args.dtype in (Dtype.int8, Dtype.fp8): fm.auto_model = quant_interface( diff --git a/libs/infinity_emb/poetry.lock b/libs/infinity_emb/poetry.lock index 331bfcba..69f67c16 100644 --- a/libs/infinity_emb/poetry.lock +++ b/libs/infinity_emb/poetry.lock @@ -228,6 +228,17 @@ files = [ [package.extras] dev = ["freezegun (>=1.0,<2.0)", "pytest (>=6.0)", "pytest-cov"] +[[package]] +name = "backoff" +version = "2.2.1" +description = "Function decoration for backoff and retry" +optional = true +python-versions = ">=3.7,<4.0" +files = [ + {file = "backoff-2.2.1-py3-none-any.whl", hash = "sha256:63579f9a0628e06278f7e47b7d7d5b6ce20dc65c5e96a6f3ca99a6adca0396e8"}, + {file = "backoff-2.2.1.tar.gz", hash = "sha256:03f829f5bb1923180821643f8753b0502c3b682293992485b0eef2807afa5cba"}, +] + [[package]] name = "beautifulsoup4" version = "4.12.3" @@ -1603,6 +1614,17 @@ files = [ [package.dependencies] beautifulsoup4 = ">=4.11.1" +[[package]] +name = "monotonic" +version = "1.6" +description = "An implementation of time.monotonic() for Python 2 & < 3.3" +optional = true +python-versions = "*" +files = [ + {file = "monotonic-1.6-py2.py3-none-any.whl", hash = "sha256:68687e19a14f11f26d140dd5c86f3dba4bf5df58003000ed467e0e2a69bca96c"}, + {file = "monotonic-1.6.tar.gz", hash = "sha256:3a55207bcfed53ddd5c5bae174524062935efed17792e9de2ad0205ce9ad63f7"}, +] + [[package]] name = "mpmath" version = "1.3.0" @@ -2538,6 +2560,29 @@ files = [ dev = ["pre-commit", "tox"] testing = ["pytest", "pytest-benchmark"] +[[package]] +name = "posthog" +version = "3.7.0" +description = "Integrate PostHog into any python application." +optional = true +python-versions = "*" +files = [ + {file = "posthog-3.7.0-py2.py3-none-any.whl", hash = "sha256:3555161c3a9557b5666f96d8e1f17f410ea0f07db56e399e336a1656d4e5c722"}, + {file = "posthog-3.7.0.tar.gz", hash = "sha256:b095d4354ba23f8b346ab5daed8ecfc5108772f922006982dfe8b2d29ebc6e0e"}, +] + +[package.dependencies] +backoff = ">=1.10.0" +monotonic = ">=1.5" +python-dateutil = ">2.1" +requests = ">=2.7,<3.0" +six = ">=1.5" + +[package.extras] +dev = ["black", "flake8", "flake8-print", "isort", "pre-commit"] +sentry = ["django", "sentry-sdk"] +test = ["coverage", "django", "flake8", "freezegun (==0.3.15)", "mock (>=2.0.0)", "pylint", "pytest", "pytest-timeout"] + [[package]] name = "prometheus-client" version = "0.20.0" @@ -4752,7 +4797,7 @@ test = ["big-O", "importlib-resources", "jaraco.functools", "jaraco.itertools", type = ["pytest-mypy"] [extras] -all = ["ctranslate2", "diskcache", "einops", "fastapi", "optimum", "orjson", "pillow", "prometheus-fastapi-instrumentator", "pydantic", "rich", "sentence-transformers", "soundfile", "timm", "torch", "typer", "uvicorn"] +all = ["ctranslate2", "diskcache", "einops", "fastapi", "optimum", "orjson", "pillow", "posthog", "prometheus-fastapi-instrumentator", "pydantic", "rich", "sentence-transformers", "soundfile", "timm", "torch", "typer", "uvicorn"] audio = ["soundfile"] cache = ["diskcache"] ct2 = ["ctranslate2", "sentence-transformers", "torch", "transformers"] @@ -4760,7 +4805,7 @@ einops = ["einops"] logging = ["rich"] onnxruntime-gpu = ["onnxruntime-gpu"] optimum = ["optimum"] -server = ["fastapi", "orjson", "prometheus-fastapi-instrumentator", "pydantic", "rich", "typer", "uvicorn"] +server = ["fastapi", "orjson", "posthog", "prometheus-fastapi-instrumentator", "pydantic", "rich", "typer", "uvicorn"] tensorrt = ["tensorrt"] torch = ["sentence-transformers", "torch"] vision = ["pillow", "timm"] @@ -4768,4 +4813,4 @@ vision = ["pillow", "timm"] [metadata] lock-version = "2.0" python-versions = ">=3.9,<4" -content-hash = "806fe858fc679b8842cb847d213b8607a887cdfe58c28bab1def8bfde0fbda05" +content-hash = "0c763ca18d0acb2628b5f25516cf7702214d22425fc8c6761ff7054c6285e9dd" diff --git a/libs/infinity_emb/pyproject.toml b/libs/infinity_emb/pyproject.toml index 527abb26..4e83ebca 100644 --- a/libs/infinity_emb/pyproject.toml +++ b/libs/infinity_emb/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "infinity_emb" -version = "0.0.62" +version = "0.0.63" description = "Infinity is a high-throughput, low-latency REST API for serving text-embeddings, reranking models and clip." authors = ["michaelfeil "] license = "MIT" @@ -23,7 +23,8 @@ orjson = {version = ">=3.9.8,!=3.10.0", optional=true} prometheus-fastapi-instrumentator = {version = ">=6.1.0", optional=true} uvicorn = {version = "^0.23.2", optional=true, extras = ["standard"]} typer = {version = "^0.9.0", optional=true, extras = ["all"]} -pydantic = {version = ">=2.4.0,<3", optional=true} +pydantic = {version = ">=2.4.0,<3", optional=true} +posthog = {version = "*", optional=true} # backend torch = {version = ">=2.2.1", source = "pypi", optional=true} sentence-transformers = {version = "^3.0.1", optional=true} @@ -93,6 +94,7 @@ server=[ "fastapi", "orjson", "prometheus-fastapi-instrumentator", + "posthog", "pydantic", "rich", "typer", @@ -107,6 +109,7 @@ all=[ "orjson", "pillow", "prometheus-fastapi-instrumentator", + "posthog", "pydantic", "rich", "sentence-transformers",