Skip to content

Commit

Permalink
chore: Poetry + precommit (#27)
Browse files Browse the repository at this point in the history
* chore: Poetry + precommit

* Remove unnecessary dependencies

* Update github workflows

* Add poetry version to github workflow

* Update Makefile

* Small tweaks

---------

Co-authored-by: Ismail Pelaseyed <[email protected]>
  • Loading branch information
simjak and homanp committed Feb 10, 2024
1 parent 1fd9bb4 commit 3ded918
Show file tree
Hide file tree
Showing 8 changed files with 3,388 additions and 133 deletions.
12 changes: 9 additions & 3 deletions .github/workflows/lint.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,9 @@ on:
branches: [main]
pull_request:

env:
POETRY_VERSION: "1.4.2"

jobs:
build:
runs-on: ubuntu-latest
Expand All @@ -16,14 +19,17 @@ jobs:
- "3.11"
steps:
- uses: actions/checkout@v4
- name: Install poetry
run: |
pipx install poetry==$POETRY_VERSION
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}
cache: poetry
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install -r requirements.txt
poetry install
- name: Analysing the code with our lint
run: |
make lint
make lint
77 changes: 77 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
# pre-commit configuration: formatting, linting, spelling, commit-message
# and security hooks. Hooks run at the default stage unless a `stages`
# key on the hook says otherwise.
default_language_version:
  python: python3.9
repos:
  # Meta hooks validate this configuration itself: every hook must match
  # at least one file, and every `exclude` pattern must exclude something.
  - repo: meta
    hooks:
      - id: check-hooks-apply
      - id: check-useless-excludes

  - repo: https://github.com/psf/black
    rev: 23.9.1
    hooks:
      - id: black

  - repo: https://github.com/asottile/blacken-docs
    rev: 1.16.0
    hooks:
      - id: blacken-docs
        # Keep this pin equal to the black hook's `rev` above so code
        # snippets in docs are formatted by the same black release as the
        # source files.
        additional_dependencies: [black==23.9.1]

  # Validates commit messages against the conventional-commits preset.
  - repo: https://github.com/alessandrojcm/commitlint-pre-commit-hook
    rev: v9.11.0
    hooks:
      - id: commitlint
        stages: [commit-msg]
        additional_dependencies: ['@commitlint/config-conventional']

  - repo: https://github.com/codespell-project/codespell
    rev: v2.2.4
    hooks:
      - id: codespell
        name: Run codespell to check for common misspellings in files
        language: python
        types: [ text ]
        # --write-changes rewrites files in place; "asend" is whitelisted
        # so it is not reported as a misspelling.
        args: [ "--write-changes", "--ignore-words-list", "asend" ]
        exclude: "poetry.lock"

  - repo: https://github.com/pre-commit/pre-commit-hooks
    rev: v4.4.0
    hooks:
      - id: check-vcs-permalinks
      - id: end-of-file-fixer
      - id: trailing-whitespace
        args: [ --markdown-linebreak-ext=md ]
      - id: debug-statements
      - id: no-commit-to-branch
      - id: check-merge-conflict
      - id: check-toml
      - id: check-yaml
        args: [ '--unsafe' ]  # for mkdocs.yml
      - id: detect-private-key

  - repo: https://github.com/commitizen-tools/commitizen
    rev: v3.13.0
    hooks:
      - id: commitizen
      - id: commitizen-branch
        stages:
          - post-commit
          - push

  # NOTE(review): pyproject.toml constrains ruff to ^0.2.1 while this hook
  # is pinned to v0.0.290, so the hook and `make lint` may disagree —
  # confirm whether the two should be aligned.
  - repo: https://github.com/astral-sh/ruff-pre-commit
    rev: v0.0.290
    hooks:
      - id: ruff
        types_or: [python, pyi, jupyter]

  # - repo: https://github.com/pre-commit/mirrors-mypy
  #   rev: v1.8.0
  #   hooks:
  #     - id: mypy
  #       args: [--ignore-missing-imports]

  - repo: https://github.com/PyCQA/bandit
    rev: 1.7.6
    hooks:
      - id: bandit
        # -lll: report only high-severity findings.
        args: ['-lll']
10 changes: 6 additions & 4 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
format:
python -m black .
python -m ruff --select I --fix .
poetry run black .
poetry run ruff --select I --fix .
poetry run vulture . --exclude=venv

PYTHON_FILES=.
lint: PYTHON_FILES=.
lint_diff: PYTHON_FILES=$(shell git diff --name-only --diff-filter=d master | grep -E '\.py$$')

lint lint_diff:
python -m black $(PYTHON_FILES) --check
python -m ruff .
poetry run black $(PYTHON_FILES) --check
poetry run ruff .
poetry run vulture . --exclude=venv
3,231 changes: 3,231 additions & 0 deletions poetry.lock

Large diffs are not rendered by default.

58 changes: 58 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
# Poetry project definition for super-rag, followed by the vulture and
# ruff settings used by the lint targets.
[tool.poetry]
name = "super-rag"
version = "0.0.2"
description = ""
authors = ["Ismail Pelaseyed"]
readme = "README.md"
# Only main.py is declared as package content.
packages = [{include = "main.py"}]

# Application libraries and code-quality tools (ruff, black, flake8,
# vulture) share this single table, so all of them are installed by a
# plain `poetry install`.
[tool.poetry.dependencies]
# Supported interpreters: 3.9 up to, but not including, 3.13.
python = ">=3.9,<3.13"
fastapi = "^0.109.2"
uvicorn = "^0.27.1"
weaviate-client = "^3.26.0"
llama-index = "^0.9.46"
pinecone-client = "^3.0.2"
qdrant-client = "^1.7.3"
ruff = "^0.2.1"
black = "^23.12.1"
flake8 = "^7.0.0"
vulture = "^2.11"
python-decouple = "^3.8"
semantic-router = "^0.0.20"
astrapy = "^0.7.4"
openai = "^1.12.0"
tqdm = "^4.66.2"
cohere = "^4.46"
cmake = "^3.28.1"
fastembed = "^0.2.1"
pypdf = "^4.0.1"
docx2txt = "^0.8"

# NOTE(review): fastembed is declared above without `optional = true`, so
# it is presumably installed regardless of whether this extra is
# requested — confirm whether the extra is still needed.
[tool.poetry.extras]
fastembed = ["fastembed"]

[build-system]
requires = ["poetry-core"]
build-backend = "poetry.core.masonry.api"


# Dead-code detection settings; test files and .venv are skipped.
[tool.vulture]
exclude = [
"*/test_*.py",
"*/.venv/*.py",
]
ignore_decorators = ["@app.route", "@require_*"]
ignore_names = ["visit_*", "do_*"]
# NOTE(review): make_whitelist presumably switches vulture's output to
# whitelist format on every run — confirm this is wanted for `make lint`.
make_whitelist = true
# Report only findings vulture rates at 100% confidence.
min_confidence = 100
paths = ["."]
sort_by_size = true
verbose = false

# Paths ruff should not lint: docs, test files and .venv.
[tool.ruff]
exclude = [
"*/docs/*.py",
"*/test_*.py",
"*/.venv/*.py",
]
118 changes: 0 additions & 118 deletions requirements.txt

This file was deleted.

7 changes: 3 additions & 4 deletions service/embedding.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

import numpy as np
import requests
from fastembed.embedding import FlagEmbedding as Embedding
from fastembed import TextEmbedding
from llama_index import Document, SimpleDirectoryReader
from llama_index.node_parser import SimpleNodeParser
from tqdm import tqdm
Expand All @@ -27,7 +27,6 @@ def _get_datasource_suffix(self, type: str) -> str:
"PDF": ".pdf",
"MARKDOWN": ".md",
"DOCX": ".docx",
"PPTX": ".pptx",
}
try:
return suffixes[type]
Expand Down Expand Up @@ -63,8 +62,8 @@ async def generate_embeddings(

async def generate_embedding(node):
if node is not None:
embedding_model = Embedding(
model_name="sentence-transformers/all-MiniLM-L6-v2", max_length=512
embedding_model = TextEmbedding(
model_name="sentence-transformers/all-MiniLM-L6-v2"
)
embeddings: List[np.ndarray] = list(embedding_model.embed(node.text))
embedding = (
Expand Down
8 changes: 4 additions & 4 deletions service/vector_database.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import weaviate
from astrapy.db import AstraDB
from decouple import config
from fastembed.embedding import FlagEmbedding as Embedding
from fastembed import TextEmbedding
from pinecone import Pinecone, ServerlessSpec
from qdrant_client import QdrantClient
from qdrant_client.http import models as rest
Expand Down Expand Up @@ -36,9 +36,9 @@ async def convert_to_rerank_format():
async def delete(self, file_url: str):
pass

async def _generate_vectors(sefl, input: str):
embedding_model = Embedding(
model_name="sentence-transformers/all-MiniLM-L6-v2", max_length=512
async def _generate_vectors(self, input: str):
embedding_model = TextEmbedding(
model_name="sentence-transformers/all-MiniLM-L6-v2"
)
embeddings: List[np.ndarray] = list(embedding_model.embed(input))
return embeddings[0].tolist()
Expand Down

0 comments on commit 3ded918

Please sign in to comment.