Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add vector_search function for pipeline aggregation #30

Merged
merged 6 commits into from
Jun 1, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 26 additions & 0 deletions API/database.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,3 +22,29 @@ def find_one_and_delete(self, collection, query):

def update_one(self, collection, query, update):
    """Forward an ``update_one`` call to the named collection.

    Args:
        collection: Key used to look the collection up in ``self.db``.
        query: Filter passed through unchanged as the first argument.
        update: Update specification passed through unchanged as the
            second argument.

    Returns:
        Whatever the underlying collection's ``update_one`` returns.
    """
    target = self.db[collection]
    return target.update_one(query, update)

def vector_search(
    self,
    collection,
    embedding,
    *,
    index="vector_index",
    path="face_embedding",
    num_candidates=20,
    limit=20,
):
    """Run a ``$vectorSearch`` aggregation and return the matches as a list.

    The pipeline has two stages: a ``$vectorSearch`` stage that looks up
    ``embedding`` in the given search index, and a ``$project`` stage that
    keeps only ``Name``, ``Image`` and the similarity ``score`` (taken from
    the ``vectorSearchScore`` metadata), dropping ``_id``.

    Args:
        collection: Key used to look the collection up in ``self.db``.
        embedding: Query vector sent as ``queryVector``.
        index: Name of the vector search index to query.
        path: Document field holding the stored embeddings.
        num_candidates: Value sent as ``numCandidates``.
        limit: Maximum number of documents to return.

    Returns:
        list: The projected documents, in the order the cursor yields them.

    Raises:
        ValueError: If ``limit`` is not positive or exceeds
            ``num_candidates``.
    """
    # Fail fast with a clear message instead of a server-side error:
    # the search stage needs a positive limit that does not exceed the
    # number of candidates considered.
    if limit < 1 or num_candidates < limit:
        raise ValueError(
            "limit must be >= 1 and num_candidates must be >= limit "
            f"(got limit={limit}, num_candidates={num_candidates})"
        )

    search_stage = {
        "$vectorSearch": {
            "index": index,
            "path": path,
            "queryVector": embedding,
            "numCandidates": num_candidates,
            "limit": limit,
        }
    }
    project_stage = {
        "$project": {
            "_id": 0,
            "Name": 1,
            "Image": 1,
            "score": {"$meta": "vectorSearchScore"},
        }
    }

    cursor = self.db[collection].aggregate([search_stage, project_stage])
    # Materialise the cursor so callers can index into the result.
    return list(cursor)
Comment on lines 22 to +50

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Consider extracting the query and projection into variables for better readability and maintainability. This practice enhances code clarity and simplifies future modifications.

def vector_search(self, collection, embedding):
    """Search ``collection`` for the documents closest to ``embedding``.

    Builds a two-stage aggregation pipeline: a ``$vectorSearch`` over the
    ``face_embedding`` path of the ``vector_index`` index (20 candidates,
    at most 20 results), followed by a ``$project`` that keeps ``Name``,
    ``Image`` and the ``vectorSearchScore`` as ``score`` while dropping
    ``_id``.

    Returns:
        list: Every document yielded by the aggregation cursor.
    """
    pipeline = [
        {
            "$vectorSearch": {
                "index": "vector_index",
                "path": "face_embedding",
                "queryVector": embedding,
                "numCandidates": 20,
                "limit": 20,
            }
        },
        {
            "$project": {
                "_id": 0,
                "Name": 1,
                "Image": 1,
                "score": {"$meta": "vectorSearchScore"},
            }
        },
    ]
    cursor = self.db[collection].aggregate(pipeline)
    return list(cursor)

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Requested changes has been made 🎉

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Requested changes has been made 🎉

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Requested changes has been made 🎉

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Requested changes has been made 🎉

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Requested changes has been made 🎉

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Requested changes has been made 🎉

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Requested changes has been made 🎉

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Requested changes has been made 🎉

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Requested changes has been made 🎉

49 changes: 45 additions & 4 deletions API/route.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,22 +8,27 @@

from bson import ObjectId
from deepface import DeepFace
from fastapi import APIRouter, HTTPException, Response
from fastapi import APIRouter, HTTPException, Response, UploadFile, File
from matplotlib import pyplot as plt
from PIL import Image
from pydantic import BaseModel

from API.database import Database
from API.utils import init_logging_config
from dotenv import load_dotenv

# Load variables from a .env file before anything below reads the environment.
load_dotenv()
init_logging_config()

# NOTE: the environment variable is named "MONGO_URL1" while the constant is
# MONGO_URI; os.getenv returns None when the variable is unset.
# NOTE(review): confirm how Database behaves when given a None URI.
MONGO_URI = os.getenv("MONGO_URL1")
router = APIRouter()


# `client` is built with Database's default arguments; `client2` targets the
# "FaceRec" database at MONGO_URI and is the handle used for vector search.
client = Database()
client2 = Database(MONGO_URI, "FaceRec")

# Collection names: `collection` is used with `client`, `collection2` with
# `client2`.
collection = "faceEntries"
collection2 = "ImageDB"


# Models for the data to be sent and received by the server
Devasy23 marked this conversation as resolved.
Show resolved Hide resolved
Expand Down Expand Up @@ -79,16 +84,16 @@ async def create_new_faceEntry(Employee: Employee):
plt.imsave(f"Images/Faces/{Name}.jpg", face_image_data[0]["face"])
logging.info(f"Face saved {Name}")
embedding = DeepFace.represent(
image_filename, model_name="Facenet", detector_backend="mtcnn"
image_filename, model_name="Facenet512", detector_backend="mtcnn"
)
embeddings.append(embedding)
logging.info(f"Embedding created Embeddings for {Name}")
os.remove(image_filename)

logging.debug(f"About to insert Embeddings: {embeddings}")
# Store the data in the database
client.insert_one(
collection,
client2.insert_one(
collection2,
{
"EmployeeCode": EmployeeCode,
"Name": Name,
Expand Down Expand Up @@ -267,3 +272,39 @@ async def delete_employees(EmployeeCode: int):
client.find_one_and_delete(collection, {"EmployeeCode": EmployeeCode})

return {"Message": "Successfully Deleted"}


@router.post("/recognize_face", response_class=Response)
async def recognize_face(Face: UploadFile = File(...)):
    """
    Recognize a face from the provided image.

    The upload is written to a per-request temporary file, embedded with
    DeepFace (Facenet512 model, mtcnn detector), and the first embedding is
    used for a vector search. Only the first search result is considered.

    Args:
        Face (UploadFile): The image file to be recognized.

    Returns:
        Response: A JSON response with the best match (status 200), or a
        404 JSON response ``{"message": "No match found"}`` when the search
        returns nothing or the best score is below 0.5.

    Raises:
        HTTPException: With status 500 if reading, embedding or searching
        fails.
    """
    # Local import: the module-level import block may not include tempfile.
    import tempfile

    logging.info("Recognizing Face")
    tmp_path = None
    try:
        img_data = await Face.read()
        # A unique temp file per request avoids concurrent requests
        # overwriting or deleting each other's image.
        with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as f:
            tmp_path = f.name
            f.write(img_data)

        embedding = DeepFace.represent(
            img_path=tmp_path, model_name="Facenet512", detector_backend="mtcnn"
        )
        result = client2.vector_search(collection2, embedding[0]["embedding"])

        # An empty result set is "no match", not a server error.
        if not result:
            logging.info("Result: no candidates returned")
            return Response(
                status_code=404,
                content=json.dumps({"message": "No match found"}),
                media_type="application/json",
            )

        best = result[0]
        logging.info(f"Result: {best['Name']}, {best['score']}")
        if best["score"] < 0.5:
            return Response(
                status_code=404,
                content=json.dumps({"message": "No match found"}),
                media_type="application/json",
            )
    except Exception as e:
        logging.error(f"Error: {e}")
        raise HTTPException(status_code=500, detail="Internal server error") from e
    finally:
        # Clean up on every path; a missing file must not mask the real
        # outcome of the request.
        if tmp_path is not None:
            try:
                os.remove(tmp_path)
            except FileNotFoundError:
                pass

    return Response(
        content=bytes(json.dumps(best, default=str), "utf-8"),
        media_type="application/json",
    )
15 changes: 14 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -49,4 +49,17 @@
- Resolved various bugs and issues identified during the testing process.

### Removed
- Removed deprecated code and unused dependencies from the project.
- Removed deprecated code and unused dependencies from the project.

## [0.1.4] - 2024-03-16 - 23:00

### Added
- Implemented a new `recognize_face` endpoint (`POST /recognize_face`) in [`route.py`](API/route.py). This endpoint accepts an uploaded image file, converts it into a Facenet512 embedding, and performs a vector search query on the MongoDB Atlas database. Changes made by @Devasy23.
- Added a new `vector_search` function in [`database.py`](API/database.py). This function performs a vector similarity search on the MongoDB Atlas database using Euclidean distance as the similarity measure. Changes made by @Devasy23.
- Updated [`index.ipynb`](index.ipynb) to include examples and usage of the new `recognize_face` endpoint and `vector_search` function. Changes made by @Devasy23.

### Changed
- Updated the `Database` class in [`database.py`](API/database.py) to include the new `vector_search` function. Changes made by @Devasy23.

### Fixed
- Resolved various bugs and issues identified during the implementation and testing of the new features. Fixes made by @Devasy23.
233 changes: 0 additions & 233 deletions Vector Search/index.ipynb

This file was deleted.

570 changes: 570 additions & 0 deletions index.ipynb

Large diffs are not rendered by default.

36 changes: 36 additions & 0 deletions testing/test_database.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
import base64
import logging
from unittest.mock import MagicMock, patch


import pytest
from fastapi.testclient import TestClient

from API.database import Database
from API.route import router
from API.utils import init_logging_config

init_logging_config()

def test_vector_search():
    """Exercise the real ``Database.vector_search`` against a stubbed collection.

    Patching ``vector_search`` itself would only test the mock, so the
    collection's ``aggregate`` is stubbed instead and the method's own
    pipeline construction and result handling are checked.
    """
    mock_result = [
        {
            "Name": "Test1",
            "Image": "encoded_string1",
            "score": 0.8,
        },
        {
            "Name": "Test2",
            "Image": "encoded_string2",
            "score": 0.7,
        },
    ]
    embedding = [0.1, 0.2, 0.3]

    # aggregate returns an iterator, like a cursor, so the method's
    # materialisation into a list is covered too.
    mock_collection = MagicMock()
    mock_collection.aggregate.return_value = iter(mock_result)

    # Bypass __init__ so no database connection is attempted; the method
    # only needs ``self.db`` to be subscriptable by collection name.
    database = Database.__new__(Database)
    database.db = {"collection_name": mock_collection}

    result = database.vector_search("collection_name", embedding)

    assert result == mock_result
    mock_collection.aggregate.assert_called_once()
    args, kwargs = mock_collection.aggregate.call_args
    pipeline = args[0] if args else kwargs["pipeline"]
    assert pipeline[0]["$vectorSearch"]["queryVector"] == embedding
    assert pipeline[1]["$project"]["score"] == {"$meta": "vectorSearchScore"}
34 changes: 34 additions & 0 deletions testing/test_face_endpoints.py
Original file line number Diff line number Diff line change
Expand Up @@ -162,3 +162,37 @@ def test_delete_face():
response = client.delete("/delete/1")
assert response.status_code == 200
assert response.json() == {"Message": "Successfully Deleted"}

@pytest.mark.run(order=6)
def test_recognize_face_fail():
    """A best match scoring below the endpoint's threshold yields a 404."""
    low_score_match = {
        "Image": "encoded_string2",
        "Name": "Test2",
        "score": 0.0,
    }
    search_patch = patch(
        "API.database.Database.vector_search", return_value=[low_score_match]
    )
    with search_patch, open("./test-faces/devansh.jpg", "rb") as face_file:
        response = client.post("/recognize_face", files={"Face": face_file})

    assert response.status_code == 404
    assert response.json() == {'message': 'No match found'}

@pytest.mark.run(order=7)
def test_recognize_face_success():
    """A best match with a high score is returned as-is with status 200."""
    high_score_match = {
        "Image": "encoded_string2",
        "Name": "Test2",
        "score": 1.0,
    }
    search_patch = patch(
        "API.database.Database.vector_search", return_value=[high_score_match]
    )
    with search_patch, open("./test-faces/devansh.jpg", "rb") as face_file:
        response = client.post("/recognize_face", files={"Face": face_file})

    assert response.status_code == 200
    assert response.json() == {'Name': 'Test2', 'Image':'encoded_string2','score': 1.0}
Loading