Skip to content

Commit

Permalink
Merge pull request #23 from Siege-Software/19-support-typesense-synonyms
Browse files Browse the repository at this point in the history
add feature for typesense synonyms
  • Loading branch information
EricOuma authored Oct 12, 2023
2 parents 9c66f1e + b70a8c0 commit 4968cd7
Show file tree
Hide file tree
Showing 2 changed files with 97 additions and 11 deletions.
33 changes: 29 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ from django_typesense import fields
class SongCollection(TypesenseCollection):
# At least one of the indexed fields has to be provided as one of the `query_by_fields`. Must be a CharField
query_by_fields = 'title,artist_names'
query_by_fields = 'title,artist_names,genre_name'
title = fields.TypesenseCharField()
genre_name = fields.TypesenseCharField(value='genre.name')
Expand Down Expand Up @@ -124,9 +124,11 @@ in the django app where the model you are creating a collection for is.
> avoid triggering database queries that will negatively affect performance
> [Issue #16](https://github.com/Siege-Software/django-typesense/issues/16).
### Update Collection Schema [WIP]
### Update Collection Schema
To add fields to, or remove fields from, a collection's schema in place, update your collection definition and then run:
`SongCollection.update_typesense_collection()`
`SongCollection().update_typesense_collection()`

This also updates the [synonyms](#synonyms)

### Admin Integration
To make a model admin display and search from the model's Typesense collection, the admin class should
Expand Down Expand Up @@ -157,7 +159,7 @@ model_qs = Song.objects.all().order_by('id') # querysets should be ordered
bulk_update_typesense_records(model_qs, batch_size=1024)
```

# Custom Admin Filters
### Custom Admin Filters
To make use of custom admin filters, define a `filter_by` property in the filter definition.
Define a boolean typesense field `has_views` that gets its value from a model property. This example is not necessarily practical; it is for demo purposes only.

Expand Down Expand Up @@ -209,3 +211,26 @@ class HasViewsFilter(admin.SimpleListFilter):
return {}
```

### Synonyms
The [synonyms](https://typesense.org/docs/0.25.1/api/synonyms.html) feature allows you to define search terms that
should be considered equivalent. Define each synonym as a class that inherits from `Synonym`.

```
from django_typesense.collections import Synonym
# say you need users searching the genre hip-hop to get results if they use the search term rap
class HipHopSynonym(Synonym):
name = 'hip-hop-synonyms'
synonyms = ['hip-hop', 'rap']
# Update the collection to include the synonym
class SongCollection(TypesenseCollection):
...
synonyms = [HipHopSynonym]
...
```
To update the collection with any changes made to synonyms, run: `SongCollection().update_typesense_collection()`


75 changes: 68 additions & 7 deletions django_typesense/collections.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
from collections import defaultdict
import pdb

import django

from typing import Optional, Iterable, Union, Dict
from typing import Iterable, Union, Dict, List

from django.db.models import QuerySet
from django.utils.functional import cached_property
Expand All @@ -19,6 +19,30 @@
# Names of the collection-level options; they match the class attributes declared on `TypesenseCollection`.
# NOTE(review): presumably consumed when the collection metadata is built — confirm against the usage site.
_COLLECTION_META_OPTIONS = {
'schema_name', 'default_sorting_field', 'token_separators', 'symbols_to_index', 'query_by_fields'
}
# Attributes of a `Synonym` that `Synonym.data` copies into the synonym payload (falsy values are skipped).
_SYNONYM_PARAMETERS = {'synonyms', 'root', 'locale', 'symbols_to_index'}


class Synonym:
    """
    Declarative definition of a single synonym for a collection.

    Subclasses override the class attributes below; `data` turns them into the
    ``{name: parameters}`` mapping used when synonyms are synced.
    """

    # Identifier of the synonym; required (`data` raises if it is empty).
    name: str = ''
    # Terms that should be treated as equivalent; required (`data` raises if it is empty).
    synonyms: list[str] = None
    # Optional parameters; each one is left out of `data` while it is empty/unset.
    root: str = ''
    locale: str = ''
    symbols_to_index: list[str] = None

    @classproperty
    def data(cls):
        """
        Build the payload describing this synonym.

        Returns:
            A single-item dict ``{cls.name: parameters}`` where ``parameters`` holds every
            attribute named in `_SYNONYM_PARAMETERS` whose value is truthy.

        Raises:
            ValueError: if `name` or `synonyms` is empty or unset.
        """
        if not cls.name:
            raise ValueError('the name attribute must be set')

        if not cls.synonyms:
            raise ValueError('the synonyms attribute must be set')

        # Reading the payload must not write to the class: unset parameters are simply skipped,
        # so there is no need to normalise `symbols_to_index` from None to [] first.
        parameters = {}
        for param in _SYNONYM_PARAMETERS:
            value = getattr(cls, param)
            if value:
                parameters[param] = value

        return {cls.name: parameters}


class TypesenseCollectionMeta(type):
Expand All @@ -31,17 +55,19 @@ def __new__(cls, name, bases, namespace):
class TypesenseCollection(metaclass=TypesenseCollectionMeta):

query_by_fields: str = ''
schema_name: Optional[str] = ''
default_sorting_field: Optional[str] = ''
token_separators: Optional[list] = []
symbols_to_index: Optional[list] = []
schema_name: str = ''
default_sorting_field: str = ''
token_separators: list = []
symbols_to_index: list = []
synonyms: List[Synonym] = []

def __init__(self, obj: Union[object, QuerySet, Iterable] = None, many: bool = False, data: list = None):
assert self.query_by_fields, "`query_by_fields` must be specified in the collection definition"
assert not all([obj, data]), "`obj` and `data` cannot be provided together"

self._meta = self._get_metadata()
self.fields = self.get_fields()
self._synonyms = [synonym().data for synonym in self.synonyms]

# TODO: Make self.data a cached_property
if data:
Expand All @@ -61,7 +87,7 @@ def get_fields(cls) -> Dict[str, TypesenseField]:
A dictionary of the fields names to the field definition for this collection
"""
fields = {}
# Avoid Recursion Erros
# Avoid Recursion Errors
exclude_attributes = {'sortable_fields'}

for attr in dir(cls):
Expand Down Expand Up @@ -170,6 +196,8 @@ def update_typesense_collection(self):
"""
Update the schema of an existing collection
"""
self.create_or_update_synonyms()

current_schema = self.retrieve_typesense_collection()
schema_changes = {}
field_changes = []
Expand Down Expand Up @@ -232,3 +260,36 @@ def update(self):
except ObjectNotFound:
self.create_typesense_collection()
return client.collections[self.schema_name].documents.import_(self.data, {"action": "upsert"})

def create_or_update_synonyms(self):
    """
    Bring the synonyms stored for this collection in line with the defined ones.

    Stored synonyms that are no longer defined are deleted. Defined synonyms that are
    missing remotely, or whose parameters differ from the stored ones, are upserted.
    Synonyms that already match are left untouched.
    """
    # Index the stored synonyms by id without mutating the payload returned by `get_synonyms`.
    current_synonyms = {
        synonym['id']: {key: value for key, value in synonym.items() if key != 'id'}
        for synonym in self.get_synonyms().get('synonyms', [])
    }

    # Every entry of `self._synonyms` is a ``{name: parameters}`` mapping; merge them into one dict.
    defined_synonyms = {}
    for synonym_data in self._synonyms:
        defined_synonyms.update(synonym_data)

    # Sorted so that removals happen in a deterministic order.
    for synonym_name in sorted(current_synonyms.keys() - defined_synonyms.keys()):
        self.delete_synonym(synonym_name)

    for synonym_name, synonym_data in defined_synonyms.items():
        is_new = synonym_name not in current_synonyms
        if is_new or synonym_data != current_synonyms[synonym_name]:
            client.collections[self.schema_name].synonyms.upsert(synonym_name, synonym_data)

def get_synonyms(self) -> dict:
    """Return every synonym stored for this collection."""
    collection = client.collections[self.schema_name]
    return collection.synonyms.retrieve()

def get_synonym(self, synonym_name) -> dict:
    """Fetch the synonym called `synonym_name` from this collection."""
    collection = client.collections[self.schema_name]
    return collection.synonyms[synonym_name].retrieve()

def delete_synonym(self, synonym_name):
    """Remove the synonym called `synonym_name` from this collection."""
    collection = client.collections[self.schema_name]
    return collection.synonyms[synonym_name].delete()

0 comments on commit 4968cd7

Please sign in to comment.