From 8bc14acbb23727bfed0464f7785a9cccc5fcfee9 Mon Sep 17 00:00:00 2001 From: EricOuma Date: Mon, 11 Sep 2023 02:36:47 +0300 Subject: [PATCH 1/2] add docstrings and update readme --- CONTRIBUTING.md | 30 ++++ README.md | 263 +++++++++++++++----------------- django_typesense/admin.py | 42 ++++- django_typesense/collections.py | 43 ++++-- django_typesense/utils.py | 70 +++------ setup.py | 1 + 6 files changed, 246 insertions(+), 203 deletions(-) create mode 100644 CONTRIBUTING.md diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 0000000..5c24eaa --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,30 @@ +## Contribution +django-typesense’s git main branch should always be stable, production-ready & passing all tests. + +### Setting up the project +``` +# clone the repo +git clone https://gitlab.com/siege-software/packages/django_typesense.git +git checkout -b stable/1.x.x + +# Set up virtual environment +python3.8 -m venv venv +source venv/bin/activate + +pip install -r requirements-dev.txt + +# Enable automatic pre-commit hooks +pre-commit install +``` + +### Running Tests +``` +cd tests +pytest . 
+``` + +### Building the package +`python -m build` + +### Installing the package from build +` pip install path/to/django_typesense-0.0.1.tar.gz` diff --git a/README.md b/README.md index ec0c1bf..44ab144 100644 --- a/README.md +++ b/README.md @@ -1,10 +1,13 @@ # django typesense + +[![Build](https://github.com/Siege-Software/django-typesense/workflows/build/badge.svg?branch=main)](https://github.com/Siege-Software/django-typesense/actions?workflow=CI) [![codecov](https://codecov.io/gh/Siege-Software/django-typesense/branch/main/graph/badge.svg?token=S4W0E84821)](https://codecov.io/gh/Siege-Software/django-typesense) [![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black) ![PyPI download month](https://img.shields.io/pypi/dm/django-typesense.svg) [![PyPI version](https://badge.fury.io/py/django-typesense.svg)](https://pypi.python.org/pypi/django-typesense/) ![Python versions](https://img.shields.io/badge/python-%3E%3D3.8-brightgreen) -![Django Versions](https://img.shields.io/badge/django-%3E%3D4-brightgreen) +![Django Versions](https://img.shields.io/badge/django-%3E%3D3.2-brightgreen) +[![PyPI License](https://img.shields.io/pypi/l/django-typesense.svg)](https://pypi.python.org/pypi/django-typesense/) > [!WARNING] @@ -13,140 +16,158 @@ ## What is it? Faster Django Admin powered by [Typesense](https://typesense.org/) -## TODOs -- Performance comparison stats -## Note on ForeignKeys and OneToOneFields -- While data from foreign keys can be indexed, displaying them on the admin will trigger database queries that will negatively affect performance. -- We recommend indexing the string representation of the foreignkey as a model property to enable display on admin. 
-## How to use +## Quick Start Guide +### Installation `pip install django-typesense` -Install directly from github to test the most recent version -``` -pip install git+https://github.com/SiegeSoftware/django-typesense.git -``` +or install directly from github to test the most recent version + +`pip install git+https://github.com/SiegeSoftware/django-typesense.git` Add `django_typesense` to the list of installed apps. -You will need to set up the typesense server on your machine. -### Update the model to inherit from the Typesense model mixin +Follow this [guide](https://typesense.org/docs/guide/install-typesense.html#option-1-typesense-cloud) to install and run typesense + +### Create Collections +Throughout this guide, we’ll refer to the following models, which comprise a song catalogue application: + +``` +from django.db import models + + +class Genre(models.Model): + name = models.CharField(max_length=100) + + def __str__(self): + return self.name + + +class Artist(models.Model): + name = models.CharField(max_length=200) + + def __str__(self): + return self.name + +class Song(models.Model): + title = models.CharField(max_length=100) + genre = models.ForeignKey(Genre, on_delete=models.CASCADE) + release_date = models.DateField(blank=True, null=True) + artists = models.ManyToManyField(Artist) + number_of_comments = models.IntegerField(default=0) + number_of_views = models.IntegerField(default=0) + duration = models.DurationField() + description = models.TextField() + + def __str__(self): + return self.title + + @property + def release_date_timestamp(self): + # read https://typesense.org/docs/0.25.0/api/collections.html#indexing-dates + return self.release_date.timestamp() if self.release_date else self.release_date + + def artist_names(self): + return list(self.artists.all().values_list('name', flat=True)) + ``` -from django_typesense.models import TypesenseModelMixin, TypesenseQuerySet - -class MyModelManager(models.Manager): - """Manager for class 
:class:`.models.MyModelName` - """ - field1 = models... - field2 = models... - field3 = models... - date_created = models... + +For such an application, you might be interested in improving the search and load times on the song records list view. + +``` +from django_typesense.collections import TypesenseCollection +from django_typesense import fields + + +class SongCollection(TypesenseCollection): + # At least one of the indexed fields has to be provided as one of the `query_by_fields`. Must be a CharField + query_by_fields = 'title,artist_names' - typesense_fields = [ - { - "name": "field1", "type": "string", - }, - { - "name": "field2", "type": "int64" - }, - { - "name": "field3", "type": "string[]" - }, - { - "name": "date_created", "type": "int64" - } - ] - - typesense_default_sorting_field = 'date_created' - query_by_fields = ','.join( - [ - 'field1', 'field2', 'date_created' - ] - ) - - def get_typesense_dict(self): - """ - Create a data structure that can be serialized as JSON for Typesense fields. - - Normalize the structure if required. - - Returns: - dict: JSON-serializable data structure - """ - - typesense_dict = { - 'id': str(self.id), - 'field1': self.field1, - 'field2': self.field2, - 'field3': self.field3, - 'date_created': self.date_created.timestamp() - } - - return typesense_dict - - def get_queryset(self): - """ - Get an optimized queryset. 
- - Returns: - django.db.models.query.QuerySet: Queryset with instances of \ - :class:`.models.Work` - """ - return TypesenseQuerySet( - self.model, using=self._db - ) + title = fields.TypesenseCharField() + genre_name = fields.TypesenseCharField(value='genre.name') + genre_id = fields.TypesenseSmallIntegerField() + release_date = fields.TypesenseDateField(value='release_date_timestamp', optional=True) + artist_names = fields.TypesenseArrayField(base_field=fields.TypesenseCharField(), value='artist_names') + number_of_comments = fields.TypesenseSmallIntegerField(index=False, optional=True) + number_of_views = fields.TypesenseSmallIntegerField(index=False, optional=True) + duration = fields.DurationField() +``` +It's okay to store fields that you don't intend to search but only want to display on the admin. Such fields should be marked as un-indexed, e.g.: -class MyModelName(TypesenseModelMixin) + number_of_views = fields.TypesenseSmallIntegerField(index=False, optional=True) + +Update the song model as follows: +``` +class Song(models.Model): + ... + collection_class = SongCollection ... - - objects = MyModelManager() ``` -`TypesenseQuerySet` is required to automatically index model changes on create, update and delete +How the value of a field is retrieved from a model instance: +1. The collection field name is called as a property of the model instance +2. If `value` is provided, it will be called as a property or method of the model instance + +Where the collections live is entirely up to you, but we recommend having a `collections.py` file in the Django app that contains the model you are creating a collection for. -### Admin Setup -To update a model admin to display and search from the model Typesense collection, the admin class should inherit from the TypesenseSearchAdminMixin +> [!NOTE] +> We recommend displaying data from ForeignKey or OneToOne fields as string attributes using the display decorator to avoid triggering database queries that will negatively affect performance. 
+ +### Admin Integration +To make a model admin display and search from the model's Typesense collection, the admin class should inherit `TypesenseSearchAdminMixin` ``` from django_typesense.admin import TypesenseSearchAdminMixin -class MyModelAdmin(TypesenseSearchAdminMixin): - pass +@admin.register(Song) +class SongAdmin(TypesenseSearchAdminMixin): + ... + list_display = ['title', 'genre_name', 'release_date', 'number_of_views', 'duration'] + + @admin.display(description='Genre') + def genre_name(self, obj): + return obj.genre.name + ... ``` -### Bulk indexing typesense collections -To update or delete collection documents in bulk. Bulk updating is multi-threaded. -You might encounter poor performance when indexing large querysets. Suggestions on how to improve are welcome. +### Indexing +For the initial setup, you will need to index in bulk. Bulk updating is multi-threaded. Depending on your system specs, you should set the `batch_size` keyword argument. ``` -from django_typesense.methods import bulk_delete_typsense_records, bulk_update_typsense_records -from .models import MyModel -from django_typesense.typesense_client import client +from django_typesense.utils import bulk_delete_typesense_records, bulk_update_typesense_records -model_qs = Model.objects.all().order_by('date_created') # querysets should be ordered -bulk_update_typesense_records(model_qs) # for bulk document indexing -bulk_delete_typsense_records(model_qs) # for bulk document deletiom +model_qs = Song.objects.all().order_by('id') # querysets should be ordered +bulk_update_typesense_records(model_qs, batch_size=1024) ``` # Custom Admin Filters To make use of custom admin filters, define a `filter_by` property in the filter definition. -Define boolean typesense field `has_alien` that gets it's value from a model property. +Define boolean typesense field `has_views` that gets its value from a model property. This example is not necessarily practical; it is for demo purposes only. 
``` -@property -def has_alien(self): - # moon_aliens and mars_aliens are reverse foreign keys - return self.moon_aliens.exists() or self.mars_aliens.exists() +# models.py +class Song(models.Model): + ... + @property + def has_views(self): + return self.number_of_views > 0 + ... + +# collections.py +class SongCollection(TypesenseCollection): + ... + has_views = fields.TypesenseBooleanField() + ... ``` ``` -class HasAlienFilter(admin.SimpleListFilter): - title = _('Has Alien') - parameter_name = 'has_alien' +class HasViewsFilter(admin.SimpleListFilter): + title = _('Has Views') + parameter_name = 'has_views' def lookups(self, request, model_admin): return ( @@ -158,9 +179,9 @@ class HasAlienFilter(admin.SimpleListFilter): def queryset(self, request, queryset): # This is used by the default django admin if self.value() == 'True': - return queryset.filter(Q(mars_aliens__isnull=False) | Q(moon_aliens__isnull=False)) + return queryset.filter(number_of_views__gt=0) elif self.value() == 'False': - return queryset.filter(mars_aliens__isnull=True, moon_aliens__isnull=True) + return queryset.filter(number_of_views=0) return queryset @@ -168,44 +189,10 @@ class HasAlienFilter(admin.SimpleListFilter): def filter_by(self): # This is used by typesense if self.value() == 'True': - return {"has_alien": "=true"} + return {"has_views": "=true"} elif self.value() == 'False': - return {"has_alien": "!=false"} + return {"has_views": "=false"} return {} ``` - -## Release Process -Each release has its own branch, called stable/version_number and any changes will be issued from those branches. 
-The main branch has the latest stable version - -## Contribution -TBA - -``` -# clone the repo -git clone https://gitlab.com/siege-software/packages/django_typesense.git -git checkout -b stable/1.x.x - -# Set up virtual environment -python3.8 -m venv venv -source venv/bin/activate - -pip install -r requirements-dev.txt - -# Enable automatic pre-commit hooks -pre-commit install -``` - -## Running Tests -``` -cd tests -pytest . -``` - -## Building the package -```python -m build``` - -## Installing the package from build -``` pip install path/to/django_typesense-0.0.1.tar.gz``` diff --git a/django_typesense/admin.py b/django_typesense/admin.py index 56e5995..a815ec1 100644 --- a/django_typesense/admin.py +++ b/django_typesense/admin.py @@ -213,15 +213,33 @@ def changelist_view(self, request, extra_context=None): return template_response def get_sortable_by(self, request): + """ + Get sortable fields; these are fields that sort is defaulted or set to True. + + Args: + request: the HttpRequest + + Returns: + A list of field names + """ + sortable_fields = super().get_sortable_by(request) collection = self.model.get_collection() return set(sortable_fields).intersection(collection.sortable_fields) def get_results(self, request): - # This is like ModelAdmin.get_queryset() - collection = self.model.get_collection() + """ + Get all indexed data without any filtering or specific search terms. Works like `ModelAdmin.get_queryset()` + + Args: + request: the HttpRequest + + Returns: + A list of the typesense results + """ + return typesense_search( - collection_name=collection.schema_name, q="*" + collection_name=self.model.collection_class.schema_name, q="*" ) def get_changelist(self, request, **kwargs): @@ -241,14 +259,22 @@ def get_paginator( def get_typesense_search_results(self, search_term, page_num, filter_by, sort_by): """ - Return a tuple containing a objs to implement the django_typesense - and a boolean indicating if the results may contain duplicates. 
+ Get the results from typesense with the provided filtering, sorting, pagination and search parameters applied + + Args: + search_term: The search term provided in the search form + page_num: The requested page number + filter_by: The filtering parameters + sort_by: The sort parameters + + Returns: + A list of typesense results """ - collection = self.model.get_collection() + results = typesense_search( - collection_name=collection.schema_name, + collection_name=self.model.collection_class.schema_name, q=search_term or "*", - query_by=collection.query_by_fields, + query_by=self.model.collection_class.query_by_fields, page=page_num, per_page=self.list_per_page, filter_by=filter_by, diff --git a/django_typesense/collections.py b/django_typesense/collections.py index 2a364af..76e2a64 100644 --- a/django_typesense/collections.py +++ b/django_typesense/collections.py @@ -1,9 +1,7 @@ -import copy -from typing import Optional, List, Iterable, Any, Union, Dict +from typing import Optional, Iterable, Union, Dict -from django.db import models from django.db.models import QuerySet -from django.utils.functional import cached_property, classproperty +from django.utils.functional import cached_property from typesense.exceptions import ObjectNotFound from django_typesense.fields import TypesenseField, TypesenseCharField @@ -48,6 +46,10 @@ def __init__(self, obj: Union[object, QuerySet, Iterable] = None, many: bool = F self.data = [] def get_fields(self) -> Dict[str, TypesenseField]: + """ + Returns: + A dictionary of the fields names to the field definition for this collection + """ fields = {} for attr in dir(self): @@ -68,12 +70,16 @@ def get_fields(self) -> Dict[str, TypesenseField]: return fields @classmethod - def _get_metadata(cls): + def _get_metadata(cls) -> dict: defined_meta_options = _COLLECTION_META_OPTIONS.intersection(set(dir(cls))) return {meta_option: getattr(cls, meta_option) for meta_option in defined_meta_options} @cached_property - def validated_data(self): + 
def validated_data(self) -> list: + """ + Returns a list of the collection data with values converted into the correct Python objects + """ + _validated_data = [] for obj in self.data: @@ -90,21 +96,38 @@ def __str__(self): return f"{self.schema_name} TypesenseCollection" @property - def sortable_fields(self): + def sortable_fields(self) -> list: return [field.name for field in self.fields.values() if field.sort] - def get_field(self, name): + def get_field(self, name) -> TypesenseField: + """ + Get the field with the provided name from the collection + + Args: + name: the field name + + Returns: + A TypesenseField + """ return self.fields[name] @cached_property - def schema_fields(self): + def schema_fields(self) -> list: + """ + Returns: + A list of dictionaries with field attributes needed by typesense for schema creation + """ return [field.attrs for field in self.fields.values()] def _get_object_data(self, obj): return {field.name: field.value(obj) for field in self.fields.values()} @cached_property - def schema(self): + def schema(self) -> dict: + """ + Returns: + The typesense schema + """ return { "name": self.schema_name, "fields": self.schema_fields, diff --git a/django_typesense/utils.py b/django_typesense/utils.py index 08ab991..4a04cae 100644 --- a/django_typesense/utils.py +++ b/django_typesense/utils.py @@ -5,40 +5,18 @@ from django.db.models import QuerySet from django.core.paginator import Paginator -from typesense.exceptions import ObjectNotFound - +from django_typesense.collections import TypesenseCollection from django_typesense.typesense_client import client logger = logging.getLogger(__name__) -def update_batch( - collection_name: str, documents_queryset: QuerySet, batch_no: int -) -> None: +def update_batch(documents_queryset: QuerySet, collection_class: TypesenseCollection, batch_no: int) -> None: """ Helper function that updates a batch of documents using the Typesense API. 
""" - documents = [document.get_typesense_dict() for document in documents_queryset] - - if not len(documents): - logger.warning( - f"Skipping updating the collection {collection_name} with an empty list" - ) - return - - try: - responses = client.collections[collection_name].documents.import_( - documents, {"action": "upsert"} - ) - created = False - except ObjectNotFound: - documents_queryset.model.create_typesense_collection() - responses = client.collections[collection_name].documents.import_( - documents, {"action": "upsert"} - ) - created = True - - # responses is a list with a response for each document in documents + collection = collection_class(documents_queryset, many=True) + responses = collection.update() failure_responses = [response for response in responses if not response["success"]] if failure_responses: @@ -46,28 +24,22 @@ def update_batch( f"An Error occurred during the bulk update: {failure_responses}" ) - if created: - logger.debug( - f"Batch {batch_no} Created and Updated with {len(documents)} records ✓" - ) - else: - logger.debug(f"Batch {batch_no} Updated with {len(documents)} records ✓") + logger.debug(f"Batch {batch_no} Updated with {len(collection.data)} records ✓") -def bulk_update_typesense_records( - records_queryset: QuerySet, collection_name: str = None -) -> None: +def bulk_update_typesense_records(records_queryset: QuerySet, batch_size: int = 1024) -> None: """ This method updates Typesense records for both objs .update() calls from Typesense mixin subclasses. 
This function should be called on every model update statement for data consistency Parameters: records_queryset (QuerySet): the QuerySet should be from a Typesense mixin subclass - collection_name (str): The collection name + batch_size: how many objects are indexed in a single run Returns: None """ + from django_typesense.mixins import TypesenseQuerySet if not isinstance(records_queryset, TypesenseQuerySet): @@ -86,10 +58,7 @@ def bulk_update_typesense_records( "Please provide an ordered objs" ) - if not collection_name: - collection_name = records_queryset.model.__name__.lower() - - batch_size = 500 + collection_class = records_queryset.model.collection_class paginator = Paginator(records_queryset, batch_size) threads = os.cpu_count() @@ -97,12 +66,8 @@ def bulk_update_typesense_records( futures = [] for page_no in paginator.page_range: documents_queryset = paginator.page(page_no).object_list - logger.debug( - f"Updating batch {page_no} of {paginator.num_pages} batches of size {len(documents_queryset)}" - ) - future = executor.submit( - update_batch, collection_name, documents_queryset, page_no - ) + logger.debug(f"Updating batch {page_no} of {paginator.num_pages}") + future = executor.submit(update_batch, documents_queryset, collection_class, page_no) futures.append(future) for future in concurrent.futures.as_completed(futures): @@ -119,8 +84,8 @@ def bulk_delete_typesense_records(document_ids: list, collection_name: str) -> N Returns: None - """ + try: client.collections[collection_name].documents.delete( {"filter_by": f"id:{document_ids}"} @@ -130,6 +95,17 @@ def bulk_delete_typesense_records(document_ids: list, collection_name: str) -> N def typesense_search(collection_name, **kwargs): + """ + Perform a search on the specified collection using the parameters provided. 
+ + Args: + collection_name: the schema name of the collection to perform the search on + **kwargs: typesense search parameters + + Returns: + A list of the typesense results + """ + if not collection_name: return diff --git a/setup.py b/setup.py index 9973d4f..706e6e1 100644 --- a/setup.py +++ b/setup.py @@ -14,4 +14,5 @@ include_package_data=True, long_description=open("README.md").read(), long_description_content_type="text/markdown", + license_files=("LICENSE",), ) From d821f89afb9a8c90a70288f6beb1195b5c532c9e Mon Sep 17 00:00:00 2001 From: EricOuma Date: Mon, 11 Sep 2023 02:39:25 +0300 Subject: [PATCH 2/2] update patch version --- django_typesense/__init__.py | 2 +- setup.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/django_typesense/__init__.py b/django_typesense/__init__.py index 89774e6..485f44a 100644 --- a/django_typesense/__init__.py +++ b/django_typesense/__init__.py @@ -1 +1 @@ -__version__ = "0.1.1-alpha" +__version__ = "0.1.1" diff --git a/setup.py b/setup.py index 706e6e1..79fffa0 100644 --- a/setup.py +++ b/setup.py @@ -4,7 +4,7 @@ name="django_typesense", author="Siege Software", author_email="info@siege.ai", - version="0.1.1-alpha", + version="0.1.1", install_requires=[ "django", "typesense",