Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

4230 Create SearchQuery model to store user's queries #4479

Merged
merged 16 commits into from
Oct 15, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 41 additions & 0 deletions cl/lib/search_utils.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import re
from datetime import date, datetime, timedelta
from math import ceil
from typing import Any, Dict, List, Optional, Tuple, Union, cast
from urllib.parse import parse_qs, urlencode

Expand Down Expand Up @@ -39,6 +40,7 @@
Court,
OpinionCluster,
RECAPDocument,
SearchQuery,
)

HYPERSCAN_TOKENIZER = HyperscanTokenizer(cache_dir=".hyperscan")
Expand Down Expand Up @@ -1199,3 +1201,42 @@ async def clean_up_recap_document_file(item: RECAPDocument) -> None:
item.page_count = None
item.is_available = False
await item.asave()


def store_search_query(request: HttpRequest, search_results: dict) -> None:
    """Save a user's search query in a SearchQuery model.

    :param request: the request object
    :param search_results: the dict returned by `do_search` or
    `do_es_search` functions
    :return: None
    """
    # Coerce to a real boolean: `failed` is stored in a non-nullable
    # BooleanField, so a missing "error" key (None) must become False.
    is_error = bool(search_results.get("error"))
    # Only Elasticsearch results carry a "results_details" entry.
    is_es_search = search_results.get("results_details") is not None

    search_query = SearchQuery(
        user=None if request.user.is_anonymous else request.user,
        get_params=request.GET.urlencode(),
        failed=is_error,
        query_time_ms=None,
        hit_cache=False,
        source=SearchQuery.WEBSITE,
        engine=SearchQuery.ELASTICSEARCH if is_es_search else SearchQuery.SOLR,
    )

    # Leave `query_time_ms` as None (and `hit_cache` False) if there is an
    # error; timing details are only read from successful responses.
    if not is_error:
        if is_es_search:
            search_query.query_time_ms = ceil(
                search_results["results_details"][0]
            )
            # do_es_search returns 1 as query time if the micro cache was hit
            search_query.hit_cache = search_query.query_time_ms == 1
        else:
            # Solr searches are not cached unless a cache_key is passed
            # No cache_key is passed for the endpoints we are storing
            search_query.query_time_ms = ceil(
                search_results["results"].object_list.QTime
            )

    search_query.save()
95 changes: 95 additions & 0 deletions cl/search/migrations/0036_add_searchquery.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
# Generated by Django 5.1.1 on 2024-10-14 15:20

import django.db.models.deletion
from django.conf import settings
from django.db import migrations, models


class Migration(migrations.Migration):
    """Create the `SearchQuery` table used to record executed search queries."""

    # Must run after the previous `search` migration and after the
    # (swappable) user model exists, because of the `user` foreign key below.
    dependencies = [
        ("search", "0035_pghistory_v3_4_0_trigger_update"),
        migrations.swappable_dependency(settings.AUTH_USER_MODEL),
    ]

    operations = [
        migrations.CreateModel(
            name="SearchQuery",
            fields=[
                (
                    "id",
                    models.AutoField(
                        auto_created=True,
                        primary_key=True,
                        serialize=False,
                        verbose_name="ID",
                    ),
                ),
                # Interface the query came from: 1 = Website, 2 = API request.
                (
                    "source",
                    models.SmallIntegerField(
                        choices=[(1, "Website"), (2, "API request")],
                        help_text="The interface used to perform the query.",
                    ),
                ),
                (
                    "get_params",
                    models.TextField(
                        help_text="The GET parameters of the search query."
                    ),
                ),
                # Nullable: no timing is recorded when the query failed.
                (
                    "query_time_ms",
                    models.IntegerField(
                        help_text="The milliseconds to execute the query, as returned in the ElasticSearch or Solr response.",
                        null=True,
                    ),
                ),
                (
                    "hit_cache",
                    models.BooleanField(
                        help_text="Whether the query hit the cache or not."
                    ),
                ),
                (
                    "failed",
                    models.BooleanField(
                        help_text="True if there was an error executing the query."
                    ),
                ),
                # Search backend: 1 = Elasticsearch, 2 = Solr.
                (
                    "engine",
                    models.SmallIntegerField(
                        choices=[(1, "Elasticsearch"), (2, "Solr")],
                        help_text="The engine that executed the search",
                    ),
                ),
                (
                    "date_created",
                    models.DateTimeField(
                        auto_now_add=True,
                        help_text="Datetime when the record was created.",
                    ),
                ),
                # Nullable so that a query can be stored without a user.
                # Deleting a user cascades to their stored queries.
                (
                    "user",
                    models.ForeignKey(
                        blank=True,
                        help_text="The user who performed this search query.",
                        null=True,
                        on_delete=django.db.models.deletion.CASCADE,
                        related_name="search_queries",
                        to=settings.AUTH_USER_MODEL,
                    ),
                ),
            ],
            options={
                # Index on creation time of the stored queries.
                "indexes": [
                    models.Index(
                        fields=["date_created"],
                        name="search_sear_date_cr_c5fff9_idx",
                    )
                ],
            },
        ),
    ]
8 changes: 8 additions & 0 deletions cl/search/migrations/0036_add_searchquery.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
-- Open an explicit transaction so the table, its foreign key and its indexes
-- are created atomically and the closing COMMIT has a transaction to end.
BEGIN;
--
-- Create model SearchQuery
--
CREATE TABLE "search_searchquery" ("id" integer NOT NULL PRIMARY KEY GENERATED BY DEFAULT AS IDENTITY, "source" smallint NOT NULL, "get_params" text NOT NULL, "query_time_ms" integer NULL, "hit_cache" boolean NOT NULL, "failed" boolean NOT NULL, "engine" smallint NOT NULL, "date_created" timestamp with time zone NOT NULL, "user_id" integer NULL);
-- Link each stored query to the (optional) user who performed it.
ALTER TABLE "search_searchquery" ADD CONSTRAINT "search_searchquery_user_id_8918791c_fk_auth_user_id" FOREIGN KEY ("user_id") REFERENCES "auth_user" ("id") DEFERRABLE INITIALLY DEFERRED;
-- Indexes on the user foreign key and on the creation timestamp.
CREATE INDEX "search_searchquery_user_id_8918791c" ON "search_searchquery" ("user_id");
CREATE INDEX "search_sear_date_cr_c5fff9_idx" ON "search_searchquery" ("date_created");
COMMIT;
54 changes: 54 additions & 0 deletions cl/search/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import pytz
from asgiref.sync import sync_to_async
from celery.canvas import chain
from django.contrib.auth.models import User
from django.contrib.contenttypes.fields import GenericRelation
from django.contrib.postgres.indexes import HashIndex
from django.core.exceptions import ValidationError
Expand Down Expand Up @@ -44,6 +45,7 @@
from cl.lib.string_utils import trunc
from cl.lib.utils import deepgetattr
from cl.search.docket_sources import DocketSources
from cl.users.models import User

HYPERSCAN_TOKENIZER = HyperscanTokenizer(cache_dir=".hyperscan")

Expand Down Expand Up @@ -3921,3 +3923,55 @@ class SEARCH_TYPES:
(PARENTHETICAL, "Parenthetical"),
)
ALL_TYPES = [OPINION, RECAP, ORAL_ARGUMENT, PEOPLE]


class SearchQuery(models.Model):
    """A record of a single search query: who ran it, through which
    interface and engine, with which GET parameters, and how it performed.
    """

    # Interfaces a query can come from (values stored in `source`).
    WEBSITE = 1
    API = 2
    SOURCES = (
        (WEBSITE, "Website"),
        (API, "API request"),
    )
    # Search backends that can execute a query (values stored in `engine`).
    ELASTICSEARCH = 1
    SOLR = 2
    ENGINES = (
        (ELASTICSEARCH, "Elasticsearch"),
        (SOLR, "Solr"),
    )
    # Nullable so a query can be stored without an associated user.
    # Deleting a user cascades to their stored queries.
    # NOTE(review): `User` is imported twice in this module (from
    # django.contrib.auth.models and from cl.users.models); the later import
    # is the one bound here — confirm which is intended.
    user = models.ForeignKey(
        User,
        help_text="The user who performed this search query.",
        related_name="search_queries",
        on_delete=models.CASCADE,
        null=True,
        blank=True,
    )
    source = models.SmallIntegerField(
        help_text="The interface used to perform the query.", choices=SOURCES
    )
    get_params = models.TextField(
        help_text="The GET parameters of the search query."
    )
    # Nullable: no timing is recorded when the query failed.
    query_time_ms = models.IntegerField(
        help_text="The milliseconds to execute the query, as returned in "
        "the ElasticSearch or Solr response.",
        null=True,
    )
    hit_cache = models.BooleanField(
        help_text="Whether the query hit the cache or not."
    )
    failed = models.BooleanField(
        help_text="True if there was an error executing the query."
    )
    engine = models.SmallIntegerField(
        help_text="The engine that executed the search", choices=ENGINES
    )
    # Set automatically when the row is first saved.
    date_created = models.DateTimeField(
        help_text="Datetime when the record was created.",
        auto_now_add=True,
    )

    class Meta:
        # Index on creation time; no explicit name is given here, the
        # accompanying migration records it as
        # "search_sear_date_cr_c5fff9_idx".
        indexes = [
            models.Index(fields=["date_created"]),
        ]
Loading
Loading