From 1e2c70a14efd1e0edf72f64f67c517e642e9f6e7 Mon Sep 17 00:00:00 2001 From: legaltextai Date: Wed, 18 Sep 2024 20:59:03 -0400 Subject: [PATCH 1/8] fix(search): creates a model to capture user queries changes made to models.py and views.py to save user queries Fixes: #4230 --- cl/search/migrations/0035_searchquery.py | 77 +++++++++++++++++++ cl/search/migrations/0035_searchquery.sql | 10 +++ .../migrations/0035_searchquery_customers.sql | 10 +++ cl/search/models.py | 23 ++++++ cl/search/views.py | 13 +++- 5 files changed, 131 insertions(+), 2 deletions(-) create mode 100644 cl/search/migrations/0035_searchquery.py create mode 100644 cl/search/migrations/0035_searchquery.sql create mode 100644 cl/search/migrations/0035_searchquery_customers.sql diff --git a/cl/search/migrations/0035_searchquery.py b/cl/search/migrations/0035_searchquery.py new file mode 100644 index 0000000000..507871a000 --- /dev/null +++ b/cl/search/migrations/0035_searchquery.py @@ -0,0 +1,77 @@ +# Generated by Django 5.1.1 on 2024-09-19 00:54 + +import django.db.models.deletion +from django.conf import settings +from django.db import migrations, models + + +class Migration(migrations.Migration): + dependencies = [ + ("search", "0034_add_harvard_pdf_to_opinioncluster"), + migrations.swappable_dependency(settings.AUTH_USER_MODEL), + ] + + operations = [ + migrations.CreateModel( + name="SearchQuery", + fields=[ + ( + "id", + models.AutoField( + auto_created=True, + primary_key=True, + serialize=False, + verbose_name="ID", + ), + ), + ( + "date_created", + models.DateTimeField( + auto_now_add=True, + db_index=True, + help_text="The moment when the item was created.", + ), + ), + ( + "date_modified", + models.DateTimeField( + auto_now=True, + db_index=True, + help_text="The last moment when the item was modified. A value in year 1750 indicates the value is unknown", + ), + ), + ( + "get_params", + models.TextField( + help_text="The GET parameters of the search query." + ), + ), + ( + "query_time_ms", + models.IntegerField( + help_text="The time taken to execute the query, in milliseconds." + ), + ), + ( + "hit_cache", + models.BooleanField( + help_text="Whether the query hit the cache or not." + ), + ), + ( + "user", + models.ForeignKey( + blank=True, + help_text="The user who performed this search query.", + null=True, + on_delete=django.db.models.deletion.SET_NULL, + related_name="search_queries", + to=settings.AUTH_USER_MODEL, + ), + ), + ], + options={ + "abstract": False, + }, + ), + ] diff --git a/cl/search/migrations/0035_searchquery.sql b/cl/search/migrations/0035_searchquery.sql new file mode 100644 index 0000000000..63694c5a6c --- /dev/null +++ b/cl/search/migrations/0035_searchquery.sql @@ -0,0 +1,10 @@ +BEGIN; +-- +-- Create model SearchQuery +-- +CREATE TABLE "search_searchquery" ("id" integer NOT NULL PRIMARY KEY GENERATED BY DEFAULT AS IDENTITY, "date_created" timestamp with time zone NOT NULL, "date_modified" timestamp with time zone NOT NULL, "get_params" text NOT NULL, "query_time_ms" integer NOT NULL, "hit_cache" boolean NOT NULL, "user_id" integer NULL); +ALTER TABLE "search_searchquery" ADD CONSTRAINT "search_searchquery_user_id_8918791c_fk_auth_user_id" FOREIGN KEY ("user_id") REFERENCES "auth_user" ("id") DEFERRABLE INITIALLY DEFERRED; +CREATE INDEX "search_searchquery_date_created_89023a7b" ON "search_searchquery" ("date_created"); +CREATE INDEX "search_searchquery_date_modified_ec5bc897" ON "search_searchquery" ("date_modified"); +CREATE INDEX "search_searchquery_user_id_8918791c" ON "search_searchquery" ("user_id"); +COMMIT; diff --git a/cl/search/migrations/0035_searchquery_customers.sql b/cl/search/migrations/0035_searchquery_customers.sql new file mode 100644 index 0000000000..63694c5a6c --- /dev/null +++ b/cl/search/migrations/0035_searchquery_customers.sql @@ -0,0 +1,10 @@ +BEGIN; +-- +-- Create model SearchQuery +-- +CREATE TABLE "search_searchquery" ("id" integer NOT NULL PRIMARY KEY GENERATED BY DEFAULT AS IDENTITY, "date_created" timestamp with time zone NOT NULL, "date_modified" timestamp with time zone NOT NULL, "get_params" text NOT NULL, "query_time_ms" integer NOT NULL, "hit_cache" boolean NOT NULL, "user_id" integer NULL); +ALTER TABLE "search_searchquery" ADD CONSTRAINT "search_searchquery_user_id_8918791c_fk_auth_user_id" FOREIGN KEY ("user_id") REFERENCES "auth_user" ("id") DEFERRABLE INITIALLY DEFERRED; +CREATE INDEX "search_searchquery_date_created_89023a7b" ON "search_searchquery" ("date_created"); +CREATE INDEX "search_searchquery_date_modified_ec5bc897" ON "search_searchquery" ("date_modified"); +CREATE INDEX "search_searchquery_user_id_8918791c" ON "search_searchquery" ("user_id"); +COMMIT; diff --git a/cl/search/models.py b/cl/search/models.py index a0c808f3d3..8e0a2e6f4d 100644 --- a/cl/search/models.py +++ b/cl/search/models.py @@ -1,6 +1,7 @@ import re from datetime import datetime from typing import Any, Dict, List, Tuple, TypeVar +from django.contrib.auth.models import User import pghistory import pytz @@ -233,6 +234,28 @@ class SOURCES: ) +class SearchQuery(AbstractDateTimeModel): + user = models.ForeignKey( + User, + help_text="The user who performed this search query.", + related_name="search_queries", + on_delete=models.SET_NULL, + null=True, + blank=True, + ) + get_params = models.TextField( + help_text="The GET parameters of the search query." + ) + query_time_ms = models.IntegerField( + help_text="The time taken to execute the query, in milliseconds." + ) + hit_cache = models.BooleanField( + help_text="Whether the query hit the cache or not." + ) + + + + @pghistory.track(AfterUpdateOrDeleteSnapshot()) class OriginatingCourtInformation(AbstractDateTimeModel): """Lower court metadata to associate with appellate cases. diff --git a/cl/search/views.py b/cl/search/views.py index 539efaaed3..17f05cb3bb 100644 --- a/cl/search/views.py +++ b/cl/search/views.py @@ -79,7 +79,7 @@ UnbalancedQuotesQuery, ) from cl.search.forms import SearchForm, _clean_form -from cl.search.models import SEARCH_TYPES, Court, Opinion, OpinionCluster +from cl.search.models import SEARCH_TYPES, Court, Opinion, OpinionCluster, SearchQuery from cl.stats.models import Stat from cl.stats.utils import tally_stat from cl.visualizations.models import SCOTUSMap @@ -514,6 +514,15 @@ def show_results(request: HttpRequest) -> HttpResponse: if not is_bot(request): async_to_sync(tally_stat)("search.results") + + # Create and save the SearchQuery object + SearchQuery.objects.create( + get_params=request.GET.urlencode(), + query_time_ms=render_dict["query_time"], + hit_cache=render_dict["hit_cache"] + ) + + # Create bare-bones alert form. alert_form = CreateAlertForm( initial={"query": get_string, "rate": "dly"}, @@ -979,4 +988,4 @@ def fetch_and_paginate_results( settings.SEARCH_RESULTS_MICRO_CACHE, ) - return results, query_time, error, main_total, child_total + return results, query_time, error, main_total, child_total \ No newline at end of file From a8eba8353c7a951cc0007ea10d518645b3c3536f Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 19 Sep 2024 01:13:34 +0000 Subject: [PATCH 2/8] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- cl/search/models.py | 4 +--- cl/search/views.py | 14 +++++++++----- 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/cl/search/models.py b/cl/search/models.py index 8e0a2e6f4d..75555218d8 100644 --- a/cl/search/models.py +++ b/cl/search/models.py @@ -1,12 +1,12 @@ import re from datetime import datetime from typing import Any, Dict, List, Tuple, TypeVar -from django.contrib.auth.models import User import pghistory import pytz from asgiref.sync import sync_to_async from celery.canvas import chain +from django.contrib.auth.models import User from django.contrib.contenttypes.fields import GenericRelation from django.contrib.postgres.indexes import HashIndex from django.core.exceptions import ValidationError @@ -253,8 +253,6 @@ class SearchQuery(AbstractDateTimeModel): help_text="Whether the query hit the cache or not." ) - - @pghistory.track(AfterUpdateOrDeleteSnapshot()) class OriginatingCourtInformation(AbstractDateTimeModel): diff --git a/cl/search/views.py b/cl/search/views.py index 17f05cb3bb..37538bed9c 100644 --- a/cl/search/views.py +++ b/cl/search/views.py @@ -79,7 +79,13 @@ UnbalancedQuotesQuery, ) from cl.search.forms import SearchForm, _clean_form -from cl.search.models import SEARCH_TYPES, Court, Opinion, OpinionCluster, SearchQuery +from cl.search.models import ( + SEARCH_TYPES, + Court, + Opinion, + OpinionCluster, + SearchQuery, +) from cl.stats.models import Stat from cl.stats.utils import tally_stat from cl.visualizations.models import SCOTUSMap @@ -514,15 +520,13 @@ def show_results(request: HttpRequest) -> HttpResponse: if not is_bot(request): async_to_sync(tally_stat)("search.results") - # Create and save the SearchQuery object SearchQuery.objects.create( get_params=request.GET.urlencode(), query_time_ms=render_dict["query_time"], - hit_cache=render_dict["hit_cache"] + hit_cache=render_dict["hit_cache"], ) - # Create bare-bones alert form. alert_form = CreateAlertForm( initial={"query": get_string, "rate": "dly"}, @@ -988,4 +992,4 @@ def fetch_and_paginate_results( settings.SEARCH_RESULTS_MICRO_CACHE, ) - return results, query_time, error, main_total, child_total \ No newline at end of file + return results, query_time, error, main_total, child_total From 4652fe68134fdc418e2e80f79a07b631dd08d15e Mon Sep 17 00:00:00 2001 From: Gianfranco Rossi Date: Fri, 20 Sep 2024 19:57:30 -0500 Subject: [PATCH 3/8] fix(search.views): fix SearchQuery creation in `show_results` - Add tests for SearchQuery creation - Update user ForeignKey to on_delete=models.CASCADE, to match specs --- .../migrations/0036_alter_searchquery_user.py | 27 ++ cl/search/models.py | 2 +- cl/search/tests/tests.py | 20 +- cl/search/views.py | 324 +++++++++--------- 4 files changed, 215 insertions(+), 158 deletions(-) create mode 100644 cl/search/migrations/0036_alter_searchquery_user.py diff --git a/cl/search/migrations/0036_alter_searchquery_user.py b/cl/search/migrations/0036_alter_searchquery_user.py new file mode 100644 index 0000000000..15b240e660 --- /dev/null +++ b/cl/search/migrations/0036_alter_searchquery_user.py @@ -0,0 +1,27 @@ +# Generated by Django 5.1.1 on 2024-09-21 01:05 + +import django.db.models.deletion +from django.conf import settings +from django.db import migrations, models + + +class Migration(migrations.Migration): + dependencies = [ + ("search", "0035_searchquery"), + migrations.swappable_dependency(settings.AUTH_USER_MODEL), + ] + + operations = [ + migrations.AlterField( + model_name="searchquery", + name="user", + field=models.ForeignKey( + blank=True, + help_text="The user who performed this search query.", + null=True, + on_delete=django.db.models.deletion.CASCADE, + related_name="search_queries", + to=settings.AUTH_USER_MODEL, + ), + ), + ] diff --git a/cl/search/models.py b/cl/search/models.py index 75555218d8..39cdefbc02 100644 --- a/cl/search/models.py +++ b/cl/search/models.py @@ -239,7 +239,7 @@ class SearchQuery(AbstractDateTimeModel): User, help_text="The user who performed this search query.", related_name="search_queries", - on_delete=models.SET_NULL, + on_delete=models.CASCADE, null=True, blank=True, ) diff --git a/cl/search/tests/tests.py b/cl/search/tests/tests.py index b8f85f719d..28747aa00e 100644 --- a/cl/search/tests/tests.py +++ b/cl/search/tests/tests.py @@ -90,6 +90,7 @@ Opinion, OpinionCluster, RECAPDocument, + SearchQuery, sort_cites, ) from cl.search.tasks import ( @@ -1134,7 +1135,7 @@ class OpinionSearchFunctionalTest(AudioTestCase, BaseSeleniumTest): ] def setUp(self) -> None: - UserProfileWithParentsFactory.create( + self.pandora_profile = UserProfileWithParentsFactory.create( user__username="pandora", user__password=make_password("password"), ) @@ -1523,6 +1524,23 @@ def test_basic_homepage_search_and_signin_and_signout(self) -> None: bootstrap_btns = self.browser.find_elements(By.CSS_SELECTOR, "a.btn") self.assertIn("Sign Back In", [btn.text for btn in bootstrap_btns]) + # We are taking advantage of the queries done with authenticated and + # anonymous user to see if SearchQuery collection is working + lookup = { + "get_params": "q=lissner", + "user": None, + "query_time_ms__gte": 0, + } + self.assertTrue( + SearchQuery.objects.filter(**lookup).exists(), + "a SearchQuery with get_params 'q=lissner' and anonymous user should have been created", + ) + lookup["user"] = self.pandora_profile.user + self.assertTrue( + SearchQuery.objects.filter(**lookup).exists(), + "a SearchQuery with get_params 'q=lissner' and 'pandora' user should have been created", + ) + class CaptionTest(TestCase): """Can we make good looking captions?""" diff --git a/cl/search/views.py b/cl/search/views.py index 37538bed9c..0af5c41834 100644 --- a/cl/search/views.py +++ b/cl/search/views.py @@ -1,5 +1,7 @@ import logging +import math import pickle +import time import traceback from datetime import date, datetime, timedelta, timezone from urllib.parse import quote @@ -405,171 +407,181 @@ def show_results(request: HttpRequest) -> HttpResponse: render_dict.update({"alert_form": alert_form}) return TemplateResponse(request, "search.html", render_dict) - else: - # Either a search or the homepage - if len(request.GET) == 0: - # No parameters --> Homepage. - if not is_bot(request): - async_to_sync(tally_stat)("search.homepage_loaded") - - # Ensure we get nothing from the future. - mutable_GET = request.GET.copy() # Makes it mutable - mutable_GET["filed_before"] = date.today() - - # Load the render_dict with good results that can be shown in the - # "Latest Cases" section - if not waffle.flag_is_active(request, "o-es-active"): - search = do_search( - mutable_GET, - rows=5, - override_params={"order_by": "dateFiled desc"}, - facet=False, - cache_key="homepage-data-o", - ) - else: - mutable_GET.update( - { - "order_by": "dateArgued desc", - "type": SEARCH_TYPES.OPINION, - } - ) - search = do_es_search( - mutable_GET, - rows=5, - facet=False, - cache_key="homepage-data-o-es", - ) - - render_dict.update(**search) - # Rename dictionary key "results" to "results_o" for consistency. - render_dict["results_o"] = render_dict.pop("results") + # This is a GET request: Either a search or the homepage + if len(request.GET) == 0: + # No parameters --> Homepage. + if not is_bot(request): + async_to_sync(tally_stat)("search.homepage_loaded") + + # Ensure we get nothing from the future. + mutable_GET = request.GET.copy() # Makes it mutable + mutable_GET["filed_before"] = date.today() + + # Load the render_dict with good results that can be shown in the + # "Latest Cases" section + if not waffle.flag_is_active(request, "o-es-active"): + search = do_search( + mutable_GET, + rows=5, + override_params={"order_by": "dateFiled desc"}, + facet=False, + cache_key="homepage-data-o", + ) + else: + mutable_GET.update( + { + "order_by": "dateArgued desc", + "type": SEARCH_TYPES.OPINION, + } + ) + search = do_es_search( + mutable_GET, + rows=5, + facet=False, + cache_key="homepage-data-o-es", + ) - # Get the results from the oral arguments as well - # Check if waffle flag is active. - if not waffle.flag_is_active(request, "oa-es-active"): - render_dict.update( - { - "results_oa": do_search( - mutable_GET, - rows=5, - override_params={ - "order_by": "dateArgued desc", - "type": SEARCH_TYPES.ORAL_ARGUMENT, - }, - facet=False, - cache_key="homepage-data-oa", - )["results"] - } - ) - else: - # Add additional or overridden GET parameters - mutable_GET.update( - { - "order_by": "dateArgued desc", - "type": SEARCH_TYPES.ORAL_ARGUMENT, - } - ) - render_dict.update( - { - "results_oa": do_es_search( - mutable_GET, - rows=5, - facet=False, - cache_key="homepage-data-oa-es", - )["results"] - } - ) + render_dict.update(**search) + # Rename dictionary key "results" to "results_o" for consistency. + render_dict["results_o"] = render_dict.pop("results") - # But give it a fresh form for the advanced search section + # Get the results from the oral arguments as well + # Check if waffle flag is active. + if not waffle.flag_is_active(request, "oa-es-active"): + render_dict.update( + { + "results_oa": do_search( + mutable_GET, + rows=5, + override_params={ + "order_by": "dateArgued desc", + "type": SEARCH_TYPES.ORAL_ARGUMENT, + }, + facet=False, + cache_key="homepage-data-oa", + )["results"] + } + ) + else: + # Add additional or overridden GET parameters + mutable_GET.update( + { + "order_by": "dateArgued desc", + "type": SEARCH_TYPES.ORAL_ARGUMENT, + } + ) render_dict.update( - {"search_form": SearchForm(request.GET, request=request)} + { + "results_oa": do_es_search( + mutable_GET, + rows=5, + facet=False, + cache_key="homepage-data-oa-es", + )["results"] + } ) - # Get a bunch of stats. - stats = get_homepage_stats() - render_dict.update(stats) + # But give it a fresh form for the advanced search section + render_dict.update( + {"search_form": SearchForm(request.GET, request=request)} + ) - return TemplateResponse(request, "homepage.html", render_dict) - else: - # User placed a search or is trying to edit an alert - if request.GET.get("edit_alert"): - # They're editing an alert - if request.user.is_anonymous: - return HttpResponseRedirect( - "{path}?next={next}{encoded_params}".format( - path=reverse("sign-in"), - next=request.path, - encoded_params=quote( - f"?{request.GET.urlencode()}" - ), - ) - ) - else: - alert = get_object_or_404( - Alert, - pk=request.GET.get("edit_alert"), - user=request.user, - ) - alert_form = CreateAlertForm( - instance=alert, - initial={"query": get_string_sans_alert}, - user=request.user, - ) + # Get a bunch of stats. + stats = get_homepage_stats() + render_dict.update(stats) + + return TemplateResponse(request, "homepage.html", render_dict) + + # This is a GET with parameters + # User placed a search or is trying to edit an alert + if request.GET.get("edit_alert"): + # They're editing an alert + if request.user.is_anonymous: + return HttpResponseRedirect( + "{path}?next={next}{encoded_params}".format( + path=reverse("sign-in"), + next=request.path, + encoded_params=quote(f"?{request.GET.urlencode()}"), + ) + ) + + alert = get_object_or_404( + Alert, + pk=request.GET.get("edit_alert"), + user=request.user, + ) + alert_form = CreateAlertForm( + instance=alert, + initial={"query": get_string_sans_alert}, + user=request.user, + ) + search_query = None + else: + # Just a regular search + if not is_bot(request): + async_to_sync(tally_stat)("search.results") + + # Create bare-bones alert form. + alert_form = CreateAlertForm( + initial={"query": get_string, "rate": "dly"}, + user=request.user, + ) + + search_query = SearchQuery( + user=None if request.user.is_anonymous else request.user, + get_params=request.GET.urlencode(), + query_time_ms=0, + hit_cache=False, + ) + + # measure query time for SearchQuery object + timer_start = time.perf_counter_ns() + search_type = request.GET.get("type", SEARCH_TYPES.OPINION) + match search_type: + case SEARCH_TYPES.PARENTHETICAL: + render_dict.update(do_es_search(request.GET.copy())) + case SEARCH_TYPES.ORAL_ARGUMENT: + # Check if waffle flag is active. + if waffle.flag_is_active(request, "oa-es-active"): + render_dict.update(do_es_search(request.GET.copy())) else: - # Just a regular search - if not is_bot(request): - async_to_sync(tally_stat)("search.results") - - # Create and save the SearchQuery object - SearchQuery.objects.create( - get_params=request.GET.urlencode(), - query_time_ms=render_dict["query_time"], - hit_cache=render_dict["hit_cache"], - ) + render_dict.update(do_search(request.GET.copy())) + case SEARCH_TYPES.PEOPLE: + if waffle.flag_is_active(request, "p-es-active"): + render_dict.update(do_es_search(request.GET.copy())) + else: + render_dict.update(do_search(request.GET.copy())) + case SEARCH_TYPES.RECAP | SEARCH_TYPES.DOCKETS: + if waffle.flag_is_active(request, "r-es-active"): + search_results = do_es_search(request.GET.copy()) + else: + search_results = do_search(request.GET.copy()) + render_dict.update(search_results) + case SEARCH_TYPES.OPINION: + if waffle.flag_is_active(request, "o-es-active"): + render_dict.update(do_es_search(request.GET.copy())) + else: + render_dict.update(do_search(request.GET.copy())) + case SEARCH_TYPES.RECAP_DOCUMENT: + render_dict.update(do_es_search(request.GET.copy())) + case _: + render_dict.update(do_search(request.GET.copy())) - # Create bare-bones alert form. - alert_form = CreateAlertForm( - initial={"query": get_string, "rate": "dly"}, - user=request.user, - ) - search_type = request.GET.get("type", SEARCH_TYPES.OPINION) - match search_type: - case SEARCH_TYPES.PARENTHETICAL: - render_dict.update(do_es_search(request.GET.copy())) - case SEARCH_TYPES.ORAL_ARGUMENT: - # Check if waffle flag is active. - if waffle.flag_is_active(request, "oa-es-active"): - render_dict.update(do_es_search(request.GET.copy())) - else: - render_dict.update(do_search(request.GET.copy())) - case SEARCH_TYPES.PEOPLE: - if waffle.flag_is_active(request, "p-es-active"): - render_dict.update(do_es_search(request.GET.copy())) - else: - render_dict.update(do_search(request.GET.copy())) - case SEARCH_TYPES.RECAP | SEARCH_TYPES.DOCKETS: - if waffle.flag_is_active(request, "r-es-active"): - search_results = do_es_search(request.GET.copy()) - else: - search_results = do_search(request.GET.copy()) - render_dict.update(search_results) - case SEARCH_TYPES.OPINION: - if waffle.flag_is_active(request, "o-es-active"): - render_dict.update(do_es_search(request.GET.copy())) - else: - render_dict.update(do_search(request.GET.copy())) - case SEARCH_TYPES.RECAP_DOCUMENT: - render_dict.update(do_es_search(request.GET.copy())) - case _: - render_dict.update(do_search(request.GET.copy())) - - # Set the value to the query as a convenience - alert_form.fields["name"].widget.attrs["value"] = render_dict[ - "search_summary_str" - ] - render_dict.update({"alert_form": alert_form}) + if search_query: + # convert nanoseconds to miliseconds + query_time_ms = math.ceil( + (time.perf_counter_ns() - timer_start) / 1_000_000 + ) + search_query.query_time_ms = query_time_ms + search_query.save() - return TemplateResponse(request, "search.html", render_dict) + # Set the value to the query as a convenience + alert_form.fields["name"].widget.attrs["value"] = render_dict[ + "search_summary_str" + ] + render_dict.update({"alert_form": alert_form}) + + return TemplateResponse(request, "search.html", render_dict) def advanced(request: HttpRequest) -> HttpResponse: From 4dada5b2a9c0c9789cb1e16e7321af71cacd43ee Mon Sep 17 00:00:00 2001 From: Gianfranco Rossi Date: Thu, 3 Oct 2024 17:33:16 -0500 Subject: [PATCH 4/8] feat(SearchQuery): new model to store user queries --- cl/search/migrations/0036_searchquery.py | 73 +++++ cl/search/models.py | 30 ++ cl/search/tests/tests.py | 89 +++++- cl/search/views.py | 331 +++++++++++++---------- 4 files changed, 371 insertions(+), 152 deletions(-) create mode 100644 cl/search/migrations/0036_searchquery.py diff --git a/cl/search/migrations/0036_searchquery.py b/cl/search/migrations/0036_searchquery.py new file mode 100644 index 0000000000..29c2049c3b --- /dev/null +++ b/cl/search/migrations/0036_searchquery.py @@ -0,0 +1,73 @@ +# Generated by Django 5.1.1 on 2024-10-03 20:22 + +import django.db.models.deletion +from django.conf import settings +from django.db import migrations, models + + +class Migration(migrations.Migration): + dependencies = [ + ("search", "0035_pghistory_v3_4_0_trigger_update"), + migrations.swappable_dependency(settings.AUTH_USER_MODEL), + ] + + operations = [ + migrations.CreateModel( + name="SearchQuery", + fields=[ + ( + "id", + models.AutoField( + auto_created=True, + primary_key=True, + serialize=False, + verbose_name="ID", + ), + ), + ( + "get_params", + models.TextField( + help_text="The GET parameters of the search query." + ), + ), + ( + "query_time_ms", + models.IntegerField( + help_text="The milliseconds to execute the query, as returned in the ElasticSearch or Solr response." + ), + ), + ( + "hit_cache", + models.BooleanField( + help_text="Whether the query hit the cache or not." + ), + ), + ( + "date_created", + models.DateTimeField( + auto_now_add=True, + help_text="Datetime when the record was created", + ), + ), + ( + "user", + models.ForeignKey( + blank=True, + help_text="The user who performed this search query.", + null=True, + on_delete=django.db.models.deletion.CASCADE, + related_name="search_queries", + to=settings.AUTH_USER_MODEL, + ), + ), + ], + options={ + "indexes": [ + models.Index( + fields=["date_created"], + name="search_sear_date_cr_c5fff9_idx", + ) + ], + }, + ), + ] diff --git a/cl/search/models.py b/cl/search/models.py index d2e13b50e9..4587e4f269 100644 --- a/cl/search/models.py +++ b/cl/search/models.py @@ -44,6 +44,7 @@ from cl.lib.string_utils import trunc from cl.lib.utils import deepgetattr from cl.search.docket_sources import DocketSources +from cl.users.models import User HYPERSCAN_TOKENIZER = HyperscanTokenizer(cache_dir=".hyperscan") @@ -3921,3 +3922,32 @@ class SEARCH_TYPES: (PARENTHETICAL, "Parenthetical"), ) ALL_TYPES = [OPINION, RECAP, ORAL_ARGUMENT, PEOPLE] + + +class SearchQuery(models.Model): + user = models.ForeignKey( + User, + help_text="The user who performed this search query.", + related_name="search_queries", + on_delete=models.CASCADE, + null=True, + blank=True, + ) + get_params = models.TextField( + help_text="The GET parameters of the search query." + ) + query_time_ms = models.IntegerField( + help_text="The milliseconds to execute the query, as returned in the ElasticSearch or Solr response." + ) + hit_cache = models.BooleanField( + help_text="Whether the query hit the cache or not." + ) + date_created = models.DateTimeField( + help_text="Datetime when the record was created", + auto_now_add=True, + ) + + class Meta: + indexes = [ + models.Index(fields=["date_created"]), + ] diff --git a/cl/search/tests/tests.py b/cl/search/tests/tests.py index b8f85f719d..f180b41515 100644 --- a/cl/search/tests/tests.py +++ b/cl/search/tests/tests.py @@ -4,6 +4,7 @@ from datetime import date from pathlib import Path from unittest import mock +from urllib.parse import parse_qs import pytz import time_machine @@ -15,7 +16,7 @@ from django.core.files.base import ContentFile from django.core.management import call_command from django.db import IntegrityError, transaction -from django.test import override_settings +from django.test import Client, override_settings from django.urls import reverse from django.utils.timezone import now from elasticsearch_dsl import Q @@ -25,6 +26,7 @@ from selenium.webdriver.support import expected_conditions as EC from selenium.webdriver.support.wait import WebDriverWait from timeout_decorator import timeout_decorator +from waffle.testutils import override_flag from cl.audio.factories import AudioFactory from cl.lib.elasticsearch_utils import simplify_estimated_count @@ -90,6 +92,7 @@ Opinion, OpinionCluster, RECAPDocument, + SearchQuery, sort_cites, ) from cl.search.tasks import ( @@ -1134,7 +1137,7 @@ class OpinionSearchFunctionalTest(AudioTestCase, BaseSeleniumTest): ] def setUp(self) -> None: - UserProfileWithParentsFactory.create( + self.pandora_profile = UserProfileWithParentsFactory.create( user__username="pandora", user__password=make_password("password"), ) @@ -1523,6 +1526,88 @@ def test_basic_homepage_search_and_signin_and_signout(self) -> None: bootstrap_btns = self.browser.find_elements(By.CSS_SELECTOR, "a.btn") self.assertIn("Sign Back In", [btn.text for btn in bootstrap_btns]) + # We are taking advantage of the queries done with authenticated and + # anonymous user to see if SearchQuery collection is working + lookup = { + "get_params": "q=lissner", + "user": None, + "query_time_ms__gte": 0, + } + self.assertTrue( + SearchQuery.objects.filter(**lookup).exists(), + "a SearchQuery with get_params 'q=lissner' and anonymous user should have been created", + ) + SearchQuery.objects.filter(user=None).delete() + + lookup["user"] = self.pandora_profile.user + self.assertTrue( + SearchQuery.objects.filter(**lookup).exists(), + "a SearchQuery with get_params 'q=lissner' and 'pandora' user should have been created", + ) + + # Test if the SearchQuery get's deleted when the user is deleted + self.pandora_profile.user.delete() + lookup.pop("user") + self.assertFalse( + SearchQuery.objects.filter(**lookup).exists(), + "SearchQuery should have been deleted when the user was deleted", + ) + + +class SaveSearchQueryTest(TestCase): + def setUp(self) -> None: + self.client = Client() + # Using plain text, fielded queries and manual filters + self.searches = [ + # Recap + r"type=r&q=trump&type=r&order_by=score%20desc&description=Notice", + # Audio + r"type=oa&q=company%20court_id:illappct&type=oa&order_by=score desc", + # People + r"type=p&q=thomas&type=p&order_by=score%20desc&born_after=01/01/2080", + # Repeat the same query, for testing cache + r"type=p&q=thomas&type=p&order_by=score%20desc&born_after=01/01/2080", + ] + super().setUp() + + @override_settings(ELASTICSEARCH_MICRO_CACHE_ENABLED=True) + def test_search_query_saving(self) -> None: + """Do we save queries on all public endpoints""" + for query in self.searches: + url = f"{reverse('show_results')}?{query}" + self.client.get(url) + # Compare parsed query strings; sometimes the search process + # alters the order of the query parameters, or duplicates them + last_query = SearchQuery.objects.last() + parsed_query = parse_qs(query.replace("%20", "+")) + for key, stored_values in parse_qs(last_query.get_params).items(): + self.assertTrue( + parsed_query[key][0] == stored_values[0], + f"Query was not saved properly for endpoint {query}", + ) + + self.assertTrue( + SearchQuery.objects.last().hit_cache, + "Repeated query not marked as having hit cache", + ) + + # Force Solr use + @override_flag("oa-es-active", False) + @override_flag("r-es-active", False) + @override_flag("p-es-active", False) + def test_search_query_saving_solr(self) -> None: + """Are queries saved when using solr search (do_search)""" + for query in self.searches: + url = f"{reverse('show_results')}?{query}" + self.client.get(url) + last_query = SearchQuery.objects.last() + parsed_query = parse_qs(query.replace("%20", "+")) + for key, stored_values in parse_qs(last_query.get_params).items(): + self.assertTrue( + parsed_query[key][0] == stored_values[0], + f"Query was not saved properly for endpoint {query}", + ) + class CaptionTest(TestCase): """Can we make good looking captions?""" diff --git a/cl/search/views.py b/cl/search/views.py index 86142b0158..704ea9b865 100644 --- a/cl/search/views.py +++ b/cl/search/views.py @@ -2,6 +2,7 @@ import pickle import traceback from datetime import date, datetime, timedelta, timezone +from math import ceil from urllib.parse import quote import waffle @@ -79,7 +80,13 @@ UnbalancedQuotesQuery, ) from cl.search.forms import SearchForm, _clean_form -from cl.search.models import SEARCH_TYPES, Court, Opinion, OpinionCluster +from cl.search.models import ( + SEARCH_TYPES, + Court, + Opinion, + OpinionCluster, + SearchQuery, +) from cl.stats.models import Stat from cl.stats.utils import tally_stat from cl.visualizations.models import SCOTUSMap @@ -399,164 +406,188 @@ def show_results(request: HttpRequest) -> HttpResponse: render_dict.update({"alert_form": alert_form}) return TemplateResponse(request, "search.html", render_dict) - else: - # Either a search or the homepage - if len(request.GET) == 0: - # No parameters --> Homepage. - if not is_bot(request): - async_to_sync(tally_stat)("search.homepage_loaded") - - # Ensure we get nothing from the future. - mutable_GET = request.GET.copy() # Makes it mutable - mutable_GET["filed_before"] = date.today() - - # Load the render_dict with good results that can be shown in the - # "Latest Cases" section - if not waffle.flag_is_active(request, "o-es-active"): - search = do_search( - mutable_GET, - rows=5, - override_params={"order_by": "dateFiled desc"}, - facet=False, - cache_key="homepage-data-o", - ) - else: - mutable_GET.update( - { - "order_by": "dateArgued desc", - "type": SEARCH_TYPES.OPINION, - } - ) - search = do_es_search( - mutable_GET, - rows=5, - facet=False, - cache_key="homepage-data-o-es", - ) + # This is a GET request: Either a search or the homepage + if len(request.GET) == 0: + # No parameters --> Homepage. + if not is_bot(request): + async_to_sync(tally_stat)("search.homepage_loaded") + + # Ensure we get nothing from the future. + mutable_GET = request.GET.copy() # Makes it mutable + mutable_GET["filed_before"] = date.today() + + # Load the render_dict with good results that can be shown in the + # "Latest Cases" section + if not waffle.flag_is_active(request, "o-es-active"): + search = do_search( + mutable_GET, + rows=5, + override_params={"order_by": "dateFiled desc"}, + facet=False, + cache_key="homepage-data-o", + ) + else: + mutable_GET.update( + { + "order_by": "dateArgued desc", + "type": SEARCH_TYPES.OPINION, + } + ) + search = do_es_search( + mutable_GET, + rows=5, + facet=False, + cache_key="homepage-data-o-es", + ) - render_dict.update(**search) - # Rename dictionary key "results" to "results_o" for consistency. - render_dict["results_o"] = render_dict.pop("results") + render_dict.update(**search) + # Rename dictionary key "results" to "results_o" for consistency. + render_dict["results_o"] = render_dict.pop("results") - # Get the results from the oral arguments as well - # Check if waffle flag is active. - if not waffle.flag_is_active(request, "oa-es-active"): - render_dict.update( - { - "results_oa": do_search( - mutable_GET, - rows=5, - override_params={ - "order_by": "dateArgued desc", - "type": SEARCH_TYPES.ORAL_ARGUMENT, - }, - facet=False, - cache_key="homepage-data-oa", - )["results"] - } - ) - else: - # Add additional or overridden GET parameters - mutable_GET.update( - { - "order_by": "dateArgued desc", - "type": SEARCH_TYPES.ORAL_ARGUMENT, - } - ) - render_dict.update( - { - "results_oa": do_es_search( - mutable_GET, - rows=5, - facet=False, - cache_key="homepage-data-oa-es", - )["results"] - } - ) - - # But give it a fresh form for the advanced search section + # Get the results from the oral arguments as well + # Check if waffle flag is active. + if not waffle.flag_is_active(request, "oa-es-active"): render_dict.update( - {"search_form": SearchForm(request.GET, request=request)} + { + "results_oa": do_search( + mutable_GET, + rows=5, + override_params={ + "order_by": "dateArgued desc", + "type": SEARCH_TYPES.ORAL_ARGUMENT, + }, + facet=False, + cache_key="homepage-data-oa", + )["results"] + } + ) + else: + # Add additional or overridden GET parameters + mutable_GET.update( + { + "order_by": "dateArgued desc", + "type": SEARCH_TYPES.ORAL_ARGUMENT, + } + ) + render_dict.update( + { + "results_oa": do_es_search( + mutable_GET, + rows=5, + facet=False, + cache_key="homepage-data-oa-es", + )["results"] + } ) - # Get a bunch of stats. - stats = get_homepage_stats() - render_dict.update(stats) + # But give it a fresh form for the advanced search section + render_dict.update( + {"search_form": SearchForm(request.GET, request=request)} + ) - return TemplateResponse(request, "homepage.html", render_dict) - else: - # User placed a search or is trying to edit an alert - if request.GET.get("edit_alert"): - # They're editing an alert - if request.user.is_anonymous: - return HttpResponseRedirect( - "{path}?next={next}{encoded_params}".format( - path=reverse("sign-in"), - next=request.path, - encoded_params=quote( - f"?{request.GET.urlencode()}" - ), - ) - ) - else: - alert = get_object_or_404( - Alert, - pk=request.GET.get("edit_alert"), - user=request.user, - ) - alert_form = CreateAlertForm( - instance=alert, - initial={"query": get_string_sans_alert}, - user=request.user, - ) + # Get a bunch of stats. + stats = get_homepage_stats() + render_dict.update(stats) + + return TemplateResponse(request, "homepage.html", render_dict) + + # This is a GET with parameters + # User placed a search or is trying to edit an alert + if request.GET.get("edit_alert"): + # They're editing an alert + if request.user.is_anonymous: + return HttpResponseRedirect( + "{path}?next={next}{encoded_params}".format( + path=reverse("sign-in"), + next=request.path, + encoded_params=quote(f"?{request.GET.urlencode()}"), + ) + ) + + alert = get_object_or_404( + Alert, + pk=request.GET.get("edit_alert"), + user=request.user, + ) + alert_form = CreateAlertForm( + instance=alert, + initial={"query": get_string_sans_alert}, + user=request.user, + ) + else: + # Just a regular search + if not is_bot(request): + async_to_sync(tally_stat)("search.results") + + # Create bare-bones alert form. + alert_form = CreateAlertForm( + initial={"query": get_string, "rate": "dly"}, + user=request.user, + ) + + search_type = request.GET.get("type", SEARCH_TYPES.OPINION) + search_results = {} + + match search_type: + case SEARCH_TYPES.PARENTHETICAL: + search_results = do_es_search(request.GET.copy()) + case SEARCH_TYPES.ORAL_ARGUMENT: + # Check if waffle flag is active. + if waffle.flag_is_active(request, "oa-es-active"): + search_results = do_es_search(request.GET.copy()) + else: + search_results = do_search(request.GET.copy()) + case SEARCH_TYPES.PEOPLE: + if waffle.flag_is_active(request, "p-es-active"): + search_results = do_es_search(request.GET.copy()) + else: + search_results = do_search(request.GET.copy()) + case SEARCH_TYPES.RECAP | SEARCH_TYPES.DOCKETS: + if waffle.flag_is_active(request, "r-es-active"): + search_results = do_es_search(request.GET.copy()) + else: + search_results = do_search(request.GET.copy()) + case SEARCH_TYPES.OPINION: + if waffle.flag_is_active(request, "o-es-active"): + search_results = do_es_search(request.GET.copy()) else: - # Just a regular search - if not is_bot(request): - async_to_sync(tally_stat)("search.results") + search_results = do_search(request.GET.copy()) + case SEARCH_TYPES.RECAP_DOCUMENT: + search_results = do_es_search(request.GET.copy()) + case _: + search_results = do_search(request.GET.copy()) + + render_dict.update(search_results) + + search_query = SearchQuery( + user=None if request.user.is_anonymous else request.user, + get_params=request.GET.urlencode(), + hit_cache=False, + ) - # Create bare-bones alert form. - alert_form = CreateAlertForm( - initial={"query": get_string, "rate": "dly"}, - user=request.user, - ) - search_type = request.GET.get("type", SEARCH_TYPES.OPINION) - match search_type: - case SEARCH_TYPES.PARENTHETICAL: - render_dict.update(do_es_search(request.GET.copy())) - case SEARCH_TYPES.ORAL_ARGUMENT: - # Check if waffle flag is active. - if waffle.flag_is_active(request, "oa-es-active"): - render_dict.update(do_es_search(request.GET.copy())) - else: - render_dict.update(do_search(request.GET.copy())) - case SEARCH_TYPES.PEOPLE: - if waffle.flag_is_active(request, "p-es-active"): - render_dict.update(do_es_search(request.GET.copy())) - else: - render_dict.update(do_search(request.GET.copy())) - case SEARCH_TYPES.RECAP | SEARCH_TYPES.DOCKETS: - if waffle.flag_is_active(request, "r-es-active"): - search_results = do_es_search(request.GET.copy()) - else: - search_results = do_search(request.GET.copy()) - render_dict.update(search_results) - case SEARCH_TYPES.OPINION: - if waffle.flag_is_active(request, "o-es-active"): - render_dict.update(do_es_search(request.GET.copy())) - else: - render_dict.update(do_search(request.GET.copy())) - case SEARCH_TYPES.RECAP_DOCUMENT: - render_dict.update(do_es_search(request.GET.copy())) - case _: - render_dict.update(do_search(request.GET.copy())) - - # Set the value to the query as a convenience - alert_form.fields["name"].widget.attrs["value"] = render_dict[ - "search_summary_str" - ] - render_dict.update({"alert_form": alert_form}) + # ElasticSearch and Solr return different query time / cache hit + # information + query_time = search_results.get("results_details", [None])[0] + if query_time is not None: + search_query.query_time_ms = ceil(query_time) + # We set ES `query_time` metadata with values 0 or 1 if cache is hit: + # 0: homepage cache; 1: micro cache + search_query.hit_cache = query_time in [0, 1] + search_query.save() + elif not search_results.get("error"): + query_time = search_results["results"]["object_list"]["QTime"] + # Solr searches are not cached unless a cache_key is passed + # No cache_key is passed for the endpoints we are storing + paginate_cached_solr_results.hit_cache = False + search_query.save() + + # Set the value to the query as a convenience + alert_form.fields["name"].widget.attrs["value"] = render_dict[ + "search_summary_str" + ] + render_dict.update({"alert_form": alert_form}) - return TemplateResponse(request, "search.html", render_dict) + return TemplateResponse(request, "search.html", render_dict) @never_cache From f5e76d58bba99821761d0cc12558c496ece31e9f Mon Sep 17 00:00:00 2001 From: Gianfranco Rossi Date: Thu, 3 Oct 2024 17:45:24 -0500 Subject: [PATCH 5/8] delete bad migration files --- cl/search/migrations/0035_searchquery.py | 77 ------------------- cl/search/migrations/0035_searchquery.sql | 10 --- .../migrations/0035_searchquery_customers.sql | 10 --- .../migrations/0036_alter_searchquery_user.py | 27 ------- cl/search/views.py | 6 +- 5 files changed, 4 insertions(+), 126 deletions(-) delete mode 100644 cl/search/migrations/0035_searchquery.py delete mode 100644 cl/search/migrations/0035_searchquery.sql delete mode 100644 cl/search/migrations/0035_searchquery_customers.sql delete mode 100644 cl/search/migrations/0036_alter_searchquery_user.py diff --git a/cl/search/migrations/0035_searchquery.py b/cl/search/migrations/0035_searchquery.py deleted file mode 100644 index 507871a000..0000000000 --- a/cl/search/migrations/0035_searchquery.py +++ /dev/null @@ -1,77 +0,0 @@ -# Generated by Django 5.1.1 on 2024-09-19 00:54 - -import django.db.models.deletion -from django.conf import settings -from django.db import migrations, models - - -class Migration(migrations.Migration): - dependencies = [ - ("search", "0034_add_harvard_pdf_to_opinioncluster"), - migrations.swappable_dependency(settings.AUTH_USER_MODEL), - ] - - operations = [ - migrations.CreateModel( - name="SearchQuery", - fields=[ - ( - "id", - models.AutoField( - auto_created=True, - primary_key=True, - serialize=False, - verbose_name="ID", - ), - ), - ( - "date_created", - models.DateTimeField( - auto_now_add=True, - db_index=True, - help_text="The moment when the item was created.", - ), - ), - ( - "date_modified", - models.DateTimeField( - auto_now=True, - db_index=True, - help_text="The last moment when the item was modified. A value in year 1750 indicates the value is unknown", - ), - ), - ( - "get_params", - models.TextField( - help_text="The GET parameters of the search query." - ), - ), - ( - "query_time_ms", - models.IntegerField( - help_text="The time taken to execute the query, in milliseconds." - ), - ), - ( - "hit_cache", - models.BooleanField( - help_text="Whether the query hit the cache or not." - ), - ), - ( - "user", - models.ForeignKey( - blank=True, - help_text="The user who performed this search query.", - null=True, - on_delete=django.db.models.deletion.SET_NULL, - related_name="search_queries", - to=settings.AUTH_USER_MODEL, - ), - ), - ], - options={ - "abstract": False, - }, - ), - ] diff --git a/cl/search/migrations/0035_searchquery.sql b/cl/search/migrations/0035_searchquery.sql deleted file mode 100644 index 63694c5a6c..0000000000 --- a/cl/search/migrations/0035_searchquery.sql +++ /dev/null @@ -1,10 +0,0 @@ -BEGIN; --- --- Create model SearchQuery --- -CREATE TABLE "search_searchquery" ("id" integer NOT NULL PRIMARY KEY GENERATED BY DEFAULT AS IDENTITY, "date_created" timestamp with time zone NOT NULL, "date_modified" timestamp with time zone NOT NULL, "get_params" text NOT NULL, "query_time_ms" integer NOT NULL, "hit_cache" boolean NOT NULL, "user_id" integer NULL); -ALTER TABLE "search_searchquery" ADD CONSTRAINT "search_searchquery_user_id_8918791c_fk_auth_user_id" FOREIGN KEY ("user_id") REFERENCES "auth_user" ("id") DEFERRABLE INITIALLY DEFERRED; -CREATE INDEX "search_searchquery_date_created_89023a7b" ON "search_searchquery" ("date_created"); -CREATE INDEX "search_searchquery_date_modified_ec5bc897" ON "search_searchquery" ("date_modified"); -CREATE INDEX "search_searchquery_user_id_8918791c" ON "search_searchquery" ("user_id"); -COMMIT; diff --git a/cl/search/migrations/0035_searchquery_customers.sql b/cl/search/migrations/0035_searchquery_customers.sql deleted file mode 100644 index 63694c5a6c..0000000000 --- a/cl/search/migrations/0035_searchquery_customers.sql +++ /dev/null @@ -1,10 +0,0 @@ -BEGIN; --- --- Create model SearchQuery --- -CREATE TABLE "search_searchquery" ("id" integer NOT NULL PRIMARY KEY GENERATED BY DEFAULT AS IDENTITY, "date_created" timestamp with time zone NOT NULL, "date_modified" timestamp with time zone NOT NULL, "get_params" text NOT NULL, "query_time_ms" integer NOT NULL, "hit_cache" boolean NOT NULL, "user_id" integer NULL); -ALTER TABLE "search_searchquery" ADD CONSTRAINT "search_searchquery_user_id_8918791c_fk_auth_user_id" FOREIGN KEY ("user_id") REFERENCES "auth_user" ("id") DEFERRABLE INITIALLY DEFERRED; -CREATE INDEX "search_searchquery_date_created_89023a7b" ON "search_searchquery" ("date_created"); -CREATE INDEX "search_searchquery_date_modified_ec5bc897" ON "search_searchquery" ("date_modified"); -CREATE INDEX "search_searchquery_user_id_8918791c" ON "search_searchquery" ("user_id"); -COMMIT; diff --git a/cl/search/migrations/0036_alter_searchquery_user.py b/cl/search/migrations/0036_alter_searchquery_user.py deleted file mode 100644 index 15b240e660..0000000000 --- a/cl/search/migrations/0036_alter_searchquery_user.py +++ /dev/null @@ -1,27 +0,0 @@ -# Generated by Django 5.1.1 on 2024-09-21 01:05 - -import django.db.models.deletion -from django.conf import settings -from django.db import migrations, models - - -class Migration(migrations.Migration): - dependencies = [ - ("search", "0035_searchquery"), - migrations.swappable_dependency(settings.AUTH_USER_MODEL), - ] - - operations = [ - migrations.AlterField( - model_name="searchquery", - name="user", - field=models.ForeignKey( - blank=True, - help_text="The user who performed this search query.", - null=True, - on_delete=django.db.models.deletion.CASCADE, - related_name="search_queries", - to=settings.AUTH_USER_MODEL, - ), - ), - ] diff --git a/cl/search/views.py b/cl/search/views.py index e9b2cc46da..ef2798eda2 100644 --- a/cl/search/views.py +++ b/cl/search/views.py @@ -577,10 +577,12 @@ def show_results(request: HttpRequest) -> HttpResponse: search_query.hit_cache = query_time in [0, 1] search_query.save() elif not search_results.get("error"): - query_time = search_results["results"]["object_list"]["QTime"] + search_query.query_time_ms = ceil( + search_results["results"].object_list.QTime + ) # Solr searches are not cached unless a cache_key is passed # No cache_key is passed for the endpoints we are storing - paginate_cached_solr_results.hit_cache = False + search_query.hit_cache = False search_query.save() # Set the value to the query as a convenience From 818f27552bd22cce007f8b7ee02f699c0d774832 Mon Sep 17 00:00:00 2001 From: Gianfranco Rossi Date: Mon, 7 Oct 2024 17:12:32 -0500 Subject: [PATCH 6/8] refactor(search.views): put SearchQuery creation into a function - rename migration from 'searchquery' to 'add_searchquery' - add .sql migration file --- ...searchquery.py => 0036_add_searchquery.py} | 0 cl/search/migrations/0036_add_searchquery.sql | 9 +++ cl/search/views.py | 58 +++++++++++-------- 3 files changed, 43 insertions(+), 24 deletions(-) rename cl/search/migrations/{0036_searchquery.py => 0036_add_searchquery.py} (100%) create mode 100644 cl/search/migrations/0036_add_searchquery.sql diff --git a/cl/search/migrations/0036_searchquery.py b/cl/search/migrations/0036_add_searchquery.py similarity index 100% rename from cl/search/migrations/0036_searchquery.py rename to cl/search/migrations/0036_add_searchquery.py diff --git a/cl/search/migrations/0036_add_searchquery.sql b/cl/search/migrations/0036_add_searchquery.sql new file mode 100644 index 0000000000..c15bc4a47b --- /dev/null +++ b/cl/search/migrations/0036_add_searchquery.sql @@ -0,0 +1,9 @@ +BEGIN; +-- +-- Create model SearchQuery +-- +CREATE TABLE "search_searchquery" ("id" integer NOT NULL PRIMARY KEY GENERATED BY DEFAULT AS IDENTITY, "get_params" text NOT NULL, "query_time_ms" integer NOT NULL, "hit_cache" boolean NOT NULL, "date_created" timestamp with time zone NOT NULL, "user_id" integer NULL); +ALTER TABLE "search_searchquery" ADD CONSTRAINT "search_searchquery_user_id_8918791c_fk_auth_user_id" FOREIGN KEY ("user_id") REFERENCES "auth_user" ("id") DEFERRABLE INITIALLY DEFERRED; +CREATE INDEX "search_searchquery_user_id_8918791c" ON "search_searchquery" ("user_id"); +CREATE INDEX "search_sear_date_cr_c5fff9_idx" ON "search_searchquery" ("date_created"); +COMMIT; diff --git a/cl/search/views.py b/cl/search/views.py index ef2798eda2..502a76632a 100644 --- a/cl/search/views.py +++ b/cl/search/views.py @@ -337,6 +337,39 @@ def get_homepage_stats(): return homepage_data +def store_search_query(request: HttpRequest, search_results: dict) -> None: + """Saves an user's search query in a SearchQuery model + + The `hit_cache` and `query_time_ms` fields computation depend on + whether the search was executed via ElasticSearch or Solr + + :param request: the request object + :param search_results: the dict returned by `do_search` or + `do_es_search` functions + :return None + """ + search_query = SearchQuery( + user=None if request.user.is_anonymous else request.user, + get_params=request.GET.urlencode(), + ) + + query_time = search_results.get("results_details", [None])[0] + if query_time is not None: + search_query.query_time_ms = ceil(query_time) + # We set ES `query_time` metadata with values 0 or 1 if cache is hit: + # 0: homepage cache; 1: micro cache + search_query.hit_cache = query_time in [0, 1] + search_query.save() + elif not search_results.get("error"): + search_query.query_time_ms = ceil( + search_results["results"].object_list.QTime + ) + # Solr searches are not cached unless a cache_key is passed + # No cache_key is passed for the endpoints we are storing + search_query.hit_cache = False + search_query.save() + + @never_cache def show_results(request: HttpRequest) -> HttpResponse: """ @@ -560,30 +593,7 @@ def show_results(request: HttpRequest) -> HttpResponse: search_results = do_search(request.GET.copy()) render_dict.update(search_results) - - search_query = SearchQuery( - user=None if request.user.is_anonymous else request.user, - get_params=request.GET.urlencode(), - hit_cache=False, - ) - - # ElasticSearch and Solr return different query time / cache hit - # information - query_time = search_results.get("results_details", [None])[0] - if query_time is not None: - search_query.query_time_ms = ceil(query_time) - # We set ES `query_time` metadata with values 0 or 1 if cache is hit: - # 0: homepage cache; 1: micro cache - search_query.hit_cache = query_time in [0, 1] - search_query.save() - elif not search_results.get("error"): - search_query.query_time_ms = ceil( - search_results["results"].object_list.QTime - ) - # Solr searches are not cached unless a cache_key is passed - # No cache_key is passed for the endpoints we are storing - search_query.hit_cache = False - search_query.save() + store_search_query(request, search_results) # Set the value to the query as a convenience alert_form.fields["name"].widget.attrs["value"] = render_dict[ From c3ae47aebac49ca3f3a8ff26e005d2c4395fea43 Mon Sep 17 00:00:00 2001 From: Gianfranco Rossi Date: Mon, 14 Oct 2024 10:32:19 -0500 Subject: [PATCH 7/8] fix(SearchQuery): add source, engine and failed fields - Update tests - move `store_search_query` to search_utils - Update migration file --- cl/lib/search_utils.py | 42 +++++++++++++ cl/search/migrations/0036_add_searchquery.py | 28 ++++++++- cl/search/migrations/0036_add_searchquery.sql | 5 +- cl/search/models.py | 27 ++++++++- cl/search/tests/tests.py | 14 +++++ cl/search/views.py | 59 +++---------------- 6 files changed, 116 insertions(+), 59 deletions(-) diff --git a/cl/lib/search_utils.py b/cl/lib/search_utils.py index 3923cf0f4a..dd98355451 100644 --- a/cl/lib/search_utils.py +++ b/cl/lib/search_utils.py @@ -1,5 +1,6 @@ import re from datetime import date, datetime, timedelta +from math import ceil from typing import Any, Dict, List, Optional, Tuple, Union, cast from urllib.parse import parse_qs, urlencode @@ -39,6 +40,7 @@ Court, OpinionCluster, RECAPDocument, + SearchQuery, ) HYPERSCAN_TOKENIZER = HyperscanTokenizer(cache_dir=".hyperscan") @@ -1199,3 +1201,43 @@ async def clean_up_recap_document_file(item: RECAPDocument) -> None: item.page_count = None item.is_available = False await item.asave() + + +def store_search_query(request: HttpRequest, search_results: dict) -> None: + """Saves an user's search query in a SearchQuery model + + :param request: the request object + :param search_results: the dict returned by `do_search` or + `do_es_search` functions + :return None + """ + is_error = search_results.get("error") + is_es_search = search_results.get("results_details") is not None + + search_query = SearchQuery( + user=None if request.user.is_anonymous else request.user, + get_params=request.GET.urlencode(), + failed=is_error, + query_time_ms=None, + hit_cache=False, + source=SearchQuery.WEBSITE, + engine=SearchQuery.ELASTICSEARCH if is_es_search else SearchQuery.SOLR, + ) + if is_error: + # Leave `query_time_ms` as None if there is an error + search_query.save() + return + + if is_es_search: + # We set ES `query_time` metadata with values 0 or 1 if cache is hit: + # 0: homepage cache; 1: micro cache + search_query.query_time_ms = ceil(search_results["results_details"][0]) + search_query.hit_cache = search_query.query_time_ms in [0, 1] + else: + # Solr searches are not cached unless a cache_key is passed + # No cache_key is passed for the endpoints we are storing + search_query.query_time_ms = ceil( + search_results["results"].object_list.QTime + ) + + search_query.save() diff --git a/cl/search/migrations/0036_add_searchquery.py b/cl/search/migrations/0036_add_searchquery.py index 29c2049c3b..5241e55ce0 100644 --- a/cl/search/migrations/0036_add_searchquery.py +++ b/cl/search/migrations/0036_add_searchquery.py @@ -1,4 +1,4 @@ -# Generated by Django 5.1.1 on 2024-10-03 20:22 +# Generated by Django 5.1.1 on 2024-10-14 15:20 import django.db.models.deletion from django.conf import settings @@ -6,6 +6,7 @@ class Migration(migrations.Migration): + dependencies = [ ("search", "0035_pghistory_v3_4_0_trigger_update"), migrations.swappable_dependency(settings.AUTH_USER_MODEL), @@ -24,6 +25,13 @@ class Migration(migrations.Migration): verbose_name="ID", ), ), + ( + "source", + models.SmallIntegerField( + choices=[(1, "Website"), (2, "API request")], + help_text="The interface used to perform the query.", + ), + ), ( "get_params", models.TextField( @@ -33,7 +41,8 @@ class Migration(migrations.Migration): ( "query_time_ms", models.IntegerField( - help_text="The milliseconds to execute the query, as returned in the ElasticSearch or Solr response." + help_text="The milliseconds to execute the query, as returned in the ElasticSearch or Solr response.", + null=True, ), ), ( @@ -42,11 +51,24 @@ class Migration(migrations.Migration): help_text="Whether the query hit the cache or not." ), ), + ( + "failed", + models.BooleanField( + help_text="True if there was an error executing the query." + ), + ), + ( + "engine", + models.SmallIntegerField( + choices=[(1, "Elasticsearch"), (2, "Solr")], + help_text="The engine that executed the search", + ), + ), ( "date_created", models.DateTimeField( auto_now_add=True, - help_text="Datetime when the record was created", + help_text="Datetime when the record was created.", ), ), ( diff --git a/cl/search/migrations/0036_add_searchquery.sql b/cl/search/migrations/0036_add_searchquery.sql index c15bc4a47b..4e55f27278 100644 --- a/cl/search/migrations/0036_add_searchquery.sql +++ b/cl/search/migrations/0036_add_searchquery.sql @@ -1,9 +1,8 @@ -BEGIN; -- -- Create model SearchQuery -- -CREATE TABLE "search_searchquery" ("id" integer NOT NULL PRIMARY KEY GENERATED BY DEFAULT AS IDENTITY, "get_params" text NOT NULL, "query_time_ms" integer NOT NULL, "hit_cache" boolean NOT NULL, "date_created" timestamp with time zone NOT NULL, "user_id" integer NULL); +CREATE TABLE "search_searchquery" ("id" integer NOT NULL PRIMARY KEY GENERATED BY DEFAULT AS IDENTITY, "source" smallint NOT NULL, "get_params" text NOT NULL, "query_time_ms" integer NULL, "hit_cache" boolean NOT NULL, "failed" boolean NOT NULL, "engine" smallint NOT NULL, "date_created" timestamp with time zone NOT NULL, "user_id" integer NULL); ALTER TABLE "search_searchquery" ADD CONSTRAINT "search_searchquery_user_id_8918791c_fk_auth_user_id" FOREIGN KEY ("user_id") REFERENCES "auth_user" ("id") DEFERRABLE INITIALLY DEFERRED; CREATE INDEX "search_searchquery_user_id_8918791c" ON "search_searchquery" ("user_id"); CREATE INDEX "search_sear_date_cr_c5fff9_idx" ON "search_searchquery" ("date_created"); -COMMIT; +COMMIT; \ No newline at end of file diff --git a/cl/search/models.py b/cl/search/models.py index 9eafdaca86..ba85dcb8fc 100644 --- a/cl/search/models.py +++ b/cl/search/models.py @@ -3926,6 +3926,18 @@ class SEARCH_TYPES: class SearchQuery(models.Model): + WEBSITE = 1 + API = 2 + SOURCES = ( + (WEBSITE, "Website"), + (API, "API request"), + ) + ELASTICSEARCH = 1 + SOLR = 2 + ENGINES = ( + (ELASTICSEARCH, "Elasticsearch"), + (SOLR, "Solr"), + ) user = models.ForeignKey( User, help_text="The user who performed this search query.", @@ -3934,17 +3946,28 @@ class SearchQuery(models.Model): null=True, blank=True, ) + source = models.SmallIntegerField( + help_text="The interface used to perform the query.", choices=SOURCES + ) get_params = models.TextField( help_text="The GET parameters of the search query." ) query_time_ms = models.IntegerField( - help_text="The milliseconds to execute the query, as returned in the ElasticSearch or Solr response." + help_text="The milliseconds to execute the query, as returned in " + "the ElasticSearch or Solr response.", + null=True, ) hit_cache = models.BooleanField( help_text="Whether the query hit the cache or not." ) + failed = models.BooleanField( + help_text="True if there was an error executing the query." + ) + engine = models.SmallIntegerField( + help_text="The engine that executed the search", choices=ENGINES + ) date_created = models.DateTimeField( - help_text="Datetime when the record was created", + help_text="Datetime when the record was created.", auto_now_add=True, ) diff --git a/cl/search/tests/tests.py b/cl/search/tests/tests.py index f180b41515..342d864cb0 100644 --- a/cl/search/tests/tests.py +++ b/cl/search/tests/tests.py @@ -1585,6 +1585,8 @@ def test_search_query_saving(self) -> None: parsed_query[key][0] == stored_values[0], f"Query was not saved properly for endpoint {query}", ) + self.assertEqual(last_query.engine, SearchQuery.ELASTICSEARCH) + self.assertEqual(last_query.source, SearchQuery.WEBSITE) self.assertTrue( SearchQuery.objects.last().hit_cache, @@ -1607,6 +1609,18 @@ def test_search_query_saving_solr(self) -> None: parsed_query[key][0] == stored_values[0], f"Query was not saved properly for endpoint {query}", ) + self.assertEqual(last_query.engine, SearchQuery.SOLR) + self.assertEqual(last_query.source, SearchQuery.WEBSITE) + + def test_failed_es_search_queries(self) -> None: + """Do we flag failed ElasticSearch queries properly?""" + query = "type=r&q=contains/sproximity token" + url = f"{reverse('show_results')}?{query}" + self.client.get(url) + last_query = SearchQuery.objects.last() + self.assertTrue(last_query.failed) + self.assertEqual(last_query.query_time_ms, None) + self.assertEqual(last_query.engine, SearchQuery.ELASTICSEARCH) class CaptionTest(TestCase): diff --git a/cl/search/views.py b/cl/search/views.py index 502a76632a..10f3f4b7f9 100644 --- a/cl/search/views.py +++ b/cl/search/views.py @@ -1,10 +1,7 @@ import logging -import math import pickle -import time import traceback from datetime import date, datetime, timedelta, timezone -from math import ceil from urllib.parse import quote import waffle @@ -62,6 +59,7 @@ make_stats_variable, merge_form_with_courts, regroup_snippets, + store_search_query, ) from cl.lib.types import CleanData from cl.lib.utils import ( @@ -82,13 +80,7 @@ UnbalancedQuotesQuery, ) from cl.search.forms import SearchForm, _clean_form -from cl.search.models import ( - SEARCH_TYPES, - Court, - Opinion, - OpinionCluster, - SearchQuery, -) +from cl.search.models import SEARCH_TYPES, Court, Opinion, OpinionCluster from cl.stats.models import Stat from cl.stats.utils import tally_stat from cl.visualizations.models import SCOTUSMap @@ -266,17 +258,17 @@ def do_search( ) return { "results": paged_results, - "facet_fields": make_stats_variable(search_form, paged_results), "search_form": search_form, "search_summary_str": search_summary_str, "search_summary_dict": search_summary_dict, + "error": error, "courts": courts, "court_count_human": court_count_human, "court_count": court_count, "query_citation": query_citation, - "error": error, "cited_cluster": cited_cluster, "related_cluster": related_cluster, + "facet_fields": make_stats_variable(search_form, paged_results), } @@ -337,39 +329,6 @@ def get_homepage_stats(): return homepage_data -def store_search_query(request: HttpRequest, search_results: dict) -> None: - """Saves an user's search query in a SearchQuery model - - The `hit_cache` and `query_time_ms` fields computation depend on - whether the search was executed via ElasticSearch or Solr - - :param request: the request object - :param search_results: the dict returned by `do_search` or - `do_es_search` functions - :return None - """ - search_query = SearchQuery( - user=None if request.user.is_anonymous else request.user, - get_params=request.GET.urlencode(), - ) - - query_time = search_results.get("results_details", [None])[0] - if query_time is not None: - search_query.query_time_ms = ceil(query_time) - # We set ES `query_time` metadata with values 0 or 1 if cache is hit: - # 0: homepage cache; 1: micro cache - search_query.hit_cache = query_time in [0, 1] - search_query.save() - elif not search_results.get("error"): - search_query.query_time_ms = ceil( - search_results["results"].object_list.QTime - ) - # Solr searches are not cached unless a cache_key is passed - # No cache_key is passed for the endpoints we are storing - search_query.hit_cache = False - search_query.save() - - @never_cache def show_results(request: HttpRequest) -> HttpResponse: """ @@ -561,8 +520,6 @@ def show_results(request: HttpRequest) -> HttpResponse: ) search_type = request.GET.get("type", SEARCH_TYPES.OPINION) - search_results = {} - match search_type: case SEARCH_TYPES.PARENTHETICAL: search_results = do_es_search(request.GET.copy()) @@ -895,12 +852,12 @@ def do_es_search( "courts": courts, "court_count_human": court_count_human, "court_count": court_count, - "error_message": error_message, - "suggested_query": suggested_query, - "related_cluster": related_cluster, - "cited_cluster": cited_cluster, "query_citation": query_citation, + "cited_cluster": cited_cluster, + "related_cluster": related_cluster, "facet_fields": facet_fields, + "error_message": error_message, + "suggested_query": suggested_query, "estimated_count_threshold": simplify_estimated_count( compute_lowest_possible_estimate( settings.ELASTICSEARCH_CARDINALITY_PRECISION From 118ee0016cb1e3a031acc79f26b84c3a3664b22c Mon Sep 17 00:00:00 2001 From: Gianfranco Rossi Date: Mon, 14 Oct 2024 20:12:28 -0500 Subject: [PATCH 8/8] fix(store_search_query): simplify hit_cache for ES searches Special query time values that point to a cache hit are 0 and 1 (micro cache). We only store queries that may hit the micro cache, so we can simplify the case for SearchQuery.hit_cache computation --- cl/lib/search_utils.py | 5 ++--- cl/search/tests/tests.py | 39 ++++++++++++++++++++++++++++++++------- 2 files changed, 34 insertions(+), 10 deletions(-) diff --git a/cl/lib/search_utils.py b/cl/lib/search_utils.py index dd98355451..b16446f9da 100644 --- a/cl/lib/search_utils.py +++ b/cl/lib/search_utils.py @@ -1229,10 +1229,9 @@ def store_search_query(request: HttpRequest, search_results: dict) -> None: return if is_es_search: - # We set ES `query_time` metadata with values 0 or 1 if cache is hit: - # 0: homepage cache; 1: micro cache search_query.query_time_ms = ceil(search_results["results_details"][0]) - search_query.hit_cache = search_query.query_time_ms in [0, 1] + # do_es_search returns 1 as query time if the micro cache was hit + search_query.hit_cache = search_query.query_time_ms == 1 else: # Solr searches are not cached unless a cache_key is passed # No cache_key is passed for the endpoints we are storing diff --git a/cl/search/tests/tests.py b/cl/search/tests/tests.py index 342d864cb0..47a246f901 100644 --- a/cl/search/tests/tests.py +++ b/cl/search/tests/tests.py @@ -1569,6 +1569,9 @@ def setUp(self) -> None: r"type=p&q=thomas&type=p&order_by=score%20desc&born_after=01/01/2080", ] super().setUp() + self.source_error_message = ( + f"Saved wrong `engine` value, expected {SearchQuery.WEBSITE}" + ) @override_settings(ELASTICSEARCH_MICRO_CACHE_ENABLED=True) def test_search_query_saving(self) -> None: @@ -1585,8 +1588,16 @@ def test_search_query_saving(self) -> None: parsed_query[key][0] == stored_values[0], f"Query was not saved properly for endpoint {query}", ) - self.assertEqual(last_query.engine, SearchQuery.ELASTICSEARCH) - self.assertEqual(last_query.source, SearchQuery.WEBSITE) + self.assertEqual( + last_query.engine, + SearchQuery.ELASTICSEARCH, + f"Saved wrong `engine` value, expected {SearchQuery.ELASTICSEARCH}", + ) + self.assertEqual( + last_query.source, + SearchQuery.WEBSITE, + self.source_error_message, + ) self.assertTrue( SearchQuery.objects.last().hit_cache, @@ -1609,8 +1620,16 @@ def test_search_query_saving_solr(self) -> None: parsed_query[key][0] == stored_values[0], f"Query was not saved properly for endpoint {query}", ) - self.assertEqual(last_query.engine, SearchQuery.SOLR) - self.assertEqual(last_query.source, SearchQuery.WEBSITE) + self.assertEqual( + last_query.engine, + SearchQuery.SOLR, + f"Saved wrong `engine` value, expected {SearchQuery.SOLR}", + ) + self.assertEqual( + last_query.source, + SearchQuery.WEBSITE, + self.source_error_message, + ) def test_failed_es_search_queries(self) -> None: """Do we flag failed ElasticSearch queries properly?""" @@ -1618,9 +1637,15 @@ def test_failed_es_search_queries(self) -> None: url = f"{reverse('show_results')}?{query}" self.client.get(url) last_query = SearchQuery.objects.last() - self.assertTrue(last_query.failed) - self.assertEqual(last_query.query_time_ms, None) - self.assertEqual(last_query.engine, SearchQuery.ELASTICSEARCH) + self.assertTrue(last_query.failed, "SearchQuery.failed should be True") + self.assertEqual( + last_query.query_time_ms, None, "Query time should be None" + ) + self.assertEqual( + last_query.engine, + SearchQuery.ELASTICSEARCH, + f"Saved wrong `engine` value, expected {SearchQuery.ELASTICSEARCH}", + ) class CaptionTest(TestCase):