From b5cde55a52974899828e4cccdf97d6ed4fdc351b Mon Sep 17 00:00:00 2001 From: Kelvin Muchiri Date: Thu, 19 Sep 2024 15:41:24 +0300 Subject: [PATCH] Add support for `OR` operation data filter for date fields (#2704) * add support for querying "or" data by meta data fields * add test * add test * remove extra underscore * add tests * add test * refactor code * add support for ISO date format when filtering data * refactor code * refactor code * refactor code * update documentation * add support for date format %Y-%m-%dT%H:%M:%S%z data endpoint filter * remove unnecessary parsing for dates * revert modified test * update docs * update docs * update docs * update docs * update tests * remove unsupported example from docs * remove tests for unsupported operation * handle invalid date formats when filtering data * rename tests * update doc string * remove commented code * update tag for ona-oidc * enhance error message * fix import lint error fix wrong-import-order / standard import "from builtins import str as text" should be placed before "import six" * fix lint error fix wrong-import-order / third party import "import six" should be placed before "from onadata.libs.utils.common_tags import KNOWN_DATE_FORMATS" * fix indentation * enhance error message * fix lint error line-too-long --- docs/data.rst | 18 +++++ .../api/tests/viewsets/test_data_viewset.py | 73 +++++++++++++++++++ onadata/apps/viewer/parsed_instance_tools.py | 41 +++++++++-- .../apps/viewer/tests/test_parsed_instance.py | 45 +++++++++++- onadata/libs/exceptions.py | 8 +- onadata/libs/utils/common_tags.py | 6 ++ requirements/base.in | 2 +- requirements/base.pip | 2 +- requirements/dev.pip | 2 +- 9 files changed, 186 insertions(+), 11 deletions(-) diff --git a/docs/data.rst b/docs/data.rst index 3a1525ceb6..3048d7b32d 100644 --- a/docs/data.rst +++ b/docs/data.rst @@ -571,6 +571,15 @@ Query submitted data of a specific form ---------------------------------------- Use the `query` or `data` parameter to pass 
in a JSON key/value query. +ISO 8601 date formats are supported. Below are examples of common formats: + +- ``YYYY-MM-DD`` (e.g., 2024-09-18) +- ``YYYY-MM-DDThh:mm:ss`` (e.g., 2024-09-18T14:30:00) +- ``YYYY-MM-DDThh:mm:ssZ`` (e.g., 2024-09-18T14:30:00Z) +- ``YYYY-MM-DDThh:mm:ss.ssssssZ`` (e.g., 2024-09-18T14:30:00.169615Z) +- ``YYYY-MM-DDThh:mm:ss±hh:mm`` (e.g., 2024-09-17T13:39:40+00:00) +- ``YYYY-MM-DDThh:mm:ss.ssssss±hh:mm`` (e.g., 2024-09-17T13:39:40.169615+00:00) + When quering a date time field whose value is in ISO format such as ``2020-12-18T09:36:19.767455+00:00``, it is important to ensure the ``+`` (plus) is encoded to ``%2b``. ``+`` without encoding is parsed as whitespace. So ``2020-12-18T09:36:19.767455+00:00`` should be converted to ``2020-12-18T09:36:19.767455%2b00:00``. @@ -672,6 +681,15 @@ Query submissions with `NULL` submission review status curl -X GET https://api.ona.io/api/v1/data/22845?query={"_review_status": null} +Example XIII +^^^^^^^^^^^^ + +Query submissions collected within specific dates or edited within specific dates. 
+ +:: + + curl -X GET https://api.ona.io/api/v1/data/22845?query={"$or": [{"_submission_time":{"$gte": "2020-01-01", "$lte": "2020-08-31"}}, {"_last_edited":{"$gte": "2020-01-01", "$lte": "2020-08-31"}}]} + All Filters Options diff --git a/onadata/apps/api/tests/viewsets/test_data_viewset.py b/onadata/apps/api/tests/viewsets/test_data_viewset.py index cf8982ed25..cadce3b8da 100644 --- a/onadata/apps/api/tests/viewsets/test_data_viewset.py +++ b/onadata/apps/api/tests/viewsets/test_data_viewset.py @@ -3513,6 +3513,79 @@ def test_data_query_ornull(self): self.assertEqual(response.status_code, 200) self.assertEqual(len(response.data), 4) + def test_or_with_date_filters(self): + """OR operation filter works for date fields""" + view = DataViewSet.as_view({"get": "list"}) + # Mock date_created (_submission_time) + with patch( + "django.utils.timezone.now", + Mock(return_value=datetime.datetime(2024, 9, 16, tzinfo=timezone.utc)), + ): + Instance.objects.create( + xform=self.xform, + xml='mango', + ) + # Mock date_created (_submission_time) + with patch( + "django.utils.timezone.now", + Mock(return_value=datetime.datetime(2024, 9, 18, tzinfo=timezone.utc)), + ): + Instance.objects.create( + xform=self.xform, + xml='mango', + ) + # Mock date_created (_submission_time) + with patch( + "django.utils.timezone.now", + Mock(return_value=datetime.datetime(2022, 4, 1, tzinfo=timezone.utc)), + ): + Instance.objects.create( + xform=self.xform, + last_edited=datetime.datetime(2023, 4, 1, tzinfo=timezone.utc), + xml='mango', + ) + # Mock date_created (_submission_time) + with patch( + "django.utils.timezone.now", + Mock(return_value=datetime.datetime(2022, 4, 1, tzinfo=timezone.utc)), + ): + Instance.objects.create( + xform=self.xform, + last_edited=datetime.datetime(2023, 5, 1, tzinfo=timezone.utc), + xml='mango', + ) + + query_str = ( + '{"$or": [{"_submission_time":{"$gte": "2024-09-16", "$lte": "2024-09-18"}}, ' + '{"_last_edited":{"$gte": "2023-04-01", "$lte": "2023-05-01"}}]}' 
+ ) + request = self.factory.get("/?query=%s" % query_str, **self.extra) + response = view(request, pk=self.xform.pk) + self.assertEqual(response.status_code, 200) + self.assertEqual(len(response.data), 4) + query_str = ( + '{"$or": [{"_submission_time":{"$gte": "2024-09-16"}}, ' + '{"_last_edited":{"$gte": "2023-05-01"}}]}' + ) + request = self.factory.get("/?query=%s" % query_str, **self.extra) + response = view(request, pk=self.xform.pk) + self.assertEqual(response.status_code, 200) + self.assertEqual(len(response.data), 3) + + def test_invalid_date_filters(self): + """Invalid date filters are handled appropriately""" + view = DataViewSet.as_view({"get": "list"}) + + for json_date_field in ["_submission_time", "_date_modified", "_last_edited"]: + query_str = '{"%s": {"$lte": "watermelon"}}' % json_date_field + request = self.factory.get("/?query=%s" % query_str, **self.extra) + response = view(request, pk=self.xform.pk) + self.assertEqual(response.status_code, 400) + self.assertEqual( + f"{response.data['detail']}", + f'Invalid date value "watermelon" for the field {json_date_field}.', + ) + def test_data_list_xml_format(self): """Test DataViewSet list XML""" # create submission diff --git a/onadata/apps/viewer/parsed_instance_tools.py b/onadata/apps/viewer/parsed_instance_tools.py index 00135e7a8e..00b376e10a 100644 --- a/onadata/apps/viewer/parsed_instance_tools.py +++ b/onadata/apps/viewer/parsed_instance_tools.py @@ -6,12 +6,15 @@ import json from builtins import str as text from typing import Any, Tuple - import six -from onadata.libs.utils.common_tags import DATE_FORMAT, MONGO_STRFTIME +from django.utils.translation import gettext_lazy as _ + +from onadata.libs.utils.common_tags import KNOWN_DATE_FORMATS +from onadata.libs.exceptions import InavlidDateFormat -KNOWN_DATES = ["_submission_time"] + +KNOWN_DATES = ["_submission_time", "_last_edited", "_date_modified"] NONE_JSON_FIELDS = { "_submission_time": "date_created", "_date_modified": "date_modified", 
@@ -62,11 +65,23 @@ def _parse_where(query, known_integers, known_decimals, or_where, or_params): _v = value if field_key in KNOWN_DATES: raw_date = value - for date_format in (MONGO_STRFTIME, DATE_FORMAT): + is_date_valid = False + for date_format in KNOWN_DATE_FORMATS: try: - _v = datetime.datetime.strptime(raw_date[:19], date_format) + _v = datetime.datetime.strptime(raw_date, date_format) except ValueError: - pass + is_date_valid = False + else: + is_date_valid = True + break + + if not is_date_valid: + err_msg = _( + f'Invalid date value "{value}" ' + f"for the field {field_key}." + ) + raise InavlidDateFormat(err_msg) + if field_key in NONE_JSON_FIELDS: where_params.extend([text(_v)]) else: @@ -131,6 +146,20 @@ def get_where_clause(query, form_integer_fields=None, form_decimal_fields=None): for or_query in or_dict: for key, value in or_query.items(): + if key in NONE_JSON_FIELDS: + and_query_where, and_query_where_params = _parse_where( + or_query, + known_integers, + known_decimals, + [], + [], + ) + or_where.extend( + ["".join(["(", " AND ".join(and_query_where), ")"])] + ) + or_params.extend(and_query_where_params) + continue + if value is None: or_where.extend([f"json->>'{key}' IS NULL"]) elif isinstance(value, list): diff --git a/onadata/apps/viewer/tests/test_parsed_instance.py b/onadata/apps/viewer/tests/test_parsed_instance.py index a3942df52d..4d6e4321eb 100644 --- a/onadata/apps/viewer/tests/test_parsed_instance.py +++ b/onadata/apps/viewer/tests/test_parsed_instance.py @@ -1,6 +1,8 @@ import os - from datetime import datetime + +from rest_framework.exceptions import ParseError + from onadata.apps.logger.models.instance import Instance from onadata.apps.main.models.user_profile import UserProfile from onadata.apps.main.tests.test_base import TestBase @@ -90,6 +92,47 @@ def test_get_where_clause_with_integer(self): self.assertEqual(where, ["json::text ~* cast(%s as text)"]) self.assertEqual(where_params, [11]) + def 
test_get_where_clause_or_date_range(self): + """OR operation get_where_clause with date range""" + query = ( + '{"$or": [{"_submission_time":{"$gte": "2024-09-17T13:39:40.001694+00:00", ' + '"$lte": "2024-09-17T13:39:40.001694+00:00"}}, ' + '{"_last_edited":{"$gte": "2024-04-01T13:39:40.001694+00:00", ' + '"$lte": "2024-04-01T13:39:40.001694+00:00"}}, ' + '{"_date_modified":{"$gte": "2024-04-01T13:39:40.001694+00:00", ' + '"$lte": "2024-04-01T13:39:40.001694+00:00"}}]}' + ) + where, where_params = get_where_clause(query) + self.assertEqual( + where, + [ + ( + "((date_created >= %s AND date_created <= %s) OR " + "(last_edited >= %s AND last_edited <= %s) OR " + "(date_modified >= %s AND date_modified <= %s))" + ) + ], + ) + self.assertEqual( + where_params, + [ + "2024-09-17 13:39:40.001694+00:00", + "2024-09-17 13:39:40.001694+00:00", + "2024-04-01 13:39:40.001694+00:00", + "2024-04-01 13:39:40.001694+00:00", + "2024-04-01 13:39:40.001694+00:00", + "2024-04-01 13:39:40.001694+00:00", + ], + ) + + def test_invalid_date_format(self): + """Inavlid date format is handled""" + for json_date_field in ["_submission_time", "_date_modified", "_last_edited"]: + query = {json_date_field: {"$lte": "watermelon"}} + + with self.assertRaises(ParseError): + get_where_clause(query) + def test_retrieve_records_based_on_form_verion(self): self._create_user_and_login() self._publish_transportation_form() diff --git a/onadata/libs/exceptions.py b/onadata/libs/exceptions.py index d41c22f8de..a24987f045 100644 --- a/onadata/libs/exceptions.py +++ b/onadata/libs/exceptions.py @@ -2,7 +2,7 @@ """Custom Expecting classes.""" from django.utils.translation import gettext_lazy as _ -from rest_framework.exceptions import APIException +from rest_framework.exceptions import APIException, ParseError class EnketoError(Exception): @@ -35,3 +35,9 @@ class ServiceUnavailable(APIException): status_code = 503 default_detail = "Service temporarily unavailable, try again later." 
+ + +class InavlidDateFormat(ParseError): + """Raise when request query has invalid date.""" + + default_detail = _("Invalid date format.") diff --git a/onadata/libs/utils/common_tags.py b/onadata/libs/utils/common_tags.py index 20b091b6d9..4a5680f2cd 100644 --- a/onadata/libs/utils/common_tags.py +++ b/onadata/libs/utils/common_tags.py @@ -71,6 +71,12 @@ # datetime format that we store in mongo MONGO_STRFTIME = "%Y-%m-%dT%H:%M:%S" DATE_FORMAT = "%Y-%m-%d" +KNOWN_DATE_FORMATS = [ + DATE_FORMAT, + MONGO_STRFTIME, + "%Y-%m-%dT%H:%M:%S%z", + "%Y-%m-%dT%H:%M:%S.%f%z", +] # how to represent N/A in exports NA_REP = "n/a" diff --git a/requirements/base.in b/requirements/base.in index 0755924497..fec5d9b943 100644 --- a/requirements/base.in +++ b/requirements/base.in @@ -7,5 +7,5 @@ git+https://github.com/onaio/django-digest.git@6bf61ec08502fd3545d4f2c0838b6cb15 git+https://github.com/onaio/django-multidb-router.git@f711368180d58eef87eda54fadfd5f8355623d52#egg=django-multidb-router git+https://github.com/onaio/floip-py.git@3c980eb184069ae7c3c9136b18441978237cd41d#egg=pyfloip git+https://github.com/onaio/python-json2xlsclient.git@62b4645f7b4f2684421a13ce98da0331a9dd66a0#egg=python-json2xlsclient -git+https://github.com/onaio/ona-oidc.git@pytz-deprecated#egg=ona-oidc +git+https://github.com/onaio/ona-oidc.git@v1.0.4#egg=ona-oidc -e git+https://github.com/onaio/savreaderwriter.git@fix-pep-440-issues#egg=savreaderwriter diff --git a/requirements/base.pip b/requirements/base.pip index e170786b16..43d400ee9d 100644 --- a/requirements/base.pip +++ b/requirements/base.pip @@ -235,7 +235,7 @@ oauthlib==3.2.2 # via # django-oauth-toolkit # requests-oauthlib -ona-oidc @ git+https://github.com/onaio/ona-oidc.git@pytz-deprecated +ona-oidc @ git+https://github.com/onaio/ona-oidc.git@v1.0.4 # via -r requirements/base.in openpyxl==3.1.2 # via diff --git a/requirements/dev.pip b/requirements/dev.pip index 8c1232c941..bcdc710dd0 100644 --- a/requirements/dev.pip +++ b/requirements/dev.pip 
@@ -303,7 +303,7 @@ oauthlib==3.2.2 # via # django-oauth-toolkit # requests-oauthlib -ona-oidc @ git+https://github.com/onaio/ona-oidc.git@pytz-deprecated +ona-oidc @ git+https://github.com/onaio/ona-oidc.git@v1.0.4 # via -r requirements/base.in openpyxl==3.1.2 # via