From b35ded8eb7f0e8c193aa89c03cd0e82c0087e4e4 Mon Sep 17 00:00:00 2001 From: Ukang'a Dickson Date: Thu, 9 Jun 2022 01:31:03 +0300 Subject: [PATCH 1/3] Fix data type for raw select queries data Closes #2254 --- .prospector.yaml | 10 +++++++++- .../apps/api/tests/viewsets/test_data_viewset.py | 12 ++++++++++++ onadata/apps/viewer/models/parsed_instance.py | 14 +++++++------- requirements/base.pip | 2 ++ requirements/dev.pip | 2 ++ setup.cfg | 1 + 6 files changed, 33 insertions(+), 8 deletions(-) diff --git a/.prospector.yaml b/.prospector.yaml index 452c66da60..27ee1e3a30 100644 --- a/.prospector.yaml +++ b/.prospector.yaml @@ -1,6 +1,14 @@ -strictness: medium +strictness: veryhigh doc-warnings: false test-warnings: false autodetect: true member-warnings: false max-line-length: 88 + +pylint: + options: + extension-pkg-allow-list: + - ujson + +mccabe: + run: false diff --git a/onadata/apps/api/tests/viewsets/test_data_viewset.py b/onadata/apps/api/tests/viewsets/test_data_viewset.py index adc64e9b6a..b6afb10e2c 100644 --- a/onadata/apps/api/tests/viewsets/test_data_viewset.py +++ b/onadata/apps/api/tests/viewsets/test_data_viewset.py @@ -233,6 +233,7 @@ def test_data_list_with_xform_in_delete_async_queue(self): self.assertEqual(len(response.data), initial_count - 1) def test_numeric_types_are_rendered_as_required(self): + """Test numeric types are rendered as numeric.""" tutorial_folder = os.path.join( os.path.dirname(__file__), "..", "fixtures", "forms", "tutorial" ) @@ -244,6 +245,7 @@ def test_numeric_types_are_rendered_as_required(self): create_instance(self.user.username, open(instance_path, "rb"), []) self.assertEqual(self.xform.instances.count(), 1) + view = DataViewSet.as_view({"get": "list"}) request = self.factory.get("/", **self.extra) response = view(request, pk=self.xform.id) @@ -253,6 +255,16 @@ def test_numeric_types_are_rendered_as_required(self): self.assertEqual(response.data[0].get("net_worth"), 100000.00) self.assertEqual(response.data[0].get("imei"), "351746052009472") + # test when fields parameter is used. + fields_query = {"fields": '["_id", "age", "net_worth", "imei"]'} + request = self.factory.get("/", data=fields_query, **self.extra) + response = view(request, pk=self.xform.id) + self.assertEqual(response.status_code, 200, response.data) + # check that ONLY values with numeric and decimal types are converted + self.assertEqual(response.data[0].get("age"), 35) + self.assertEqual(response.data[0].get("net_worth"), 100000.00) + self.assertEqual(response.data[0].get("imei"), "351746052009472") + def test_data_jsonp(self): self._make_submissions() view = DataViewSet.as_view({"get": "list"}) diff --git a/onadata/apps/viewer/models/parsed_instance.py b/onadata/apps/viewer/models/parsed_instance.py index 7984695898..5c00f78403 100644 --- a/onadata/apps/viewer/models/parsed_instance.py +++ b/onadata/apps/viewer/models/parsed_instance.py @@ -3,7 +3,6 @@ ParsedInstance model """ import datetime -import json import types from django.conf import settings @@ -12,6 +11,7 @@ from django.utils.translation import gettext as _ import six +import ujson as json from dateutil import parser from onadata.apps.logger.models.instance import Instance, _get_attachments_from_instance @@ -136,7 +136,7 @@ def parse_json(data): yield parse_json(row[0]) if row[0] else None else: for row in cursor.fetchall(): - yield dict(zip(fields, row)) + yield dict(zip(fields, (json.loads(s) for s in row))) def get_etag_hash_from_query(queryset, sql=None, params=None): @@ -144,9 +144,10 @@ def get_etag_hash_from_query(queryset, sql=None, params=None): if not isinstance(queryset, EmptyQuerySet): if sql is None: sql, params = queryset.query.sql_with_params() + from_index = sql.find("FROM ") sql = ( "SELECT md5(string_agg(date_modified::text, ''))" - " FROM (SELECT date_modified " + sql[sql.find("FROM ") :] + ") AS A" + " FROM (SELECT date_modified " + sql[from_index:] + ") AS A" ) etag_hash = [i for i in _query_iterator(sql, params=params) if i is not None] @@ -181,7 +182,8 @@ def _start_index_limit(records, sql, fields, params, sort, start_index, limit): and not fields and not has_json_fields ): - records = records[start_index : start_index + limit] + end_index = start_index + limit + records = records[start_index:end_index] if start_index is not None and limit is None and not fields and not has_json_fields: records = records[start_index:] @@ -349,7 +351,6 @@ class ParsedInstance(models.Model): ) start_time = models.DateTimeField(null=True) end_time = models.DateTimeField(null=True) - # TODO: decide if decimal field is better than float field. lat = models.FloatField(null=True) lng = models.FloatField(null=True) @@ -444,8 +445,6 @@ def _get_name_for_type(self, type_value): return item["name"] return None - # TODO: figure out how much of this code should be here versus - # data_dictionary.py. def _set_geopoint(self): if self.instance.point: self.lat = self.instance.point.y @@ -474,6 +473,7 @@ def remove_note(self, note_id): note.delete() def get_notes(self): + """Returns a list of notes data objects.""" notes = [] note_qs = self.instance.notes.values( "id", "note", "date_created", "date_modified" diff --git a/requirements/base.pip b/requirements/base.pip index 7e8fbd0c32..ce82639247 100644 --- a/requirements/base.pip +++ b/requirements/base.pip @@ -377,6 +377,8 @@ tabulator==1.53.5 # via # datapackage # tableschema +ujson==5.3.0 + # via onadata unicodecsv==0.14.1 # via # datapackage diff --git a/requirements/dev.pip b/requirements/dev.pip index f1d4a95328..9adbc3ff6d 100644 --- a/requirements/dev.pip +++ b/requirements/dev.pip @@ -513,6 +513,8 @@ traitlets==5.2.2.post1 # via # ipython # matplotlib-inline +ujson==5.3.0 + # via onadata unicodecsv==0.14.1 # via # datapackage diff --git a/setup.cfg b/setup.cfg index c1c33c1fd0..0c53480db1 100644 --- a/setup.cfg +++ b/setup.cfg @@ -108,6 +108,7 @@ install_requires = # Google exports google-auth-oauthlib google-auth + ujson python_requires = >= 3.9 setup_requires = setuptools_scm From 6ab3d87e280855b0f8e22f51ee090d94608a5a56 Mon Sep 17 00:00:00 2001 From: Ukang'a Dickson Date: Thu, 9 Jun 2022 20:21:11 +0300 Subject: [PATCH 2/3] Use json.loads only when handling a str object. --- .pylintrc | 2 +- onadata/apps/main/tests/test_form_api.py | 189 ++++++++++-------- onadata/apps/viewer/models/parsed_instance.py | 4 +- 3 files changed, 111 insertions(+), 84 deletions(-) diff --git a/.pylintrc b/.pylintrc index ab8c6a1b0a..686428e7b6 100644 --- a/.pylintrc +++ b/.pylintrc @@ -3,7 +3,7 @@ # A comma-separated list of package or module names from where C extensions may # be loaded. Extensions are loading into the active Python interpreter and may # run arbitrary code -extension-pkg-whitelist= +extension-pkg-allow-list=ujson # Add files or directories to the blacklist. They should be base names, not # paths. diff --git a/onadata/apps/main/tests/test_form_api.py b/onadata/apps/main/tests/test_form_api.py index 3308fc6e87..a10abf1be9 100644 --- a/onadata/apps/main/tests/test_form_api.py +++ b/onadata/apps/main/tests/test_form_api.py @@ -1,3 +1,7 @@ +# -*- coding: utf-8 -*- +""" +Test onadata.apps.main.views.api. +""" import base64 import json @@ -6,30 +10,24 @@ from onadata.apps.main.tests.test_base import TestBase from onadata.apps.main.views import api -from onadata.apps.viewer.models.parsed_instance import ParsedInstance from onadata.libs.utils.mongo import _decode_from_mongo, _encode_for_mongo -def dict_for_mongo_without_userform_id(parsed_instance): - d = parsed_instance.to_dict_for_mongo() - # remove _userform_id since its not returned by the API - d.pop(ParsedInstance.USERFORM_ID) - return d - - class TestFormAPI(TestBase): + """Test the form API endpoint.""" def setUp(self): - super(TestBase, self).setUp() + super().setUp() self.factory = RequestFactory() self._create_user_and_login() self._publish_transportation_form_and_submit_instance() - self.api_url = reverse(api, kwargs={ - 'username': self.user.username, - 'id_string': self.xform.id_string - }) + self.api_url = reverse( + api, + kwargs={"username": self.user.username, "id_string": self.xform.id_string}, + ) def test_api(self): + """Test the forms /api endpoint.""" request = self.factory.get(self.api_url, {}) request.user = self.user response = api(request, self.user.username, self.xform.id_string) @@ -43,119 +41,145 @@ def test_api(self): self.assertEqual(find_d, data) def test_api_with_query(self): + """Test the query param for forms API endpoint.""" # query string - query = '{"transport/available_transportation_types_to_referral_facil'\ - 'ity":"none"}' - data = {'query': query} + query = ( + '{"transport/available_transportation_types_to_referral_facil' + 'ity":"none"}' + ) + data = {"query": query} response = self.client.get(self.api_url, data) self.assertEqual(response.status_code, 200) - d = self.xform.instances.all()[0].json + data = self.xform.instances.all()[0].json find_d = json.loads(response.content)[0] - self.assertEqual(find_d, d) + self.assertEqual(find_d, data) def test_api_query_no_records(self): + """Test query param that returns no records.""" # query string query = { - "transport/available_transporation_types_to_referral_facility": - "bicycle" + "transport/available_transporation_types_to_referral_facility": "bicycle" } - data = {'query': json.dumps(query)} + data = {"query": json.dumps(query)} response = self.client.get(self.api_url, data) - self.assertEqual(response.status_code, 200) - self.assertEqual(response.content, b'[]') - data['fields'] = '["_id"]' + self.assertEqual(response.status_code, 200, response.content) + self.assertEqual(response.content, b"[]") + data["fields"] = '["_id"]' response = self.client.get(self.api_url, data) - self.assertEqual(response.status_code, 200) - self.assertEqual(response.content, b'[]') + self.assertEqual(response.status_code, 200, response.content) + self.assertEqual(response.content, b"[]") def test_handle_bad_json(self): - response = self.client.get(self.api_url, {'query': '{bad'}) - self.assertEqual(response.status_code, 400) + """Test a bad query is handled correctly with status 400.""" + response = self.client.get(self.api_url, {"query": "{bad"}) + self.assertContains( + response, + "Expecting property name enclosed in double quotes", + status_code=400, + ) def test_api_jsonp(self): + """Test support for jsonp response at the API endpoint.""" # query string - callback = 'jsonpCallback' - response = self.client.get(self.api_url, {'callback': callback}) + callback = "jsonpCallback" + response = self.client.get(self.api_url, {"callback": callback}) self.assertEqual(response.status_code, 200) - content = response.content.decode('utf-8') - self.assertEqual(content.startswith(callback + '('), True) - self.assertEqual(content.endswith(')'), True) - start = callback.__len__() + 1 - end = content.__len__() - 1 - content = content[start: end] - d = self.xform.instances.all()[0].json + content = response.content.decode("utf-8") + self.assertEqual(content.startswith(callback + "("), True) + self.assertEqual(content.endswith(")"), True) + start = len(callback) + 1 + end = len(content) - 1 + content = content[start:end] + data = self.xform.instances.all()[0].json find_d = json.loads(content)[0] - self.assertEqual(find_d, d) + self.assertEqual(find_d, data) def test_api_with_query_start_limit(self): + """Test start and limit query parameters.""" for i in range(1, 3): self._submit_transport_instance(i) # query string - data = {'start': 0, 'limit': 2} + data = {"start": 0, "limit": 2} response = self.client.get(self.api_url, data) self.assertEqual(response.status_code, 200) content = json.loads(response.content) self.assertEqual(len(content), 2) - data['fields'] = '["_id"]' + data["fields"] = '["_id"]' response = self.client.get(self.api_url, data) self.assertEqual(response.status_code, 200) content = json.loads(response.content) self.assertEqual(len(content), 2) + # pylint: disable=invalid-name def test_api_with_query_invalid_start_limit(self): + """Test start and limit query parameters with invalid values.""" # query string - query = '{"transport/available_transportation_types_to_referral_facil'\ - 'ity":"none"}' - data = {'query': query, 'start': -100, 'limit': -100} + query = ( + '{"transport/available_transportation_types_to_referral_facil' + 'ity":"none"}' + ) + data = {"query": query, "start": -100, "limit": -100} response = self.client.get(self.api_url, data) - self.assertEqual(response.status_code, 400) + self.assertContains(response, "Invalid start/limit params", status_code=400) - data = {'query': query, 'start': 'invalid', 'limit': 'invalid'} + data = {"query": query, "start": "invalid", "limit": "invalid"} response = self.client.get(self.api_url, data) - self.assertEqual(response.status_code, 400) + self.assertContains( + response, "invalid literal for int() with base 10:", status_code=400 + ) def test_api_count(self): + """Test count parameter at the form API endpoint.""" # query string - query = '{"transport/available_transportation_types_to_referral_facil'\ - 'ity":"none"}' - data = {'query': query, 'count': 1} + query = ( + '{"transport/available_transportation_types_to_referral_facil' + 'ity":"none"}' + ) + data = {"query": query, "count": 1} response = self.client.get(self.api_url, data) - self.assertEqual(response.status_code, 200) + self.assertEqual(response.status_code, 200, response.content) find_d = json.loads(response.content)[0] - self.assertTrue('count' in find_d) + self.assertTrue("count" in find_d) - data['fields'] = '["_id"]' + data["fields"] = '["_id"]' response = self.client.get(self.api_url, data) - self.assertEqual(response.status_code, 200) + self.assertEqual(response.status_code, 200, response.content) find_d = json.loads(response.content)[0] - self.assertTrue('count' in find_d) - self.assertEqual(find_d.get('count'), 1) + self.assertTrue("count" in find_d) + self.assertEqual(find_d.get("count"), 1) def test_api_column_select(self): + """Test fields parameter at the API endpoint.""" # query string - query = '{"transport/available_transportation_types_to_referral_facil'\ - 'ity":"none"}' - columns = '["transport/available_transportation_types_to_referral_fac'\ - 'ility"]' - data = {'query': query, 'fields': columns} + query = ( + '{"transport/available_transportation_types_to_referral_facility":"none"}' + ) + columns = '["transport/available_transportation_types_to_referral_facility"]' + data = {"query": query, "fields": columns} request = self.factory.get(self.api_url, data) request.user = self.user response = api(request, self.user.username, self.xform.id_string) - self.assertEqual(response.status_code, 200) + self.assertEqual(response.status_code, 200, response.content) find_d = json.loads(response.content)[0] self.assertTrue( - 'transport/available_transportation_types_to_referral_facility' in - find_d) - self.assertFalse('_attachments' in find_d) + "transport/available_transportation_types_to_referral_facility" in find_d + ) + self.assertFalse("_attachments" in find_d) def test_api_decode_from_mongo(self): + """Test API decodes Mongo DB symbols.""" field = "$section1.group01.question1" encoded = _encode_for_mongo(field) - self.assertEqual(encoded, ( - "%(dollar)ssection1%(dot)sgroup01%(dot)squestion1" % { - "dollar": base64.b64encode( - '$'.encode('utf-8')).decode('utf-8'), - "dot": base64.b64encode('.'.encode('utf-8')).decode('utf-8')})) + self.assertEqual( + encoded, + ( + "%(dollar)ssection1%(dot)sgroup01%(dot)squestion1" + % { + "dollar": base64.b64encode("$".encode("utf-8")).decode("utf-8"), + "dot": base64.b64encode(".".encode("utf-8")).decode("utf-8"), + } + ), + ) decoded = _decode_from_mongo(encoded) self.assertEqual(field, decoded) @@ -171,20 +195,20 @@ def test_api_with_or_query(self): # record 2: 'transport/loop_over_transport_types_frequency/ambulance/fr # equency_to_referral_facility': 'weekly' params = { - 'query': - '{"$or": [{"transport/loop_over_transport_types_frequency/ambulanc' + "query": '{"$or": [{"transport/loop_over_transport_types_frequency/ambulanc' 'e/frequency_to_referral_facility": "weekly"}, {"transport/loop_ov' - 'er_transport_types_frequency/ambulance/frequency_to_referral_faci' - 'lity": "daily"}]}'} + "er_transport_types_frequency/ambulance/frequency_to_referral_faci" + 'lity": "daily"}]}' + } response = self.client.get(self.api_url, params) - self.assertEqual(response.status_code, 200) + self.assertEqual(response.status_code, 200, response.content) data = json.loads(response.content) self.assertEqual(len(data), 2) # check with fields filter - params['fields'] = '["_id"]' + params["fields"] = '["_id"]' response = self.client.get(self.api_url, params) - self.assertEqual(response.status_code, 200) + self.assertEqual(response.status_code, 200, response.content) data = json.loads(response.content) self.assertEqual(len(data), 2) @@ -196,11 +220,12 @@ def test_api_with_or_query(self): self.assertEqual(len(data), 3) def test_api_cors_options(self): + """Test CORS options at the API endpoint.""" response = self.anon.options(self.api_url) - allowed_headers = ['Accept', 'Origin', 'X-Requested-With', - 'Authorization'] - provided_headers = [h.strip() for h in response[ - 'Access-Control-Allow-Headers'].split(',')] + allowed_headers = ["Accept", "Origin", "X-Requested-With", "Authorization"] + provided_headers = [ + h.strip() for h in response["Access-Control-Allow-Headers"].split(",") + ] self.assertListEqual(allowed_headers, provided_headers) - self.assertEqual(response['Access-Control-Allow-Methods'], 'GET') - self.assertEqual(response['Access-Control-Allow-Origin'], '*') + self.assertEqual(response["Access-Control-Allow-Methods"], "GET") + self.assertEqual(response["Access-Control-Allow-Origin"], "*") diff --git a/onadata/apps/viewer/models/parsed_instance.py b/onadata/apps/viewer/models/parsed_instance.py index 5c00f78403..e91e058d62 100644 --- a/onadata/apps/viewer/models/parsed_instance.py +++ b/onadata/apps/viewer/models/parsed_instance.py @@ -136,7 +136,9 @@ def parse_json(data): yield parse_json(row[0]) if row[0] else None else: for row in cursor.fetchall(): - yield dict(zip(fields, (json.loads(s) for s in row))) + yield dict( + zip(fields, (json.loads(s) if isinstance(s, str) else s for s in row)) + ) def get_etag_hash_from_query(queryset, sql=None, params=None): From ddebd2a2d540909b9fd4c46832c14472e841c1c6 Mon Sep 17 00:00:00 2001 From: Ukang'a Dickson Date: Thu, 9 Jun 2022 20:27:19 +0300 Subject: [PATCH 3/3] Should expect an integer --- onadata/apps/logger/tests/models/test_instance.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/onadata/apps/logger/tests/models/test_instance.py b/onadata/apps/logger/tests/models/test_instance.py index 06741cc3a5..3716a1894d 100644 --- a/onadata/apps/logger/tests/models/test_instance.py +++ b/onadata/apps/logger/tests/models/test_instance.py @@ -195,7 +195,7 @@ def test_query_filter_by_integer(self): ) ] self.assertEqual(len(data), 1) - self.assertEqual(data, [str(oldest)]) + self.assertEqual(data, [oldest]) # mongo $gt data = [