Skip to content

Commit

Permalink
Fix pagination on endpoint /api/v2/open-data/<id>/data returning du…
Browse files Browse the repository at this point in the history
…plicates (#2467)

* fix api/v2/open-data/<id>/data results growing with increase page number

With each increase in page number, the number of results returned increase by a 100

* order /api/v2/open-data/<id>/data results when pagination is applied

For pagination to work without dupliacates, the results have to be ordered. Otherwise, the database will not guarantee a record  encountered in a previous page will not be returned in a future page as the database does not order results by default and will return randomly

* enhance test case

* refactor code

* fix flaky test
  • Loading branch information
kelvin-muchiri authored Aug 14, 2023
1 parent 0068432 commit 075c193
Show file tree
Hide file tree
Showing 5 changed files with 98 additions and 56 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ build
# media folder used by tests
# TODO figure out a way to clean this up rather than ignore it.
/onadata/test_media
/onadata/test_data_media
/onadata/media
/onadata/static

Expand Down
48 changes: 38 additions & 10 deletions onadata/apps/api/tests/viewsets/test_tableau_viewset.py
Original file line number Diff line number Diff line change
Expand Up @@ -328,27 +328,55 @@ def test_replace_media_links(self):
def test_pagination(self):
"""Pagination works correctly"""
self.view = TableauViewSet.as_view({"get": "data"})
# test 1 submission
_open_data = get_or_create_opendata(self.xform)
uuid = _open_data[0].uuid
# Multiple submissions are ordered by primary key
# For pagination to work without duplicates, the results have to
# be ordered. Otherwise, the database will not guarantee a record
# encountered in a previous page will not be returned in a future page
# as the database does not order results by default and will return
# randomly
path = os.path.join(self.fixture_dir, "repeats_sub.xml")
# Create additional submissions to increase our chances of the results
# being random
for _ in range(200):
self._make_submission(path, forced_submission_time=self._submission_time)

# Page 1
request = self.factory.get(
"/", data={"page": 1, "page_size": 100}, **self.extra
)
response = self.view(request, uuid=uuid)
self.assertEqual(response.status_code, 200)
row_data = streaming_data(response)
self.assertEqual(len(row_data), 1)
self.assertEqual(len(row_data), 100)
instances = self.xform.instances.all().order_by("pk")
self.assertEqual(len(instances), 201)

# multiple submissions are ordered by primary key
path = os.path.join(self.fixture_dir, "repeats_sub.xml")
self._make_submission(path, forced_submission_time=self._submission_time)
for index, instance in enumerate(instances[:100]):
self.assertEqual(row_data[index]["_id"], instance.pk)

# Page 2
request = self.factory.get(
"/", data={"page": 2, "page_size": 100}, **self.extra
)
response = self.view(request, uuid=uuid)
self.assertEqual(response.status_code, 200)
row_data = streaming_data(response)
self.assertEqual(len(row_data), 2)
instances = self.xform.instances.all().order_by("pk")
self.assertEqual(row_data[0]["_id"], instances[0].pk)
self.assertEqual(row_data[1]["_id"], instances[1].pk)
self.assertEqual(len(row_data), 100)

for index, instance in enumerate(instances[100:101]):
self.assertEqual(row_data[index]["_id"], instance.pk)

# Page 3
request = self.factory.get(
"/", data={"page": 3, "page_size": 100}, **self.extra
)
response = self.view(request, uuid=uuid)
self.assertEqual(response.status_code, 200)
row_data = streaming_data(response)
self.assertEqual(len(row_data), 1)
self.assertEqual(row_data[0]["_id"], instances.last().pk)

def test_count_query_param(self):
"""count query param works"""
Expand All @@ -368,7 +396,7 @@ def test_gt_id_query_param(self):
self.view = TableauViewSet.as_view({"get": "data"})
_open_data = get_or_create_opendata(self.xform)
uuid = _open_data[0].uuid
request = self.factory.get("/", data={"gt_id": 500}, **self.extra)
request = self.factory.get("/", data={"gt_id": 10000}, **self.extra)
response = self.view(request, uuid=uuid)
self.assertEqual(response.status_code, 200)
row_data = streaming_data(response)
Expand Down
24 changes: 6 additions & 18 deletions onadata/apps/api/viewsets/v2/tableau_viewset.py
Original file line number Diff line number Diff line change
Expand Up @@ -169,17 +169,6 @@ class TableauViewSet(OpenDataViewSet):
TableauViewSet - the /api/v2/tableau API endpoin implementation.
"""

pagination_class = RawSQLQueryPageNumberPagination
data_count = None

def paginate_queryset(self, queryset):
"""Returns a paginated queryset."""
if self.paginator is None:
return None
return self.paginator.paginate_queryset(
queryset, self.request, view=self, count=self.data_count
)

@action(methods=["GET"], detail=True)
def data(self, request, **kwargs):
# pylint: disable=attribute-defined-outside-init
Expand All @@ -195,6 +184,7 @@ def data(self, request, **kwargs):
query_param_keys = request.query_params
should_paginate = any(k in query_param_keys for k in pagination_keys)
data = []
data_count = 0

if isinstance(self.object.content_object, XForm):
if not self.object.active:
Expand All @@ -217,14 +207,14 @@ def data(self, request, **kwargs):
if gt_id:
qs_kwargs.update({"id__gt": gt_id})

self.data_count = (
data_count = (
Instance.objects.filter(**qs_kwargs, deleted_at__isnull=True)
.only("pk")
.count()
)

if count:
return Response({"count": self.data_count})
return Response({"count": data_count})

sql_where = ""
sql_where_params = []
Expand All @@ -246,12 +236,10 @@ def data(self, request, **kwargs):
sql_params = [tuple(xform_pks)] + sql_where_params

if should_paginate:
offset, limit = self.paginator.get_offset_limit(
self.request, self.data_count
)
sql += " LIMIT %s OFFSET %s"
raw_paginator = RawSQLQueryPageNumberPagination()
offset, limit = raw_paginator.get_offset_limit(self.request, data_count)
sql += " ORDER BY id LIMIT %s OFFSET %s"
instances = Instance.objects.raw(sql, sql_params + [limit, offset])
instances = self.paginate_queryset(instances)

else:
instances = Instance.objects.raw(sql, sql_params)
Expand Down
7 changes: 2 additions & 5 deletions onadata/libs/pagination.py
Original file line number Diff line number Diff line change
Expand Up @@ -159,7 +159,7 @@ class RawSQLQueryPageNumberPagination(CountOverridablePageNumberPagination):

django_paginator_class = RawSQLQueryPaginator

def get_offset_limit(self, request, count) -> Tuple[int, int]:
def get_offset_limit(self, request, count: int) -> Tuple[int, int]:
"""Returns the offset and limit to be used in a raw SQL query"""
page_size = self.get_page_size(request)
# pass an empty object_list since we are not handling any pagination
Expand All @@ -169,9 +169,6 @@ def get_offset_limit(self, request, count) -> Tuple[int, int]:
self.get_page_number(request, paginator)
)
offset = (page_number - 1) * paginator.per_page
limit = offset + paginator.per_page

if limit + paginator.orphans >= paginator.count:
limit = paginator.count
limit = paginator.per_page

return (offset, limit)
74 changes: 51 additions & 23 deletions onadata/libs/tests/test_pagination.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,10 @@

from onadata.apps.main.tests.test_base import TestBase
from onadata.apps.logger.models import Instance
from onadata.libs.pagination import StandardPageNumberPagination
from onadata.libs.pagination import (
StandardPageNumberPagination,
RawSQLQueryPageNumberPagination,
)


class TestPaginationModule(TestBase):
Expand All @@ -17,42 +20,67 @@ class TestPaginationModule(TestBase):

def test_generate_link_header_function(self):
req = HttpRequest()
req.META['SERVER_NAME'] = 'testserver'
req.META['SERVER_PORT'] = '80'
req.META['QUERY_STRING'] = "page=1&page_size=1"
req.META["SERVER_NAME"] = "testserver"
req.META["SERVER_PORT"] = "80"
req.META["QUERY_STRING"] = "page=1&page_size=1"
req.GET = {"page": 1, "page_size": 1}
self._publish_transportation_form()
self._make_submissions()
qs = Instance.objects.filter(xform=self.xform)
out = StandardPageNumberPagination().generate_link_header(
Request(req), qs
)
out = StandardPageNumberPagination().generate_link_header(Request(req), qs)
expected_out = {
'Link': '<http://testserver?page=2&page_size=1>; rel="next",'
' <http://testserver?page=4&page_size=1>; rel="last"'
"Link": '<http://testserver?page=2&page_size=1>; rel="next",'
' <http://testserver?page=4&page_size=1>; rel="last"'
}
self.assertEqual(out, expected_out)

# First page link is created when not on the first page
req.META['QUERY_STRING'] = "page=2&page_size=1"
req.META["QUERY_STRING"] = "page=2&page_size=1"
req.GET = {"page": 2, "page_size": 1}
out = StandardPageNumberPagination().generate_link_header(
Request(req), qs
)
out = StandardPageNumberPagination().generate_link_header(Request(req), qs)
expected_out = {
'Link': '<http://testserver?page_size=1>; rel="prev", '
'<http://testserver?page=3&page_size=1>; rel="next", '
'<http://testserver?page=4&page_size=1>; rel="last", '
'<http://testserver?page=1&page_size=1>; rel="first"'}
"Link": '<http://testserver?page_size=1>; rel="prev", '
'<http://testserver?page=3&page_size=1>; rel="next", '
'<http://testserver?page=4&page_size=1>; rel="last", '
'<http://testserver?page=1&page_size=1>; rel="first"'
}
self.assertEqual(out, expected_out)

# Last page link is not created on last page
req.META['QUERY_STRING'] = "page=4&page_size=1"
req.META["QUERY_STRING"] = "page=4&page_size=1"
req.GET = {"page": 4, "page_size": 1}
out = StandardPageNumberPagination().generate_link_header(
Request(req), qs
)
out = StandardPageNumberPagination().generate_link_header(Request(req), qs)
expected_out = {
'Link': '<http://testserver?page=3&page_size=1>; rel="prev", '
'<http://testserver?page=1&page_size=1>; rel="first"'}
"Link": '<http://testserver?page=3&page_size=1>; rel="prev", '
'<http://testserver?page=1&page_size=1>; rel="first"'
}
self.assertEqual(out, expected_out)


class RawSQLQueryPageNumberPaginationTestCase(TestBase):
"""Tests for the RawSQLQueryPageNumberPagination class"""

def setUp(self):
super().setUp()

self.request = HttpRequest()
self.request.method = "GET"
self.paginator = RawSQLQueryPageNumberPagination()

def test_offset_limit(self):
"""Returns the correct values for offset and limit"""
# page 1
self.request.GET = {"page": 1, "page_size": 100}
offset, limit = self.paginator.get_offset_limit(Request(self.request), 500)
self.assertEqual(offset, 0)
self.assertEqual(limit, 100)
# page 2
self.request.GET = {"page": 2, "page_size": 100}
offset, limit = self.paginator.get_offset_limit(Request(self.request), 500)
self.assertEqual(offset, 100)
self.assertEqual(limit, 100)
# page 3
self.request.GET = {"page": 3, "page_size": 100}
offset, limit = self.paginator.get_offset_limit(Request(self.request), 500)
self.assertEqual(offset, 200)
self.assertEqual(limit, 100)

0 comments on commit 075c193

Please sign in to comment.