Skip to content

Commit

Permalink
Merge pull request #2137 from onaio/empty_exports
Browse files Browse the repository at this point in the history
Generate xform headers in csv exports for XForms without submissions
  • Loading branch information
ukanga committed Sep 23, 2021
2 parents a6a95a3 + ad7e09f commit c5030cb
Show file tree
Hide file tree
Showing 5 changed files with 188 additions and 51 deletions.
12 changes: 11 additions & 1 deletion onadata/apps/viewer/tests/test_export_list.py
Original file line number Diff line number Diff line change
Expand Up @@ -176,13 +176,23 @@ def test_csv_export_url(self):

def test_csv_export_url_without_records(self):
    """CSV export of a form without submissions returns HTTP 200.

    The view was refactored so that when a NoRecordsFound exception
    is thrown it returns a csv containing only the xform schema
    (the column headers) instead of failing.
    """
    url = reverse('csv_export', kwargs={
        'username': self.user.username,
        'id_string': self.xform.id_string,
    })
    response = self.client.get(url)
    self.assertEqual(response.status_code, 200)
    # Unpack the streaming response into rows of comma-separated values
    # so the header row can be compared against the form schema.
    export_data = [i.decode(
        'utf-8').replace('\n', '').split(
            ',') for i in response.streaming_content]
    xform_headers = self.xform.get_headers()
    # Review headers are not part of a plain csv export; drop them
    # before comparing.
    for x in ['_review_status', '_review_comment']:
        xform_headers.remove(x)
    # The first row of the export must be the xform headers list.
    self.assertEqual(xform_headers, export_data[0])

def test_xls_export_url(self):
self._submit_transport_instance()
Expand Down
14 changes: 14 additions & 0 deletions onadata/apps/viewer/tests/test_exports.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,10 @@ def test_csv_without_na_values(self):
settings.NA_REP = na_rep_restore

def test_responses_for_empty_exports(self):
"""
csv exports for forms without submissions
should return xform column headers in export.
"""
self._publish_transportation_form()
# test csv though xls uses the same view
url = reverse(
Expand All @@ -109,6 +113,16 @@ def test_responses_for_empty_exports(self):
self.response = self.client.get(url)
self.assertEqual(self.response.status_code, 200)
self.assertIn('application/csv', self.response['content-type'])
# Unpack response streaming data
export_data = [i.decode(
'utf-8').replace('\n', '').split(
',') for i in self.response.streaming_content]
xform_headers = self.xform.get_headers()
# Remove review headers from xform headers
for x in ['_review_status', '_review_comment']:
xform_headers.remove(x)
# Test export data returned is xform headers list
self.assertEqual(xform_headers, export_data[0])

def test_create_export(self):
self._publish_transportation_form_and_submit_instance()
Expand Down
1 change: 1 addition & 0 deletions onadata/apps/viewer/tests/test_tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ def setUp(self):
def test_create_async(self):

self._publish_transportation_form_and_submit_instance()
self.xform.refresh_from_db()
options = {"group_delimiter": "/",
"remove_group_name": False,
"split_select_multiples": True}
Expand Down
122 changes: 122 additions & 0 deletions onadata/libs/tests/utils/test_csv_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
Test CSVDataFrameBuilder
"""
import csv
from onadata.libs.exceptions import NoRecordsFoundError
import os
from tempfile import NamedTemporaryFile

Expand Down Expand Up @@ -1209,6 +1210,127 @@ def test_show_choice_labels_select_multiple_2(self, mock_query_data):
}]
self.assertEqual(expected_result, result)

def test_export_data_for_xforms_without_submissions(self):
    """
    Test that the xform schema (column headers) is successfully
    exported for a form that has no submissions.
    """
    fixture = "new_repeats"
    # publish form so we have a data dictionary
    self._publish_xls_fixture_set_xform(fixture)

    # Confirm the form has no submissions so far
    self.assertEqual(self.xform.instances.count(), 0)
    # Generate a csv export for the form
    csv_df_builder = CSVDataFrameBuilder(
        self.user.username, self.xform.id_string, include_images=False)
    temp_file = NamedTemporaryFile(suffix=".csv", delete=False)
    csv_df_builder.export_to(temp_file.name)
    # Read back only the header row of the generated export
    with open(temp_file.name, 'r') as csv_file:
        header = next(csv.reader(csv_file))

    expected_header = [
        'info/name', 'info/age', 'kids/has_kids', 'gps', '_gps_latitude',
        '_gps_longitude', '_gps_altitude', '_gps_precision',
        'web_browsers/firefox', 'web_browsers/chrome', 'web_browsers/ie',
        'web_browsers/safari', 'meta/instanceID', '_id', '_uuid',
        '_submission_time', '_date_modified', '_tags', '_notes',
        '_version', '_duration', '_submitted_by', '_total_media',
        '_media_count', '_media_all_received']
    # Test form headers are present on the exported csv file.
    self.assertEqual(header, expected_header)

    # remove the temporary export file (created with delete=False)
    os.unlink(temp_file.name)

def test_export_data_for_xforms_with_newer_submissions(self):
    """
    Test that export column headers are regenerated (including repeat
    group columns) when submissions arrive after an initial export was
    generated for a form without submissions.
    """
    fixture = "new_repeats"
    # publish form so we have a data dictionary
    self._publish_xls_fixture_set_xform(fixture)

    # Confirm the form has no submissions so far
    self.assertEqual(self.xform.instances.count(), 0)
    # Generate a csv export while the form has no submissions
    csv_df_builder = CSVDataFrameBuilder(
        self.user.username, self.xform.id_string, include_images=False)
    temp_file = NamedTemporaryFile(suffix=".csv", delete=False)
    csv_df_builder.export_to(temp_file.name)
    with open(temp_file.name, 'r') as csv_file:
        header = next(csv.reader(csv_file))
    os.unlink(temp_file.name)

    expected_header = [
        'info/name', 'info/age', 'kids/has_kids', 'gps', '_gps_latitude',
        '_gps_longitude', '_gps_altitude', '_gps_precision',
        'web_browsers/firefox', 'web_browsers/chrome', 'web_browsers/ie',
        'web_browsers/safari', 'meta/instanceID', '_id', '_uuid',
        '_submission_time', '_date_modified', '_tags', '_notes',
        '_version', '_duration', '_submitted_by', '_total_media',
        '_media_count', '_media_all_received']
    # Test form headers are present on the exported csv file.
    self.assertEqual(header, expected_header)

    # make submissions to the xform after the export was generated
    for _ in range(4):
        self._submit_fixture_instance("new_repeats", "01")
    self._submit_fixture_instance("new_repeats", "02")
    # pylint: disable=protected-access
    record_count = csv_df_builder._query_data(count=True)
    self.assertEqual(record_count, 5)
    # Re-export: headers should now include the repeat group columns
    temp_file = NamedTemporaryFile(suffix=".csv", delete=False)
    csv_df_builder.export_to(temp_file.name)
    with open(temp_file.name, 'r') as csv_file:
        csv_reader = csv.reader(csv_file)
        newer_header = next(csv_reader)
        rows = list(csv_reader)

    expected_headers = [
        'info/name', 'info/age', 'kids/has_kids',
        'kids/kids_details[1]/kids_name', 'kids/kids_details[1]/kids_age',
        'kids/kids_details[2]/kids_name', 'kids/kids_details[2]/kids_age',
        'gps', '_gps_latitude', '_gps_longitude', '_gps_altitude',
        '_gps_precision', 'web_browsers/firefox', 'web_browsers/chrome',
        'web_browsers/ie', 'web_browsers/safari', 'meta/instanceID', '_id',
        '_uuid', '_submission_time', '_date_modified', '_tags', '_notes',
        '_version', '_duration', '_submitted_by', '_total_media',
        '_media_count', '_media_all_received']

    # Test export headers are recreated with repeat data.
    self.assertEqual(newer_header, expected_headers)

    # The initial header had 13 survey columns plus the standard
    # extra metadata columns.
    self.assertEqual(len(header), 13 + len(csv_df_builder.extra_columns))
    self.assertEqual(len(rows), 5)
    # Submission "02" has no value in the 6th column; NA_REP is used.
    self.assertEqual(rows[4][5], NA_REP)

    # remove the temporary export file (created with delete=False)
    os.unlink(temp_file.name)

def test_export_raises_NoRecordsFound_for_form_without_instances(self):
    """
    Test that querying records for a form without submissions raises
    a NoRecordsFoundError exception.
    """
    fixture = "new_repeats"
    # publish form so we have a data dictionary
    self._publish_xls_fixture_set_xform(fixture)

    # Confirm the form has no submissions so far
    self.assertEqual(self.xform.instances.count(), 0)
    csv_df_builder = CSVDataFrameBuilder(
        self.user.username,
        self.xform.id_string,
        split_select_multiples=True, binary_select_multiples=False,
        include_images=False, show_choice_labels=True)
    # Fetching form data raises a NoRecordsFoundError exception
    # pylint: disable=protected-access
    with self.assertRaises(NoRecordsFoundError):
        csv_df_builder._query_data()

@patch.object(CSVDataFrameBuilder, '_query_data')
def test_show_choice_labels_select_multiple_3(self, mock_query_data):
"""
Expand Down
90 changes: 40 additions & 50 deletions onadata/libs/utils/csv_builder.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from collections import OrderedDict
from itertools import chain
from collections import OrderedDict

import unicodecsv as csv
from django.conf import settings
Expand Down Expand Up @@ -397,6 +397,8 @@ def __init__(self, username, id_string, filter_query=None,
show_choice_labels, include_reviews, language)

self.ordered_columns = OrderedDict()
self.image_xpaths = [] if not self.include_images \
else self.dd.get_media_survey_xpaths()

def _setup(self):
super(CSVDataFrameBuilder, self)._setup()
Expand Down Expand Up @@ -533,69 +535,54 @@ def _build_ordered_columns(cls, survey_element, ordered_columns,
# generated when we reindex
ordered_columns[child.get_abbreviated_xpath()] = None

def _update_columns_from_data(self, cursor):
def _update_ordered_columns_from_data(self, cursor):
"""
Populates `self.ordered_columns` object that is
used to generate export column headers for
forms that split select multiple and gps data.
"""
# add ordered columns for select multiples
if self.split_select_multiples:
for key, choices in self.select_multiples.items():
for (key, choices) in iteritems(self.select_multiples):
# HACK to ensure choices are NOT duplicated
if key in self.ordered_columns.keys():
self.ordered_columns[key] = \
remove_dups_from_list_maintain_order(
[choice.replace('/' + name, '/' + label)
if self.show_choice_labels else choice
for choice, name, label in choices])
if self.show_choice_labels else choice
for choice, name, label in choices])

# add ordered columns for gps fields
for key in self.gps_fields:
gps_xpaths = self.dd.get_additional_geopoint_xpaths(key)
self.ordered_columns[key] = [key] + gps_xpaths
image_xpaths = [] if not self.include_images \
else self.dd.get_media_survey_xpaths()

# add ordered columns for nested repeat data
for record in cursor:
# split select multiples
if self.split_select_multiples:
record = self._split_select_multiples(
record, self.select_multiples,
self.BINARY_SELECT_MULTIPLES, self.VALUE_SELECT_MULTIPLES,
show_choice_labels=self.show_choice_labels)
# check for gps and split into components i.e. latitude, longitude,
# altitude, precision
self._split_gps_fields(record, self.gps_fields)
self._tag_edit_string(record)
# re index repeats
# re index column repeats
for (key, value) in iteritems(record):
self._reindex(
key, value, self.ordered_columns, record, self.dd,
include_images=image_xpaths,
include_images=self.image_xpaths,
split_select_multiples=self.split_select_multiples,
index_tags=self.index_tags,
show_choice_labels=self.show_choice_labels,
language=self.language)

def _format_for_dataframe(self, cursor):
# TODO: check for and handle empty results
# add ordered columns for select multiples
if self.split_select_multiples:
for (key, choices) in iteritems(self.select_multiples):
# HACK to ensure choices are NOT duplicated
self.ordered_columns[key] = \
remove_dups_from_list_maintain_order(choices)
# add ordered columns for gps fields
for key in self.gps_fields:
gps_xpaths = self.dd.get_additional_geopoint_xpaths(key)
self.ordered_columns[key] = [key] + gps_xpaths
image_xpaths = [] if not self.include_images \
else self.dd.get_media_survey_xpaths()

"""
Unpacks nested repeat data for export.
"""
for record in cursor:
# split select multiples
if self.split_select_multiples:
record = self._split_select_multiples(
record, self.select_multiples,
self.BINARY_SELECT_MULTIPLES, self.VALUE_SELECT_MULTIPLES,
self.BINARY_SELECT_MULTIPLES,
self.VALUE_SELECT_MULTIPLES,
show_choice_labels=self.show_choice_labels)
# check for gps and split into components i.e. latitude, longitude,
# check for gps and split into
# components i.e. latitude, longitude,
# altitude, precision
self._split_gps_fields(record, self.gps_fields)
self._tag_edit_string(record)
Expand All @@ -604,13 +591,12 @@ def _format_for_dataframe(self, cursor):
for (key, value) in iteritems(record):
reindexed = self._reindex(
key, value, self.ordered_columns, record, self.dd,
include_images=image_xpaths,
include_images=self.image_xpaths,
split_select_multiples=self.split_select_multiples,
index_tags=self.index_tags,
show_choice_labels=self.show_choice_labels,
language=self.language)
flat_dict.update(reindexed)

yield flat_dict

def export_to(self, path, dataview=None):
Expand All @@ -622,38 +608,42 @@ def export_to(self, path, dataview=None):
filter_query=self.filter_query)
if isinstance(cursor, QuerySet):
cursor = cursor.iterator()
self._update_columns_from_data(cursor)

self._update_ordered_columns_from_data(cursor)

data = self._format_for_dataframe(cursor)

columns = list(chain.from_iterable(
[[xpath] if cols is None else cols
for (xpath, cols) in iteritems(self.ordered_columns)
if [c for c in dataview.columns if xpath.startswith(c)]]
))
cursor = dataview.query_data(dataview, all_data=True,
filter_query=self.filter_query)
if isinstance(cursor, QuerySet):
cursor = cursor.iterator()
data = self._format_for_dataframe(cursor)
else:
cursor = self._query_data(self.filter_query)
try:
cursor = self._query_data(self.filter_query)
except NoRecordsFoundError:
# Set cursor object to an an empty queryset
cursor = self.xform.instances.none()

self._update_ordered_columns_from_data(cursor)

if isinstance(cursor, QuerySet):
cursor = cursor.iterator()
self._update_columns_from_data(cursor)

# Unpack xform columns and data
data = self._format_for_dataframe(cursor)

columns = list(chain.from_iterable(
[[xpath] if cols is None else cols
for (xpath, cols) in iteritems(self.ordered_columns)]))
for (xpath, cols) in iteritems(self.ordered_columns)]))

# add extra columns
columns += [col for col in self.extra_columns]

for field in self.dd.get_survey_elements_of_type('osm'):
columns += OsmData.get_tag_keys(self.xform,
field.get_abbreviated_xpath(),
include_prefix=True)
cursor = self._query_data(self.filter_query)
if isinstance(cursor, QuerySet):
cursor = cursor.iterator()
data = self._format_for_dataframe(cursor)

columns_with_hxl = self.include_hxl and get_columns_with_hxl(
self.dd.survey_elements)
Expand Down

0 comments on commit c5030cb

Please sign in to comment.