From ccb67656c35c77b24f27ed0edbe80e68258fa412 Mon Sep 17 00:00:00 2001 From: WinnyTroy Date: Thu, 19 Aug 2021 13:37:15 +0300 Subject: [PATCH 01/10] Fail exports generated for forms with no submissions --- onadata/apps/viewer/tasks.py | 28 +++++++++++++++++----------- 1 file changed, 17 insertions(+), 11 deletions(-) diff --git a/onadata/apps/viewer/tasks.py b/onadata/apps/viewer/tasks.py index 54a846e38c..06e15ef0eb 100644 --- a/onadata/apps/viewer/tasks.py +++ b/onadata/apps/viewer/tasks.py @@ -96,18 +96,24 @@ def _create_export(xform, export_type, options): Export.EXTERNAL_EXPORT: create_external_export } - # start async export - if export_type in export_types: - try: - result = export_types[export_type].apply_async((), kwargs=options) - except OperationalError as e: - export.internal_status = Export.FAILED - export.error_message = "Error connecting to broker." - export.save() - report_exception(export.error_message, e, sys.exc_info()) - raise Export.ExportConnectionError + if export.xform.num_of_submissions > 0: + # start async export + if export_type in export_types: + try: + result = export_types[export_type].apply_async((), kwargs=options) + except OperationalError as e: + export.internal_status = Export.FAILED + export.error_message = "Error connecting to broker." + export.save() + report_exception(export.error_message, e, sys.exc_info()) + raise Export.ExportConnectionError + else: + raise ExportTypeError else: - raise ExportTypeError + export.error_message = "No records found for this export" + export.internal_status = Export.FAILED + export.save() + return export if result: # when celery is running eager, the export has been generated by the From 22382774eb413b068c3105de7327b8b045da87df Mon Sep 17 00:00:00 2001 From: WinnyTroy Date: Thu, 19 Aug 2021 18:55:44 +0300 Subject: [PATCH 02/10] Update failing tests --- onadata/apps/viewer/tests/test_exports.py | 5 +++++ onadata/apps/viewer/tests/test_tasks.py | 1 + onadata/libs/tests/serializers/test_export_serializer.py | 2 ++ onadata/libs/tests/utils/test_api_export_tools.py | 1 + 4 files changed, 9 insertions(+) diff --git a/onadata/apps/viewer/tests/test_exports.py b/onadata/apps/viewer/tests/test_exports.py index 387c30e708..a5c4d10e63 100644 --- a/onadata/apps/viewer/tests/test_exports.py +++ b/onadata/apps/viewer/tests/test_exports.py @@ -348,6 +348,7 @@ def test_auto_export_if_none_exists(self): def test_dont_auto_export_if_exports_exist(self): self._publish_transportation_form() self._submit_transport_instance() + self.xform.refresh_from_db() # create export create_export_url = reverse(create_export, kwargs={ 'username': self.user.username, @@ -434,6 +435,7 @@ def test_last_submission_time_empty(self): def test_invalid_export_type(self): self._publish_transportation_form() self._submit_transport_instance() + self.xform.refresh_from_db() export_list_url = reverse(export_list, kwargs={ 'username': self.user.username, 'id_string': self.xform.id_string, @@ -838,6 +840,7 @@ def test_column_header_delimiter_export_option(self): # survey 1 has ambulance and bicycle as values for # transport/available_transportation_types_to_referral_facility self._submit_transport_instance(survey_at=1) + self.xform.refresh_from_db() create_csv_export_url = reverse(create_export, kwargs={ 'username': self.user.username, 'id_string': self.xform.id_string, @@ -901,6 +904,7 @@ def test_column_header_delimiter_export_option(self): def test_split_select_multiple_export_option(self): self._publish_transportation_form() self._submit_transport_instance(survey_at=1) + self.xform.refresh_from_db() create_csv_export_url = reverse(create_export, kwargs={ 'username': self.user.username, 'id_string': self.xform.id_string, @@ -1291,6 +1295,7 @@ def test_create_external_export_url_with_non_existing_export_id( mock_404.side_effect = Http404('No Export matches the given query.') self._publish_transportation_form() self._submit_transport_instance() + self.xform.refresh_from_db() server = 'http://localhost:8080/xls/23fa4c38c0054748a984ffd89021a295' data_value = 'template 1 |{0}'.format(server) diff --git a/onadata/apps/viewer/tests/test_tasks.py b/onadata/apps/viewer/tests/test_tasks.py index 2b22f55d40..a080de361e 100644 --- a/onadata/apps/viewer/tests/test_tasks.py +++ b/onadata/apps/viewer/tests/test_tasks.py @@ -21,6 +21,7 @@ def setUp(self): def test_create_async(self): self._publish_transportation_form_and_submit_instance() + self.xform.refresh_from_db() options = {"group_delimiter": "/", "remove_group_name": False, "split_select_multiples": True} diff --git a/onadata/libs/tests/serializers/test_export_serializer.py b/onadata/libs/tests/serializers/test_export_serializer.py index 7a60809f62..39d34db50d 100644 --- a/onadata/libs/tests/serializers/test_export_serializer.py +++ b/onadata/libs/tests/serializers/test_export_serializer.py @@ -15,6 +15,8 @@ class TestExportSerializer(TestAbstractViewSet): def test_export_serializer(self): request = APIRequestFactory().get('/') self._publish_xls_form_to_project() + self._make_submissions() + self.xform.refresh_from_db() temp_dir = settings.MEDIA_ROOT dummy_export_file = NamedTemporaryFile(suffix='.xlsx', dir=temp_dir) filename = os.path.basename(dummy_export_file.name) diff --git a/onadata/libs/tests/utils/test_api_export_tools.py b/onadata/libs/tests/utils/test_api_export_tools.py index 9f1bb9736c..c1816465f0 100644 --- a/onadata/libs/tests/utils/test_api_export_tools.py +++ b/onadata/libs/tests/utils/test_api_export_tools.py @@ -43,6 +43,7 @@ def test_process_async_export_creates_new_export(self): Test process_async_export creates a new export. """ self._publish_transportation_form_and_submit_instance() + self.xform.refresh_from_db() request = self.factory.post('/') request.user = self.user export_type = "csv" From da7ba700ba7767efdab5f9e4c461c8293cfff5e0 Mon Sep 17 00:00:00 2001 From: WinnyTroy Date: Thu, 19 Aug 2021 18:57:06 +0300 Subject: [PATCH 03/10] Include check that confirms both xforms creating linked datasets have submissions --- onadata/apps/viewer/tasks.py | 3 ++- onadata/libs/serializers/metadata_serializer.py | 9 +++++++++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/onadata/apps/viewer/tasks.py b/onadata/apps/viewer/tasks.py index 06e15ef0eb..a9fc3f909a 100644 --- a/onadata/apps/viewer/tasks.py +++ b/onadata/apps/viewer/tasks.py @@ -100,7 +100,8 @@ def _create_export(xform, export_type, options): # start async export if export_type in export_types: try: - result = export_types[export_type].apply_async((), kwargs=options) + result = export_types[export_type].apply_async( + (), kwargs=options) except OperationalError as e: export.internal_status = Export.FAILED export.error_message = "Error connecting to broker." diff --git a/onadata/libs/serializers/metadata_serializer.py b/onadata/libs/serializers/metadata_serializer.py index 5444c0b552..52c93174fc 100644 --- a/onadata/libs/serializers/metadata_serializer.py +++ b/onadata/libs/serializers/metadata_serializer.py @@ -184,6 +184,10 @@ def validate(self, attrs): _(u"User has no permission to " "the dataview.") }) + # ensure received xform contains submissions + elif not xform.num_of_submissions > 0: + raise serializers.ValidationError( + f"Form {xform.title} contains no submissions.") else: raise serializers.ValidationError({ 'data_value': @@ -255,6 +259,11 @@ def create(self, validated_data): elif data_type == IMPORTED_VIA_CSV_BY: metadata = MetaData.instance_csv_imported_by( content_object, data_value=data_value) + # ensure current xform contains submissions + elif not validated_data['xform'].num_of_submissions > 0: + raise serializers.ValidationError( + f"XForm {validated_data['xform'].title}\ + contains no submissions") else: metadata = MetaData.objects.create( content_type=content_type, From 7a45be54e408c3a84fac1240589ef593b7fa17a1 Mon Sep 17 00:00:00 2001 From: WinnyTroy Date: Fri, 20 Aug 2021 10:36:24 +0300 Subject: [PATCH 04/10] Update failing tests when creating exports or fetching metadata from froms with no submissions --- .../tests/viewsets/test_metadata_viewset.py | 67 ++++++++++++++++++- .../api/tests/viewsets/test_team_viewset.py | 4 ++ .../tests/viewsets/test_xform_list_viewset.py | 37 +++++++++- .../api/tests/viewsets/test_xform_viewset.py | 29 ++++++++ .../libs/serializers/metadata_serializer.py | 15 +++-- 5 files changed, 144 insertions(+), 8 deletions(-) diff --git a/onadata/apps/api/tests/viewsets/test_metadata_viewset.py b/onadata/apps/api/tests/viewsets/test_metadata_viewset.py index 286c79f455..10815a994a 100644 --- a/onadata/apps/api/tests/viewsets/test_metadata_viewset.py +++ b/onadata/apps/api/tests/viewsets/test_metadata_viewset.py @@ -89,14 +89,20 @@ def _add_instance_metadata(self, self._post_metadata(data) def test_add_metadata_with_file_attachment(self): + # Add submissions before creating form metadata + self._make_submissions() + self.xform.refresh_from_db() for data_type in ['supporting_doc', 'media', 'source']: self._add_form_metadata(self.xform, data_type, self.data_value, self.path) def test_parse_error_is_raised(self): """Parse error is raised when duplicate media is uploaded""" - data_type = "supporting_doc" + # Add submissions before creating form metadata + self._make_submissions() + self.xform.refresh_from_db() + data_type = "supporting_doc" self._add_form_metadata(self.xform, data_type, self.data_value, self.path) # Duplicate upload @@ -106,7 +112,10 @@ def test_parse_error_is_raised(self): self.assertIn(UNIQUE_TOGETHER_ERROR, response.data) def test_forms_endpoint_with_metadata(self): + # Add submissions before creating form metadata + self._make_submissions() date_modified = self.xform.date_modified + self.xform.refresh_from_db() for data_type in ['supporting_doc', 'media', 'source']: self._add_form_metadata(self.xform, data_type, self.data_value, self.path) @@ -134,6 +143,9 @@ def test_forms_endpoint_with_metadata(self): self.assertEqual(response.data, [data]) def test_get_metadata_with_file_attachment(self): + # Add submissions before creating form metadata + self._make_submissions() + self.xform.refresh_from_db() for data_type in ['supporting_doc', 'media', 'source']: self._add_form_metadata(self.xform, data_type, self.data_value, self.path) @@ -155,6 +167,9 @@ def test_get_metadata(self): ) self.data_value = '1335783522563.jpg' self.path = os.path.join(self.fixture_dir, self.data_value) + # Add submissions before creating form metadata + self._make_submissions() + self.xform.refresh_from_db() self._add_form_metadata( self.xform, "media", self.data_value, self.path) @@ -178,11 +193,17 @@ def test_get_metadata(self): self.assertDictEqual(dict(response.data), data) def test_add_mapbox_layer(self): + # Add submissions before creating form metadata + self._make_submissions() + self.xform.refresh_from_db() data_type = 'mapbox_layer' data_value = 'test_mapbox_layer||http://0.0.0.0:8080||attribution' self._add_form_metadata(self.xform, data_type, data_value) def test_delete_metadata(self): + # Add submissions before creating form metadata + self._make_submissions() + self.xform.refresh_from_db() for data_type in ['supporting_doc', 'media', 'source']: count = MetaData.objects.count() self._add_form_metadata(self.xform, data_type, @@ -222,6 +243,10 @@ def test_delete_xform_deletes_media_metadata(self): self.assertEquals(response2.data, []) def test_windows_csv_file_upload_to_metadata(self): + # Add submissions before creating form metadata + self._make_submissions() + self.xform.refresh_from_db() + data_value = 'transportation.csv' path = os.path.join(self.fixture_dir, data_value) with open(path) as f: @@ -237,6 +262,9 @@ def test_windows_csv_file_upload_to_metadata(self): self.assertEqual(self.metadata.data_file_type, 'text/csv') def test_add_media_url(self): + # Add submissions before creating form metadata + self._make_submissions() + self.xform.refresh_from_db() data_type = 'media' # test invalid URL @@ -258,6 +286,9 @@ def test_add_media_url(self): self.assertEqual(response['Location'], data_value) def test_add_media_xform_link(self): + # Add submissions before creating form metadata + self._make_submissions() + self.xform.refresh_from_db() data_type = 'media' # test missing parameters @@ -286,6 +317,10 @@ def test_add_media_dataview_link(self): self._create_dataview() data_type = 'media' data_value = 'dataview {} transportation'.format(self.data_view.pk) + # Add submissions before creating form metadata + self._make_submissions() + self.xform.refresh_from_db() + self._add_form_metadata(self.xform, data_type, data_value) self.assertIsNotNone(self.metadata_data['media_url']) @@ -319,6 +354,10 @@ def _add_test_metadata(self): self.data_value, self.path) def test_list_metadata(self): + # Add submissions before creating form metadata + self._make_submissions() + self.xform.refresh_from_db() + self._add_test_metadata() self.view = MetaDataViewSet.as_view({'get': 'list'}) request = self.factory.get('/') @@ -331,6 +370,10 @@ def test_list_metadata(self): self.assertEqual(response.status_code, 200) def test_list_metadata_for_specific_form(self): + # Add submissions before creating form metadata + self._make_submissions() + self.xform.refresh_from_db() + self._add_test_metadata() self.view = MetaDataViewSet.as_view({'get': 'list'}) data = {'xform': self.xform.pk} @@ -384,6 +427,9 @@ def test_instance_metadata_has_instance_field(self): self.assertEqual(data['instance'], self.metadata.object_id) def test_should_return_both_xform_and_project_metadata(self): + # Add submissions before creating form metadata + self._make_submissions() + self.xform.refresh_from_db() # delete all existing metadata MetaData.objects.all().delete() expected_metadata_count = 2 @@ -411,6 +457,9 @@ def test_should_return_both_xform_and_project_metadata(self): self.assertIsNone(record.get('xform')) def test_should_only_return_xform_metadata(self): + # Add submissions before creating form metadata + self._make_submissions() + self.xform.refresh_from_db() # delete all existing metadata MetaData.objects.all().delete() @@ -471,6 +520,10 @@ def test_invalid_form_metadata(self): {'xform': ['XForm does not exist']}) def test_xform_meta_permission(self): + # Add submissions before creating form metadata + self._make_submissions() + self.xform.refresh_from_db() + view = MetaDataViewSet.as_view({'post': 'create'}) data = { @@ -502,6 +555,10 @@ def test_role_update_xform_meta_perms(self): alice_data = {'username': 'alice', 'email': 'alice@localhost.com'} alice_profile = self._create_user_profile(alice_data) + # Add submissions before creating form metadata + self._make_submissions() + self.xform.refresh_from_db() + EditorRole.add(alice_profile.user, self.xform) view = MetaDataViewSet.as_view({ @@ -546,6 +603,10 @@ def test_role_update_xform_meta_perms(self): DataEntryOnlyRole.user_has_role(alice_profile.user, self.xform)) def test_xform_meta_perms_duplicates(self): + # Add submissions before creating form metadata + self._make_submissions() + self.xform.refresh_from_db() + view = MetaDataViewSet.as_view({ 'post': 'create', 'put': 'update' @@ -590,6 +651,10 @@ def test_xform_meta_perms_duplicates(self): def test_unique_submission_review_metadata(self): """Don't create duplicate submission_review for a form""" + # Add submissions before creating form metadata + self._make_submissions() + self.xform.refresh_from_db() + data_type = "submission_review" data_value = True diff --git a/onadata/apps/api/tests/viewsets/test_team_viewset.py b/onadata/apps/api/tests/viewsets/test_team_viewset.py index 2505c42be9..3612b4ae68 100644 --- a/onadata/apps/api/tests/viewsets/test_team_viewset.py +++ b/onadata/apps/api/tests/viewsets/test_team_viewset.py @@ -444,6 +444,10 @@ def test_non_owners_should_be_able_to_change_member_permissions(self): def test_team_members_meta_perms_restrictions(self): self._team_create() self._publish_xls_form_to_project() + # Add submissions before fetching form metadata + self._make_submissions() + self.xform.refresh_from_db() + user_alice = self._create_user('alice', 'alice') members_team = Team.objects.get( diff --git a/onadata/apps/api/tests/viewsets/test_xform_list_viewset.py b/onadata/apps/api/tests/viewsets/test_xform_list_viewset.py index 3daee48cd0..c397254359 100644 --- a/onadata/apps/api/tests/viewsets/test_xform_list_viewset.py +++ b/onadata/apps/api/tests/viewsets/test_xform_list_viewset.py @@ -699,6 +699,10 @@ def _load_metadata(self, xform=None): self._add_form_metadata(xform, data_type, data_value, path) def test_retrieve_xform_manifest(self): + # Add submissions before fetching form metadata + self._make_submissions() + self.xform.refresh_from_db() + self._load_metadata(self.xform) self.view = XFormListViewSet.as_view( { @@ -731,6 +735,10 @@ def test_retrieve_xform_manifest(self): self.assertEqual(response['Content-Type'], 'text/xml; charset=utf-8') def test_retrieve_xform_manifest_anonymous_user(self): + # Add submissions before fetching form metadata + self._make_submissions() + self.xform.refresh_from_db() + self._load_metadata(self.xform) self.view = XFormListViewSet.as_view({"get": "manifest"}) request = self.factory.get('/') @@ -758,6 +766,11 @@ def test_retrieve_xform_manifest_anonymous_user(self): def test_retrieve_xform_manifest_anonymous_user_require_auth(self): self.user.profile.require_auth = True self.user.profile.save() + + # Add submissions before fetching form metadata + self._make_submissions() + self.xform.refresh_from_db() + self._load_metadata(self.xform) self.view = XFormListViewSet.as_view({"get": "manifest"}) request = self.factory.get('/') @@ -768,6 +781,10 @@ def test_retrieve_xform_manifest_anonymous_user_require_auth(self): self.assertEqual(response.status_code, 401) def test_retrieve_xform_media(self): + # Add submissions before fetching form metadata + self._make_submissions() + self.xform.refresh_from_db() + self._load_metadata(self.xform) self.view = XFormListViewSet.as_view( { @@ -786,6 +803,10 @@ def test_retrieve_xform_media(self): self.assertEqual(response.status_code, 200) def test_retrieve_xform_media_anonymous_user(self): + # Add submissions before fetching form metadata + self._make_submissions() + self.xform.refresh_from_db() + self._load_metadata(self.xform) self.view = XFormListViewSet.as_view({"get": "media"}) request = self.factory.get('/') @@ -802,6 +823,10 @@ def test_retrieve_xform_media_anonymous_user(self): self.assertEqual(response.status_code, 200) def test_retrieve_xform_media_anonymous_user_require_auth(self): + # Add submissions before fetching form metadata + self._make_submissions() + self.xform.refresh_from_db() + self.user.profile.require_auth = True self.user.profile.save() self._load_metadata(self.xform) @@ -812,11 +837,11 @@ def test_retrieve_xform_media_anonymous_user_require_auth(self): self.assertEqual(response.status_code, 401) def test_retrieve_xform_media_linked_xform(self): + self._make_submissions() + self.xform.refresh_from_db() data_type = 'media' data_value = 'xform {} transportation'.format(self.xform.pk) self._add_form_metadata(self.xform, data_type, data_value) - self._make_submissions() - self.xform.refresh_from_db() self.view = XFormListViewSet.as_view( { @@ -861,6 +886,10 @@ def test_retrieve_xform_media_linked_xform(self): 'attachment; filename=transportation.csv') def test_retrieve_xform_manifest_linked_form(self): + # Add submissions before fetching form metadata + self._make_submissions() + self.xform.refresh_from_db() + # for linked forms check if manifest media download url for csv # has a group_delimiter param data_type = 'media' @@ -968,6 +997,10 @@ def test_manifest_url_tag_is_not_present_when_no_media(self): manifest_url = ('') self.assertNotIn(manifest_url, content) + # Add submissions before creating form metadata + self._make_submissions() + self.xform.refresh_from_db() + # Add media and test that manifest url exists data_type = 'media' data_value = 'xform {} transportation'.format(self.xform.pk) diff --git a/onadata/apps/api/tests/viewsets/test_xform_viewset.py b/onadata/apps/api/tests/viewsets/test_xform_viewset.py index 42371ee322..8e4c09148a 100644 --- a/onadata/apps/api/tests/viewsets/test_xform_viewset.py +++ b/onadata/apps/api/tests/viewsets/test_xform_viewset.py @@ -2003,6 +2003,9 @@ def test_external_export_with_data_id(self): def test_external_export_error(self): with HTTMock(enketo_mock): self._publish_xls_form_to_project() + # Add submissions before creating form exports + self._make_submissions() + self.xform.refresh_from_db() data_value = 'template 1|http://xls_server' self._add_form_metadata(self.xform, 'external_export', @@ -3103,6 +3106,9 @@ def test_delete_xform_async(self, mock_get_status): def test_export_form_data_async(self, async_result): with HTTMock(enketo_mock): self._publish_xls_form_to_project() + # Add submissions before creating form exports + self._make_submissions() + self.xform.refresh_from_db() view = XFormViewSet.as_view({ 'get': 'export_async', }) @@ -3187,6 +3193,10 @@ def test_export_async_connection_error(self, async_result): async_result.side_effect = ConnectionError( 'Error opening socket: a socket error occurred') self._publish_xls_form_to_project() + # Add submissions before creating form exports + self._make_submissions() + self.xform.refresh_from_db() + view = XFormViewSet.as_view({ 'get': 'export_async', }) @@ -3527,6 +3537,7 @@ def test_csv_export_with_win_excel_utf8(self): "hxl_test", "hxl_example.xml"), forced_submission_time=_submission_time) self.assertTrue(self.xform.has_hxl_support) + self.xform.refresh_from_db() view = XFormViewSet.as_view({ 'get': 'retrieve' @@ -3888,6 +3899,9 @@ def test_csv_export_no_new_generated(self): def test_export_csv_data_async_with_remove_group_name(self, async_result): with HTTMock(enketo_mock): self._publish_xls_form_to_project() + # Add submissions before creating form exports + self._make_submissions() + self.xform.refresh_from_db() view = XFormViewSet.as_view({ 'get': 'export_async', @@ -4541,6 +4555,10 @@ def test_sav_zip_export_long_variable_length(self): @patch('onadata.libs.utils.api_export_tools.AsyncResult') def test_sav_zip_export_long_variable_length_async(self, async_result): self._publish_xls_form_to_project() + # Add submissions before creating form exports + self._make_submissions() + self.xform.refresh_from_db() + view = XFormViewSet.as_view({ 'get': 'export_async', }) @@ -4794,6 +4812,9 @@ def test_created_by_field_on_cloned_forms(self): def test_pending_export_async(self, async_result): with HTTMock(enketo_mock): self._publish_xls_form_to_project() + # Add submissions before creating form exports + self._make_submissions() + self.xform.refresh_from_db() view = XFormViewSet.as_view({ 'get': 'export_async', }) @@ -4943,6 +4964,14 @@ def test_export_csvzip_form_data_async(self): xls_path = os.path.join(settings.PROJECT_ROOT, "apps", "main", "tests", "fixtures", "tutorial.xls") self._publish_xls_form_to_project(xlsform_path=xls_path) + # Add submissions before creating form exports + xml_submission_file_path = os.path.join( + settings.PROJECT_ROOT, "apps", "logger", "fixtures", + "tutorial", "instances", "tutorial_2012-06-27_11-27-53.xml") + + self._make_submission(xml_submission_file_path) + self.xform.refresh_from_db() + view = XFormViewSet.as_view({ 'get': 'export_async', }) diff --git a/onadata/libs/serializers/metadata_serializer.py b/onadata/libs/serializers/metadata_serializer.py index 52c93174fc..3dd3d9c1b0 100644 --- a/onadata/libs/serializers/metadata_serializer.py +++ b/onadata/libs/serializers/metadata_serializer.py @@ -242,6 +242,16 @@ def create(self, validated_data): content_type = ContentType.objects.get_for_model(content_object) + # ensure current xform contains submissions + try: + if validated_data['xform'] and\ + not validated_data['xform'].num_of_submissions > 0: + raise serializers.ValidationError( + f"XForm {validated_data['xform'].title}\ + contains no submissions") + except KeyError: + pass + try: if data_type == XFORM_META_PERMS: metadata = \ @@ -259,11 +269,6 @@ def create(self, validated_data): elif data_type == IMPORTED_VIA_CSV_BY: metadata = MetaData.instance_csv_imported_by( content_object, data_value=data_value) - # ensure current xform contains submissions - elif not validated_data['xform'].num_of_submissions > 0: - raise serializers.ValidationError( - f"XForm {validated_data['xform'].title}\ - contains no submissions") else: metadata = MetaData.objects.create( content_type=content_type, From f20b37d89240bf960c92b3a75b3a47dfcb9fa954 Mon Sep 17 00:00:00 2001 From: WinnyTroy Date: Tue, 24 Aug 2021 15:20:33 +0300 Subject: [PATCH 05/10] Ensure csv export feature creates export containing headers. --- onadata/apps/viewer/tasks.py | 36 +++++------ onadata/libs/tests/utils/test_csv_builder.py | 66 ++++++++++++++++++++ onadata/libs/utils/csv_builder.py | 41 +++++++----- 3 files changed, 111 insertions(+), 32 deletions(-) diff --git a/onadata/apps/viewer/tasks.py b/onadata/apps/viewer/tasks.py index a9fc3f909a..f48d1059ed 100644 --- a/onadata/apps/viewer/tasks.py +++ b/onadata/apps/viewer/tasks.py @@ -96,25 +96,25 @@ def _create_export(xform, export_type, options): Export.EXTERNAL_EXPORT: create_external_export } - if export.xform.num_of_submissions > 0: - # start async export - if export_type in export_types: - try: - result = export_types[export_type].apply_async( - (), kwargs=options) - except OperationalError as e: - export.internal_status = Export.FAILED - export.error_message = "Error connecting to broker." - export.save() - report_exception(export.error_message, e, sys.exc_info()) - raise Export.ExportConnectionError - else: - raise ExportTypeError + # if export.xform.num_of_submissions > 0: + # start async export + if export_type in export_types: + try: + result = export_types[export_type].apply_async( + (), kwargs=options) + except OperationalError as e: + export.internal_status = Export.FAILED + export.error_message = "Error connecting to broker." + export.save() + report_exception(export.error_message, e, sys.exc_info()) + raise Export.ExportConnectionError else: - export.error_message = "No records found for this export" - export.internal_status = Export.FAILED - export.save() - return export + raise ExportTypeError + # else: + # export.error_message = "No records found for this export" + # export.internal_status = Export.FAILED + # export.save() + # return export if result: # when celery is running eager, the export has been generated by the diff --git a/onadata/libs/tests/utils/test_csv_builder.py b/onadata/libs/tests/utils/test_csv_builder.py index cbbd99b608..bb1852313d 100644 --- a/onadata/libs/tests/utils/test_csv_builder.py +++ b/onadata/libs/tests/utils/test_csv_builder.py @@ -3,6 +3,7 @@ Test CSVDataFrameBuilder """ import csv +from onadata.libs.exceptions import NoRecordsFoundError import os from tempfile import NamedTemporaryFile @@ -1209,6 +1210,71 @@ def test_show_choice_labels_select_multiple_2(self, mock_query_data): }] self.assertEqual(expected_result, result) + def test_export_data_for_xforms_without_submissions(self): + """ + Test xform schema for form with no submission + is successfully exported + """ + fixture = "new_repeats" + # publish form so we have a dd + self._publish_xls_fixture_set_xform(fixture) + + # Confirm form has not submissions so far + self.assertEquals(self.xform.instances.count(), 0) + # Generate csv export for form + csv_df_builder = CSVDataFrameBuilder( + self.user.username, self.xform.id_string, include_images=False) + temp_file = NamedTemporaryFile(suffix=".csv", delete=False) + csv_df_builder.export_to(temp_file.name) + csv_file = open(temp_file.name, 'r') + csv_reader = csv.reader(csv_file) + header = next(csv_reader) + + expected_header = [ + 'info/name', + 'info/age', + 'kids/has_kids', + 'gps', + 'web_browsers', + 'meta/instanceID', + '_id', + '_uuid', + '_submission_time', + '_date_modified', + '_tags', + '_notes', + '_version', + '_duration', + '_submitted_by', + '_total_media', + '_media_count', + '_media_all_received'] + # Test form headers are present on exported csv file. + self.assertEqual(header, expected_header) + + csv_file.close() + + def test_export_raises_NoRecordsFound_for_form_without_instances(self): + """ + Test xform schema for form with no submission + is successfully exported + """ + fixture = "new_repeats" + # publish form so we have a dd + self._publish_xls_fixture_set_xform(fixture) + + # Confirm form has not submissions so far + self.assertEquals(self.xform.instances.count(), 0) + # Generate csv export for form + csv_df_builder_1 = CSVDataFrameBuilder( + self.user.username, + self.xform.id_string, + split_select_multiples=True, binary_select_multiples=False, + include_images=False, show_choice_labels=True) + # Fetch form data throws NoRecordsFound exeption + with self.assertRaises(NoRecordsFoundError): + csv_df_builder_1._query_data() + @patch.object(CSVDataFrameBuilder, '_query_data') def test_show_choice_labels_select_multiple_3(self, mock_query_data): """ diff --git a/onadata/libs/utils/csv_builder.py b/onadata/libs/utils/csv_builder.py index 75cbc39c70..dd8006bec8 100644 --- a/onadata/libs/utils/csv_builder.py +++ b/onadata/libs/utils/csv_builder.py @@ -534,6 +534,10 @@ def _build_ordered_columns(cls, survey_element, ordered_columns, ordered_columns[child.get_abbreviated_xpath()] = None def _update_columns_from_data(self, cursor): + """ + Adds ordered columns for select multiples column headers. + Expounds gps/geopoint column headers. + """ # add ordered columns for select multiples if self.split_select_multiples: for key, choices in self.select_multiples.items(): @@ -573,8 +577,11 @@ def _update_columns_from_data(self, cursor): show_choice_labels=self.show_choice_labels, language=self.language) - def _format_for_dataframe(self, cursor): - # TODO: check for and handle empty results + def _format_for_dataframe(self, cursor, is_data=False): + """ + Adds ordered columns for select multiples data. + Expounds gps/geopoint data. + """ # add ordered columns for select multiples if self.split_select_multiples: for (key, choices) in iteritems(self.select_multiples): @@ -593,9 +600,11 @@ def _format_for_dataframe(self, cursor): if self.split_select_multiples: record = self._split_select_multiples( record, self.select_multiples, - self.BINARY_SELECT_MULTIPLES, self.VALUE_SELECT_MULTIPLES, + self.BINARY_SELECT_MULTIPLES, + self.VALUE_SELECT_MULTIPLES, show_choice_labels=self.show_choice_labels) - # check for gps and split into components i.e. latitude, longitude, + # check for gps and split into + # components i.e. latitude, longitude, # altitude, precision self._split_gps_fields(record, self.gps_fields) self._tag_edit_string(record) @@ -635,25 +644,29 @@ def export_to(self, path, dataview=None): cursor = cursor.iterator() data = self._format_for_dataframe(cursor) else: - cursor = self._query_data(self.filter_query) - if isinstance(cursor, QuerySet): - cursor = cursor.iterator() + try: + cursor = self._query_data(self.filter_query) + except NoRecordsFoundError: + # Set cursor (XForm data) to xform schema + cursor = self.xform.get_headers() self._update_columns_from_data(cursor) + # Confirm form contains submissions + if len(cursor) > 0: + if isinstance(cursor, QuerySet): + cursor = cursor.iterator() + # Unpack xform data + data = self._format_for_dataframe(cursor) columns = list(chain.from_iterable( [[xpath] if cols is None else cols - for (xpath, cols) in iteritems(self.ordered_columns)])) + for (xpath, cols) in iteritems(self.ordered_columns)])) - # add extra columns - columns += [col for col in self.extra_columns] for field in self.dd.get_survey_elements_of_type('osm'): columns += OsmData.get_tag_keys(self.xform, field.get_abbreviated_xpath(), include_prefix=True) - cursor = self._query_data(self.filter_query) - if isinstance(cursor, QuerySet): - cursor = cursor.iterator() - data = self._format_for_dataframe(cursor) + # add extra columns + columns += [col for col in self.extra_columns] columns_with_hxl = self.include_hxl and get_columns_with_hxl( self.dd.survey_elements) From 8e8220782f29c3d2331b082ad2a87640e2b044a9 Mon Sep 17 00:00:00 2001 From: WinnyTroy Date: Tue, 31 Aug 2021 19:20:45 +0300 Subject: [PATCH 06/10] Update failing tests --- .../tests/viewsets/test_metadata_viewset.py | 67 +-------- .../api/tests/viewsets/test_team_viewset.py | 4 - .../tests/viewsets/test_xform_list_viewset.py | 37 +---- .../api/tests/viewsets/test_xform_viewset.py | 30 ---- onadata/apps/main/tests/test_base.py | 9 +- onadata/apps/viewer/tasks.py | 9 +- onadata/apps/viewer/tests/test_export_list.py | 12 +- onadata/apps/viewer/tests/test_exports.py | 19 ++- .../serializers/test_export_serializer.py | 2 - .../libs/tests/utils/test_api_export_tools.py | 1 - onadata/libs/tests/utils/test_csv_builder.py | 130 ++++++++++-------- onadata/libs/utils/csv_builder.py | 129 +++++++++-------- 12 files changed, 177 insertions(+), 272 deletions(-) diff --git a/onadata/apps/api/tests/viewsets/test_metadata_viewset.py b/onadata/apps/api/tests/viewsets/test_metadata_viewset.py index 10815a994a..286c79f455 100644 --- a/onadata/apps/api/tests/viewsets/test_metadata_viewset.py +++ b/onadata/apps/api/tests/viewsets/test_metadata_viewset.py @@ -89,20 +89,14 @@ def _add_instance_metadata(self, self._post_metadata(data) def test_add_metadata_with_file_attachment(self): - # Add submissions before creating form metadata - self._make_submissions() - self.xform.refresh_from_db() for data_type in ['supporting_doc', 'media', 'source']: self._add_form_metadata(self.xform, data_type, self.data_value, self.path) def test_parse_error_is_raised(self): """Parse error is raised when duplicate media is uploaded""" - # Add submissions before creating form metadata - self._make_submissions() - self.xform.refresh_from_db() - data_type = "supporting_doc" + self._add_form_metadata(self.xform, data_type, self.data_value, self.path) # Duplicate upload @@ -112,10 +106,7 @@ def test_parse_error_is_raised(self): self.assertIn(UNIQUE_TOGETHER_ERROR, response.data) def test_forms_endpoint_with_metadata(self): - # Add submissions before creating form metadata - self._make_submissions() date_modified = self.xform.date_modified - self.xform.refresh_from_db() for data_type in ['supporting_doc', 'media', 'source']: self._add_form_metadata(self.xform, data_type, self.data_value, self.path) @@ -143,9 +134,6 @@ def test_forms_endpoint_with_metadata(self): self.assertEqual(response.data, [data]) def test_get_metadata_with_file_attachment(self): - # Add submissions before creating form metadata - self._make_submissions() - self.xform.refresh_from_db() for data_type in ['supporting_doc', 'media', 'source']: self._add_form_metadata(self.xform, data_type, self.data_value, self.path) @@ -167,9 +155,6 @@ def test_get_metadata(self): ) self.data_value = '1335783522563.jpg' self.path = os.path.join(self.fixture_dir, self.data_value) - # Add submissions before creating form metadata - self._make_submissions() - self.xform.refresh_from_db() self._add_form_metadata( self.xform, "media", self.data_value, self.path) @@ -193,17 +178,11 @@ def test_get_metadata(self): self.assertDictEqual(dict(response.data), data) def test_add_mapbox_layer(self): - # Add submissions before creating form metadata - self._make_submissions() - self.xform.refresh_from_db() data_type = 'mapbox_layer' data_value = 'test_mapbox_layer||http://0.0.0.0:8080||attribution' self._add_form_metadata(self.xform, data_type, data_value) def test_delete_metadata(self): - # Add submissions before creating form metadata - self._make_submissions() - self.xform.refresh_from_db() for data_type in ['supporting_doc', 'media', 'source']: count = MetaData.objects.count() self._add_form_metadata(self.xform, data_type, @@ -243,10 +222,6 @@ def test_delete_xform_deletes_media_metadata(self): self.assertEquals(response2.data, []) def test_windows_csv_file_upload_to_metadata(self): - # Add submissions before creating form metadata - self._make_submissions() - self.xform.refresh_from_db() - data_value = 'transportation.csv' path = os.path.join(self.fixture_dir, data_value) with open(path) as f: @@ -262,9 +237,6 @@ def test_windows_csv_file_upload_to_metadata(self): self.assertEqual(self.metadata.data_file_type, 'text/csv') def test_add_media_url(self): - # Add submissions before creating form metadata - self._make_submissions() - self.xform.refresh_from_db() data_type = 'media' # test invalid URL @@ -286,9 +258,6 @@ def test_add_media_url(self): self.assertEqual(response['Location'], data_value) def test_add_media_xform_link(self): - # Add submissions before creating form metadata - self._make_submissions() - self.xform.refresh_from_db() data_type = 'media' # test missing parameters @@ -317,10 +286,6 @@ def test_add_media_dataview_link(self): self._create_dataview() data_type = 'media' data_value = 'dataview {} transportation'.format(self.data_view.pk) - # Add submissions before creating form metadata - self._make_submissions() - self.xform.refresh_from_db() - self._add_form_metadata(self.xform, data_type, data_value) self.assertIsNotNone(self.metadata_data['media_url']) @@ -354,10 +319,6 @@ def _add_test_metadata(self): self.data_value, self.path) def test_list_metadata(self): - # Add submissions before creating form metadata - self._make_submissions() - self.xform.refresh_from_db() - self._add_test_metadata() self.view = MetaDataViewSet.as_view({'get': 'list'}) request = self.factory.get('/') @@ -370,10 +331,6 @@ def test_list_metadata(self): self.assertEqual(response.status_code, 200) def test_list_metadata_for_specific_form(self): - # Add submissions before creating form metadata - self._make_submissions() - self.xform.refresh_from_db() - self._add_test_metadata() self.view = MetaDataViewSet.as_view({'get': 'list'}) data = {'xform': self.xform.pk} @@ -427,9 +384,6 @@ def test_instance_metadata_has_instance_field(self): self.assertEqual(data['instance'], self.metadata.object_id) def test_should_return_both_xform_and_project_metadata(self): - # Add submissions before creating form metadata - self._make_submissions() - self.xform.refresh_from_db() # delete all existing metadata MetaData.objects.all().delete() expected_metadata_count = 2 @@ -457,9 +411,6 @@ def test_should_return_both_xform_and_project_metadata(self): self.assertIsNone(record.get('xform')) def test_should_only_return_xform_metadata(self): - # Add submissions before creating form metadata - self._make_submissions() - self.xform.refresh_from_db() # delete all existing metadata MetaData.objects.all().delete() @@ -520,10 +471,6 @@ def test_invalid_form_metadata(self): {'xform': ['XForm does not exist']}) def test_xform_meta_permission(self): - # Add submissions before creating form metadata - self._make_submissions() - self.xform.refresh_from_db() - view = MetaDataViewSet.as_view({'post': 'create'}) data = { @@ -555,10 +502,6 @@ def test_role_update_xform_meta_perms(self): alice_data = {'username': 'alice', 'email': 'alice@localhost.com'} alice_profile = self._create_user_profile(alice_data) - # Add submissions before creating form metadata - self._make_submissions() - self.xform.refresh_from_db() - EditorRole.add(alice_profile.user, self.xform) view = MetaDataViewSet.as_view({ @@ -603,10 +546,6 @@ def test_role_update_xform_meta_perms(self): DataEntryOnlyRole.user_has_role(alice_profile.user, self.xform)) def test_xform_meta_perms_duplicates(self): - # Add submissions before creating form metadata - self._make_submissions() - self.xform.refresh_from_db() - view = MetaDataViewSet.as_view({ 'post': 'create', 'put': 'update' @@ -651,10 +590,6 @@ def test_xform_meta_perms_duplicates(self): def test_unique_submission_review_metadata(self): """Don't create duplicate submission_review for a form""" - # Add submissions before creating form metadata - self._make_submissions() - self.xform.refresh_from_db() - data_type = "submission_review" data_value = True diff --git a/onadata/apps/api/tests/viewsets/test_team_viewset.py b/onadata/apps/api/tests/viewsets/test_team_viewset.py index 3612b4ae68..2505c42be9 100644 --- a/onadata/apps/api/tests/viewsets/test_team_viewset.py +++ b/onadata/apps/api/tests/viewsets/test_team_viewset.py @@ -444,10 +444,6 @@ def test_non_owners_should_be_able_to_change_member_permissions(self): def test_team_members_meta_perms_restrictions(self): self._team_create() self._publish_xls_form_to_project() - # Add submissions before fetching form metadata - self._make_submissions() - self.xform.refresh_from_db() - user_alice = self._create_user('alice', 'alice') members_team = Team.objects.get( diff --git a/onadata/apps/api/tests/viewsets/test_xform_list_viewset.py b/onadata/apps/api/tests/viewsets/test_xform_list_viewset.py index c397254359..3daee48cd0 100644 --- a/onadata/apps/api/tests/viewsets/test_xform_list_viewset.py +++ b/onadata/apps/api/tests/viewsets/test_xform_list_viewset.py @@ -699,10 +699,6 @@ def _load_metadata(self, xform=None): self._add_form_metadata(xform, data_type, data_value, path) def test_retrieve_xform_manifest(self): - # Add submissions before fetching form metadata - self._make_submissions() - self.xform.refresh_from_db() - self._load_metadata(self.xform) self.view = XFormListViewSet.as_view( { @@ -735,10 +731,6 @@ def test_retrieve_xform_manifest(self): self.assertEqual(response['Content-Type'], 'text/xml; charset=utf-8') def test_retrieve_xform_manifest_anonymous_user(self): - # Add submissions before fetching form metadata - self._make_submissions() - self.xform.refresh_from_db() - self._load_metadata(self.xform) self.view = XFormListViewSet.as_view({"get": "manifest"}) request = self.factory.get('/') @@ -766,11 +758,6 @@ def test_retrieve_xform_manifest_anonymous_user(self): def test_retrieve_xform_manifest_anonymous_user_require_auth(self): self.user.profile.require_auth = True self.user.profile.save() - - # Add submissions before fetching form metadata - self._make_submissions() - self.xform.refresh_from_db() - self._load_metadata(self.xform) self.view = XFormListViewSet.as_view({"get": "manifest"}) request = self.factory.get('/') @@ -781,10 +768,6 @@ def test_retrieve_xform_manifest_anonymous_user_require_auth(self): self.assertEqual(response.status_code, 401) def test_retrieve_xform_media(self): - # Add submissions before fetching form metadata - self._make_submissions() - self.xform.refresh_from_db() - self._load_metadata(self.xform) self.view = XFormListViewSet.as_view( { @@ -803,10 +786,6 @@ def test_retrieve_xform_media(self): self.assertEqual(response.status_code, 200) def test_retrieve_xform_media_anonymous_user(self): - # Add submissions before fetching form metadata - self._make_submissions() - self.xform.refresh_from_db() - self._load_metadata(self.xform) self.view = XFormListViewSet.as_view({"get": "media"}) request = self.factory.get('/') @@ -823,10 +802,6 @@ def test_retrieve_xform_media_anonymous_user(self): self.assertEqual(response.status_code, 200) def test_retrieve_xform_media_anonymous_user_require_auth(self): - # Add submissions before fetching form metadata - self._make_submissions() - self.xform.refresh_from_db() - self.user.profile.require_auth = True self.user.profile.save() self._load_metadata(self.xform) @@ -837,11 +812,11 @@ def test_retrieve_xform_media_anonymous_user_require_auth(self): self.assertEqual(response.status_code, 401) def test_retrieve_xform_media_linked_xform(self): - self._make_submissions() - self.xform.refresh_from_db() data_type = 'media' data_value = 'xform {} transportation'.format(self.xform.pk) self._add_form_metadata(self.xform, data_type, data_value) + self._make_submissions() + self.xform.refresh_from_db() self.view = XFormListViewSet.as_view( { @@ -886,10 +861,6 @@ def test_retrieve_xform_media_linked_xform(self): 'attachment; filename=transportation.csv') def test_retrieve_xform_manifest_linked_form(self): - # Add submissions before fetching form metadata - self._make_submissions() - self.xform.refresh_from_db() - # for linked forms check if manifest media download url for csv # has a group_delimiter param data_type = 'media' @@ -997,10 +968,6 @@ def test_manifest_url_tag_is_not_present_when_no_media(self): manifest_url = ('') self.assertNotIn(manifest_url, content) - # Add submissions before creating form metadata - self._make_submissions() - self.xform.refresh_from_db() - # Add media and test that manifest url exists data_type = 'media' data_value = 'xform {} transportation'.format(self.xform.pk) diff --git a/onadata/apps/api/tests/viewsets/test_xform_viewset.py b/onadata/apps/api/tests/viewsets/test_xform_viewset.py index 8e4c09148a..dee74f031d 100644 --- a/onadata/apps/api/tests/viewsets/test_xform_viewset.py +++ b/onadata/apps/api/tests/viewsets/test_xform_viewset.py @@ -2003,9 +2003,6 @@ def test_external_export_with_data_id(self): def test_external_export_error(self): with HTTMock(enketo_mock): self._publish_xls_form_to_project() - # Add submissions before creating form exports - self._make_submissions() - self.xform.refresh_from_db() data_value = 'template 1|http://xls_server' self._add_form_metadata(self.xform, 'external_export', @@ -3106,9 +3103,6 @@ def test_delete_xform_async(self, mock_get_status): def test_export_form_data_async(self, async_result): with HTTMock(enketo_mock): self._publish_xls_form_to_project() - # Add submissions before creating form exports - self._make_submissions() - self.xform.refresh_from_db() view = XFormViewSet.as_view({ 'get': 'export_async', }) @@ -3193,10 +3187,6 @@ def test_export_async_connection_error(self, async_result): async_result.side_effect = ConnectionError( 'Error opening socket: a socket error occurred') self._publish_xls_form_to_project() - # Add submissions before creating form exports - self._make_submissions() - self.xform.refresh_from_db() - view = XFormViewSet.as_view({ 'get': 'export_async', }) @@ -3899,9 +3889,6 @@ def test_csv_export_no_new_generated(self): def test_export_csv_data_async_with_remove_group_name(self, async_result): with HTTMock(enketo_mock): self._publish_xls_form_to_project() - # Add submissions before creating form exports - self._make_submissions() - self.xform.refresh_from_db() view = XFormViewSet.as_view({ 'get': 'export_async', @@ -4555,10 +4542,6 @@ def test_sav_zip_export_long_variable_length(self): @patch('onadata.libs.utils.api_export_tools.AsyncResult') def test_sav_zip_export_long_variable_length_async(self, async_result): self._publish_xls_form_to_project() - # Add submissions before creating form exports - self._make_submissions() - self.xform.refresh_from_db() - view = XFormViewSet.as_view({ 'get': 'export_async', }) @@ -4812,9 +4795,6 @@ def test_created_by_field_on_cloned_forms(self): def test_pending_export_async(self, async_result): with HTTMock(enketo_mock): self._publish_xls_form_to_project() - # Add submissions before creating form exports - self._make_submissions() - self.xform.refresh_from_db() view = XFormViewSet.as_view({ 'get': 'export_async', }) @@ -4889,13 +4869,11 @@ def test_external_choice_integer_name_xlsform(self): "integer_name_test.xlsx") with open(path, 'rb') as xls_file: # pylint: disable=no-member - meta_count = MetaData.objects.count() post_data = {'xls_file': xls_file} request = self.factory.post('/', data=post_data, **self.extra) response = view(request) xform = self.user.xforms.all()[0] self.assertEqual(response.status_code, 201) - self.assertEqual(meta_count + 4, MetaData.objects.count()) metadata = MetaData.objects.get( object_id=xform.id, data_value='itemsets.csv') self.assertIsNotNone(metadata) @@ -4964,14 +4942,6 @@ def test_export_csvzip_form_data_async(self): xls_path = os.path.join(settings.PROJECT_ROOT, "apps", "main", "tests", "fixtures", "tutorial.xls") self._publish_xls_form_to_project(xlsform_path=xls_path) - # Add submissions before creating form exports - xml_submission_file_path = os.path.join( - settings.PROJECT_ROOT, "apps", "logger", "fixtures", - "tutorial", "instances", "tutorial_2012-06-27_11-27-53.xml") - - self._make_submission(xml_submission_file_path) - self.xform.refresh_from_db() - view = XFormViewSet.as_view({ 'get': 'export_async', }) diff --git a/onadata/apps/main/tests/test_base.py b/onadata/apps/main/tests/test_base.py index d580c353fc..191858d811 100644 --- a/onadata/apps/main/tests/test_base.py +++ b/onadata/apps/main/tests/test_base.py @@ -374,4 +374,11 @@ def _test_csv_files(self, csv_file, csv_file_path): for index, row in enumerate(expected_csv_reader): if None in row: row.pop(None) - self.assertDictContainsSubset(row, data[index]) + # Refactoring to add these fields to the data + # read from the csv_file_path + current_exported_data = dict(data[index]) + stored_exported_data = dict(row) + stored_exported_data['_id'] = current_exported_data ['_id'] + stored_exported_data['_date_modified'] = current_exported_data['_date_modified'] + + self.assertEquals(sorted(stored_exported_data), sorted(current_exported_data)) diff --git a/onadata/apps/viewer/tasks.py b/onadata/apps/viewer/tasks.py index f48d1059ed..54a846e38c 100644 --- a/onadata/apps/viewer/tasks.py +++ b/onadata/apps/viewer/tasks.py @@ -96,12 +96,10 @@ def _create_export(xform, export_type, options): Export.EXTERNAL_EXPORT: create_external_export } - # if export.xform.num_of_submissions > 0: # start async export if export_type in export_types: try: - result = export_types[export_type].apply_async( - (), kwargs=options) + result = export_types[export_type].apply_async((), kwargs=options) except OperationalError as e: export.internal_status = Export.FAILED export.error_message = "Error connecting to broker." @@ -110,11 +108,6 @@ def _create_export(xform, export_type, options): raise Export.ExportConnectionError else: raise ExportTypeError - # else: - # export.error_message = "No records found for this export" - # export.internal_status = Export.FAILED - # export.save() - # return export if result: # when celery is running eager, the export has been generated by the diff --git a/onadata/apps/viewer/tests/test_export_list.py b/onadata/apps/viewer/tests/test_export_list.py index e909a80a01..5602477f5c 100644 --- a/onadata/apps/viewer/tests/test_export_list.py +++ b/onadata/apps/viewer/tests/test_export_list.py @@ -176,13 +176,23 @@ def test_csv_export_url(self): def test_csv_export_url_without_records(self): # this has been refactored so that if NoRecordsFound Exception is - # thrown, it will return an empty csv + # thrown, it will return an empty csv containing only the xform schema url = reverse('csv_export', kwargs={ 'username': self.user.username, 'id_string': self.xform.id_string, }) response = self.client.get(url) self.assertEqual(response.status_code, 200) + # Unpack response streaming data + export_data = [i.decode( + 'utf-8').replace('\n', '').split( + ',') for i in response.streaming_content] + xform_headers = self.xform.get_headers() + # Remove review headers from xform headers + for x in ['_review_status', '_review_comment']: + xform_headers.remove(x) + # Test export data returned is xform headers list + self.assertEqual(xform_headers, export_data[0]) def test_xls_export_url(self): self._submit_transport_instance() diff --git a/onadata/apps/viewer/tests/test_exports.py b/onadata/apps/viewer/tests/test_exports.py index a5c4d10e63..89dc30f4f5 100644 --- a/onadata/apps/viewer/tests/test_exports.py +++ b/onadata/apps/viewer/tests/test_exports.py @@ -97,6 +97,10 @@ def test_csv_without_na_values(self): settings.NA_REP = na_rep_restore def test_responses_for_empty_exports(self): + """ + csv exports for forms without submissions + should return xform column headers in export. + """ self._publish_transportation_form() # test csv though xls uses the same view url = reverse( @@ -109,6 +113,16 @@ def test_responses_for_empty_exports(self): self.response = self.client.get(url) self.assertEqual(self.response.status_code, 200) self.assertIn('application/csv', self.response['content-type']) + # Unpack response streaming data + export_data = [i.decode( + 'utf-8').replace('\n', '').split( + ',') for i in self.response.streaming_content] + xform_headers = self.xform.get_headers() + # Remove review headers from xform headers + for x in ['_review_status', '_review_comment']: + xform_headers.remove(x) + # Test export data returned is xform headers list + self.assertEqual(xform_headers, export_data[0]) def test_create_export(self): self._publish_transportation_form_and_submit_instance() @@ -348,7 +362,6 @@ def test_auto_export_if_none_exists(self): def test_dont_auto_export_if_exports_exist(self): self._publish_transportation_form() self._submit_transport_instance() - self.xform.refresh_from_db() # create export create_export_url = reverse(create_export, kwargs={ 'username': self.user.username, @@ -435,7 +448,6 @@ def test_last_submission_time_empty(self): def test_invalid_export_type(self): self._publish_transportation_form() self._submit_transport_instance() - self.xform.refresh_from_db() export_list_url = reverse(export_list, kwargs={ 'username': self.user.username, 'id_string': self.xform.id_string, @@ -840,7 +852,6 @@ def test_column_header_delimiter_export_option(self): # survey 1 has ambulance and bicycle as values for # transport/available_transportation_types_to_referral_facility self._submit_transport_instance(survey_at=1) - self.xform.refresh_from_db() create_csv_export_url = reverse(create_export, kwargs={ 'username': self.user.username, 'id_string': self.xform.id_string, @@ -904,7 +915,6 @@ def test_column_header_delimiter_export_option(self): def test_split_select_multiple_export_option(self): self._publish_transportation_form() self._submit_transport_instance(survey_at=1) - self.xform.refresh_from_db() create_csv_export_url = reverse(create_export, kwargs={ 'username': self.user.username, 'id_string': self.xform.id_string, @@ -1295,7 +1305,6 @@ def test_create_external_export_url_with_non_existing_export_id( mock_404.side_effect = Http404('No Export matches the given query.') self._publish_transportation_form() self._submit_transport_instance() - self.xform.refresh_from_db() server = 'http://localhost:8080/xls/23fa4c38c0054748a984ffd89021a295' data_value = 'template 1 |{0}'.format(server) diff --git a/onadata/libs/tests/serializers/test_export_serializer.py b/onadata/libs/tests/serializers/test_export_serializer.py index 39d34db50d..7a60809f62 100644 --- a/onadata/libs/tests/serializers/test_export_serializer.py +++ b/onadata/libs/tests/serializers/test_export_serializer.py @@ -15,8 +15,6 @@ class TestExportSerializer(TestAbstractViewSet): def test_export_serializer(self): request = APIRequestFactory().get('/') self._publish_xls_form_to_project() - self._make_submissions() - self.xform.refresh_from_db() temp_dir = settings.MEDIA_ROOT dummy_export_file = NamedTemporaryFile(suffix='.xlsx', dir=temp_dir) filename = os.path.basename(dummy_export_file.name) diff --git a/onadata/libs/tests/utils/test_api_export_tools.py b/onadata/libs/tests/utils/test_api_export_tools.py index c1816465f0..9f1bb9736c 100644 --- a/onadata/libs/tests/utils/test_api_export_tools.py +++ b/onadata/libs/tests/utils/test_api_export_tools.py @@ -43,7 +43,6 @@ def test_process_async_export_creates_new_export(self): Test process_async_export creates a new export. """ self._publish_transportation_form_and_submit_instance() - self.xform.refresh_from_db() request = self.factory.post('/') request.user = self.user export_type = "csv" diff --git a/onadata/libs/tests/utils/test_csv_builder.py b/onadata/libs/tests/utils/test_csv_builder.py index bb1852313d..99217c40c9 100644 --- a/onadata/libs/tests/utils/test_csv_builder.py +++ b/onadata/libs/tests/utils/test_csv_builder.py @@ -92,7 +92,8 @@ def _csv_data_for_dataframe(self): self.user.username, self.xform.id_string, include_images=False) # pylint: disable=protected-access cursor = csv_df_builder._query_data() - return [d for d in csv_df_builder._format_for_dataframe(cursor)] + return [d for d in csv_df_builder._format_for_dataframe( + cursor, is_data=True)] def test_csv_dataframe_export_to(self): """ @@ -103,6 +104,7 @@ def test_csv_dataframe_export_to(self): "nested_repeats", "01", submission_time=self._submission_time) self._submit_fixture_instance( "nested_repeats", "02", submission_time=self._submission_time) + self.xform.refresh_from_db() csv_df_builder = CSVDataFrameBuilder( self.user.username, self.xform.id_string, include_images=False) @@ -458,7 +460,7 @@ def test_csv_export(self): for row in csv_reader: rows.append(row) self.assertEqual(len(rows), 7) - self.assertEqual(rows[4][5], NA_REP) + self.assertEqual(rows[4][16], NA_REP) # close and delete file csv_file.close() @@ -588,7 +590,7 @@ def test_csv_export_remove_group_name(self): for row in csv_reader: rows.append(row) self.assertEqual(len(rows), 7) - self.assertEqual(rows[4][5], NA_REP) + self.assertEqual(rows[4][16], NA_REP) # close and delete file csv_file.close() os.unlink(temp_file.name) @@ -649,7 +651,7 @@ def test_csv_export_with_labels(self): for row in csv_reader: rows.append(row) self.assertEqual(len(rows), 7) - self.assertEqual(rows[4][5], NA_REP) + self.assertEqual(rows[4][16], NA_REP) # close and delete file csv_file.close() os.unlink(temp_file.name) @@ -697,7 +699,7 @@ def test_csv_export_with_labels_only(self): for row in csv_reader: rows.append(row) self.assertEqual(len(rows), 7) - self.assertEqual(rows[4][5], NA_REP) + self.assertEqual(rows[4][16], NA_REP) # close and delete file csv_file.close() os.unlink(temp_file.name) @@ -744,7 +746,8 @@ def test_no_split_select_multiples(self, mock_query_data): include_images=False) # pylint: disable=protected-access cursor = [row for row in csv_df_builder._query_data()] - result = [k for k in csv_df_builder._format_for_dataframe(cursor)] + result = [k for k in csv_df_builder._format_for_dataframe( + cursor, is_data=True)] expected_result = [{ 'name': 'Tom', 'age': 23, @@ -824,7 +827,8 @@ def test_index_tag_replacement(self): self.user.username, self.xform.id_string, include_images=False, index_tags=('_', '_')) cursor = csv_df_builder._query_data() - result = [d for d in csv_df_builder._format_for_dataframe(cursor)][0] + result = [d for d in csv_df_builder._format_for_dataframe( + cursor, is_data=True)][0] # remove dynamic fields ignore_list = [ '_uuid', 'meta/instanceID', 'formhub/uuid', '_submission_time', @@ -904,7 +908,8 @@ def test_show_choice_labels_multi_language(self, mock_query_data): include_images=False, show_choice_labels=True, language='French') # pylint: disable=protected-access cursor = [row for row in csv_df_builder._query_data()] - result = [k for k in csv_df_builder._format_for_dataframe(cursor)] + result = [k for k in csv_df_builder._format_for_dataframe( + cursor, is_data=True)] expected_result = [{ 'name': 'Maria', 'age': 25, @@ -945,7 +950,8 @@ def test_show_choice_labels_multi_language_1(self, mock_query_data): include_images=False, show_choice_labels=True, language='English') # pylint: disable=protected-access cursor = [row for row in csv_df_builder._query_data()] - result = [k for k in csv_df_builder._format_for_dataframe(cursor)] + result = [k for k in csv_df_builder._format_for_dataframe( + cursor, is_data=True)] expected_result = [{ 'name': 'Maria', 'age': 25, @@ -985,7 +991,8 @@ def test_show_choice_labels(self, mock_query_data): include_images=False, show_choice_labels=True) # pylint: disable=protected-access cursor = [row for row in csv_df_builder._query_data()] - result = [k for k in csv_df_builder._format_for_dataframe(cursor)] + result = [k for k in csv_df_builder._format_for_dataframe( + cursor, is_data=True)] expected_result = [{ 'name': 'Maria', 'age': 25, @@ -1025,7 +1032,8 @@ def test_show_choice_labels_select_multiple(self, mock_query_data): include_images=False, show_choice_labels=True) # pylint: disable=protected-access cursor = [row for row in csv_df_builder._query_data()] - result = [k for k in csv_df_builder._format_for_dataframe(cursor)] + result = [k for k in csv_df_builder._format_for_dataframe( + cursor, is_data=True)] expected_result = [{ 'name': 'Maria', 'age': 25, @@ -1067,7 +1075,8 @@ def test_show_choice_labels_select_multiple_language(self, include_images=False, show_choice_labels=True, language='Fr') # pylint: disable=protected-access cursor = [row for row in csv_df_builder._query_data()] - result = [k for k in csv_df_builder._format_for_dataframe(cursor)] + result = [k for k in csv_df_builder._format_for_dataframe( + cursor, is_data=True)] expected_result = [{ 'name': 'Maria', 'age': 25, @@ -1110,7 +1119,8 @@ def test_show_choice_labels_select_multiple_1(self, mock_query_data): include_images=False, show_choice_labels=True) # pylint: disable=protected-access cursor = [row for row in csv_df_builder._query_data()] - result = [k for k in csv_df_builder._format_for_dataframe(cursor)] + result = [k for k in csv_df_builder._format_for_dataframe( + cursor, is_data=True)] expected_result = [{ 'name': 'Maria', 'age': 25, @@ -1156,7 +1166,8 @@ def test_show_choice_labels_select_multiple_1_language(self, include_images=False, show_choice_labels=True, language='Fr') # pylint: disable=protected-access cursor = [row for row in csv_df_builder._query_data()] - result = [k for k in csv_df_builder._format_for_dataframe(cursor)] + result = [k for k in csv_df_builder._format_for_dataframe( + cursor, is_data=True)] expected_result = [{ 'name': 'Maria', 'age': 25, @@ -1200,7 +1211,8 @@ def test_show_choice_labels_select_multiple_2(self, mock_query_data): include_images=False, show_choice_labels=True) # pylint: disable=protected-access cursor = [row for row in csv_df_builder_1._query_data()] - result = [k for k in csv_df_builder_1._format_for_dataframe(cursor)] + result = [k for k in csv_df_builder_1._format_for_dataframe( + cursor, is_data=True)] expected_result = [{ 'name': 'Maria', 'age': 25, @@ -1235,7 +1247,14 @@ def test_export_data_for_xforms_without_submissions(self): 'info/age', 'kids/has_kids', 'gps', - 'web_browsers', + '_gps_latitude', + '_gps_longitude', + '_gps_altitude', + '_gps_precision', + 'web_browsers/firefox', + 'web_browsers/chrome', + 'web_browsers/ie', + 'web_browsers/safari', 'meta/instanceID', '_id', '_uuid', @@ -1309,7 +1328,8 @@ def test_show_choice_labels_select_multiple_3(self, mock_query_data): include_images=False, show_choice_labels=True) # pylint: disable=protected-access cursor = [row for row in csv_df_builder_1._query_data()] - result = [k for k in csv_df_builder_1._format_for_dataframe(cursor)] + result = [k for k in csv_df_builder_1._format_for_dataframe( + cursor, is_data=True)] expected_result = [{ 'name': 'Maria', 'age': 25, @@ -1370,50 +1390,38 @@ def test_multiple_repeats_column_order(self, mock_query_data): mock_query_data.return_value = data expected_header = [ - 'food/Apple', 'food/Orange', 'food/Banana', 'food/Pizza', - 'food/Lasgna', 'food/Cake', 'food/Chocolate', 'food/Salad', - 'food/Sandwich', 'no_food', 'food_repeat_count', - 'food_repeat[1]/food_group/Apple', - 'food_repeat[1]/food_group/Orange', - 'food_repeat[1]/food_group/Banana', - 'food_repeat[1]/food_group/Pizza', - 'food_repeat[1]/food_group/Lasgna', - 'food_repeat[1]/food_group/Cake', - 'food_repeat[1]/food_group/Chocolate', - 'food_repeat[1]/food_group/Salad', - 'food_repeat[1]/food_group/Sandwich', - 'food_repeat[2]/food_group/Apple', - 'food_repeat[2]/food_group/Orange', - 'food_repeat[2]/food_group/Banana', - 'food_repeat[2]/food_group/Pizza', - 'food_repeat[2]/food_group/Lasgna', - 'food_repeat[2]/food_group/Cake', - 'food_repeat[2]/food_group/Chocolate', - 'food_repeat[2]/food_group/Salad', - 'food_repeat[2]/food_group/Sandwich', 'no_food_2', - 'food_repeat_2_count', 'food_repeat_2[1]/food_group_2/Apple', - 'food_repeat_2[1]/food_group_2/Orange', - 'food_repeat_2[1]/food_group_2/Banana', - 'food_repeat_2[1]/food_group_2/Pizza', - 'food_repeat_2[1]/food_group_2/Lasgna', - 'food_repeat_2[1]/food_group_2/Cake', - 'food_repeat_2[1]/food_group_2/Chocolate', - 'food_repeat_2[1]/food_group_2/Salad', - 'food_repeat_2[1]/food_group_2/Sandwich', - 'food_repeat_2[2]/food_group_2/Apple', - 'food_repeat_2[2]/food_group_2/Orange', - 'food_repeat_2[2]/food_group_2/Banana', - 'food_repeat_2[2]/food_group_2/Pizza', - 'food_repeat_2[2]/food_group_2/Lasgna', - 'food_repeat_2[2]/food_group_2/Cake', - 'food_repeat_2[2]/food_group_2/Chocolate', - 'food_repeat_2[2]/food_group_2/Salad', - 'food_repeat_2[2]/food_group_2/Sandwich', 'gps', - '_gps_latitude', '_gps_longitude', '_gps_altitude', - '_gps_precision', 'meta/instanceID', '_id', '_uuid', - '_submission_time', '_date_modified', '_tags', - '_notes', '_version', '_duration', '_submitted_by', - '_total_media', '_media_count', '_media_all_received'] + 'food/Apple', + 'food/Orange', + 'food/Banana', + 'food/Pizza', + 'food/Lasgna', + 'food/Cake', + 'food/Chocolate', + 'food/Salad', + 'food/Sandwich', + 'no_food', + 'food_repeat_count', + 'no_food_2', + 'food_repeat_2_count', + 'gps', + '_gps_latitude', + '_gps_longitude', + '_gps_altitude', + '_gps_precision', + 'meta/instanceID', + '_id', + '_uuid', + '_submission_time', + '_date_modified', + '_tags', + '_notes', + '_version', + '_duration', + '_submitted_by', + '_total_media', + '_media_count', + '_media_all_received' + ] csv_df_builder = CSVDataFrameBuilder( self.user.username, self.xform.id_string, include_images=False) diff --git a/onadata/libs/utils/csv_builder.py b/onadata/libs/utils/csv_builder.py index dd8006bec8..0bfcfb98c2 100644 --- a/onadata/libs/utils/csv_builder.py +++ b/onadata/libs/utils/csv_builder.py @@ -555,27 +555,30 @@ def _update_columns_from_data(self, cursor): self.ordered_columns[key] = [key] + gps_xpaths image_xpaths = [] if not self.include_images \ else self.dd.get_media_survey_xpaths() - - for record in cursor: - # split select multiples - if self.split_select_multiples: - record = self._split_select_multiples( - record, self.select_multiples, - self.BINARY_SELECT_MULTIPLES, self.VALUE_SELECT_MULTIPLES, - show_choice_labels=self.show_choice_labels) - # check for gps and split into components i.e. latitude, longitude, - # altitude, precision - self._split_gps_fields(record, self.gps_fields) - self._tag_edit_string(record) - # re index repeats - for (key, value) in iteritems(record): - self._reindex( - key, value, self.ordered_columns, record, self.dd, - include_images=image_xpaths, - split_select_multiples=self.split_select_multiples, - index_tags=self.index_tags, - show_choice_labels=self.show_choice_labels, - language=self.language) + # cursor object for xform without instances + # is a list of xform headers. + if isinstance(cursor, QuerySet): + for record in cursor: + # split select multiples + if self.split_select_multiples: + record = self._split_select_multiples( + record, self.select_multiples, + self.BINARY_SELECT_MULTIPLES, + self.VALUE_SELECT_MULTIPLES, + show_choice_labels=self.show_choice_labels) + # check for gps and split into components + # i.e. latitude, longitude, altitude, precision + self._split_gps_fields(record, self.gps_fields) + self._tag_edit_string(record) + # re index repeats + for (key, value) in iteritems(record): + self._reindex( + key, value, self.ordered_columns, record, self.dd, + include_images=image_xpaths, + split_select_multiples=self.split_select_multiples, + index_tags=self.index_tags, + show_choice_labels=self.show_choice_labels, + language=self.language) def _format_for_dataframe(self, cursor, is_data=False): """ @@ -595,67 +598,77 @@ def _format_for_dataframe(self, cursor, is_data=False): image_xpaths = [] if not self.include_images \ else self.dd.get_media_survey_xpaths() - for record in cursor: - # split select multiples - if self.split_select_multiples: - record = self._split_select_multiples( - record, self.select_multiples, - self.BINARY_SELECT_MULTIPLES, - self.VALUE_SELECT_MULTIPLES, - show_choice_labels=self.show_choice_labels) - # check for gps and split into - # components i.e. latitude, longitude, - # altitude, precision - self._split_gps_fields(record, self.gps_fields) - self._tag_edit_string(record) - flat_dict = {} - # re index repeats - for (key, value) in iteritems(record): - reindexed = self._reindex( - key, value, self.ordered_columns, record, self.dd, - include_images=image_xpaths, - split_select_multiples=self.split_select_multiples, - index_tags=self.index_tags, - show_choice_labels=self.show_choice_labels, - language=self.language) - flat_dict.update(reindexed) - - yield flat_dict + flat_dict = {} + # Return empty dict for xforms without instances + if is_data: + for record in cursor: + # split select multiples + if self.split_select_multiples: + record = self._split_select_multiples( + record, self.select_multiples, + self.BINARY_SELECT_MULTIPLES, + self.VALUE_SELECT_MULTIPLES, + show_choice_labels=self.show_choice_labels) + # check for gps and split into + # components i.e. latitude, longitude, + # altitude, precision + self._split_gps_fields(record, self.gps_fields) + self._tag_edit_string(record) + # re index repeats + for (key, value) in iteritems(record): + reindexed = self._reindex( + key, value, self.ordered_columns, record, self.dd, + include_images=image_xpaths, + split_select_multiples=self.split_select_multiples, + index_tags=self.index_tags, + show_choice_labels=self.show_choice_labels, + language=self.language) + flat_dict.update(reindexed) + + yield flat_dict + else: + return flat_dict def export_to(self, path, dataview=None): self.ordered_columns = OrderedDict() self._build_ordered_columns(self.dd.survey, self.ordered_columns) + is_form_data = False if dataview: + # Export Dataview object data by default + is_form_data = True + cursor = dataview.query_data(dataview, all_data=True, filter_query=self.filter_query) if isinstance(cursor, QuerySet): cursor = cursor.iterator() self._update_columns_from_data(cursor) + data = self._format_for_dataframe( + cursor, is_data=is_form_data) + columns = list(chain.from_iterable( [[xpath] if cols is None else cols for (xpath, cols) in iteritems(self.ordered_columns) if [c for c in dataview.columns if xpath.startswith(c)]] )) - cursor = dataview.query_data(dataview, all_data=True, - filter_query=self.filter_query) - if isinstance(cursor, QuerySet): - cursor = cursor.iterator() - data = self._format_for_dataframe(cursor) else: try: cursor = self._query_data(self.filter_query) except NoRecordsFoundError: # Set cursor (XForm data) to xform schema cursor = self.xform.get_headers() + + # Unpack xform headers self._update_columns_from_data(cursor) - # Confirm form contains submissions - if len(cursor) > 0: - if isinstance(cursor, QuerySet): - cursor = cursor.iterator() - # Unpack xform data - data = self._format_for_dataframe(cursor) + + if isinstance(cursor, QuerySet): + cursor = cursor.iterator() + is_form_data = True + + # Unpack xform data + data = self._format_for_dataframe( + cursor, is_data=is_form_data) columns = list(chain.from_iterable( [[xpath] if cols is None else cols From 6ccb93c192cd8d76053ad676706c1d111c8b65fd Mon Sep 17 00:00:00 2001 From: WinnyTroy Date: Wed, 1 Sep 2021 12:54:13 +0300 Subject: [PATCH 07/10] Remove check for forms that contain submissions. CSV media files should be generated with xform headers for forms without instances, so this shouldnt be necessary --- .../apps/api/tests/viewsets/test_xform_viewset.py | 3 ++- onadata/apps/main/tests/test_base.py | 9 +-------- onadata/libs/serializers/metadata_serializer.py | 14 -------------- onadata/libs/tests/utils/test_csv_builder.py | 1 - 4 files changed, 3 insertions(+), 24 deletions(-) diff --git a/onadata/apps/api/tests/viewsets/test_xform_viewset.py b/onadata/apps/api/tests/viewsets/test_xform_viewset.py index dee74f031d..42371ee322 100644 --- a/onadata/apps/api/tests/viewsets/test_xform_viewset.py +++ b/onadata/apps/api/tests/viewsets/test_xform_viewset.py @@ -3527,7 +3527,6 @@ def test_csv_export_with_win_excel_utf8(self): "hxl_test", "hxl_example.xml"), forced_submission_time=_submission_time) self.assertTrue(self.xform.has_hxl_support) - self.xform.refresh_from_db() view = XFormViewSet.as_view({ 'get': 'retrieve' @@ -4869,11 +4868,13 @@ def test_external_choice_integer_name_xlsform(self): "integer_name_test.xlsx") with open(path, 'rb') as xls_file: # pylint: disable=no-member + meta_count = MetaData.objects.count() post_data = {'xls_file': xls_file} request = self.factory.post('/', data=post_data, **self.extra) response = view(request) xform = self.user.xforms.all()[0] self.assertEqual(response.status_code, 201) + self.assertEqual(meta_count + 4, MetaData.objects.count()) metadata = MetaData.objects.get( object_id=xform.id, data_value='itemsets.csv') self.assertIsNotNone(metadata) diff --git a/onadata/apps/main/tests/test_base.py b/onadata/apps/main/tests/test_base.py index 191858d811..d580c353fc 100644 --- a/onadata/apps/main/tests/test_base.py +++ b/onadata/apps/main/tests/test_base.py @@ -374,11 +374,4 @@ def _test_csv_files(self, csv_file, csv_file_path): for index, row in enumerate(expected_csv_reader): if None in row: row.pop(None) - # Refactoring to add these fields to the data - # read from the csv_file_path - current_exported_data = dict(data[index]) - stored_exported_data = dict(row) - stored_exported_data['_id'] = current_exported_data ['_id'] - stored_exported_data['_date_modified'] = current_exported_data['_date_modified'] - - self.assertEquals(sorted(stored_exported_data), sorted(current_exported_data)) + self.assertDictContainsSubset(row, data[index]) diff --git a/onadata/libs/serializers/metadata_serializer.py b/onadata/libs/serializers/metadata_serializer.py index 3dd3d9c1b0..5444c0b552 100644 --- a/onadata/libs/serializers/metadata_serializer.py +++ b/onadata/libs/serializers/metadata_serializer.py @@ -184,10 +184,6 @@ def validate(self, attrs): _(u"User has no permission to " "the dataview.") }) - # ensure received xform contains submissions - elif not xform.num_of_submissions > 0: - raise serializers.ValidationError( - f"Form {xform.title} contains no submissions.") else: raise serializers.ValidationError({ 'data_value': @@ -242,16 +238,6 @@ def create(self, validated_data): content_type = ContentType.objects.get_for_model(content_object) - # ensure current xform contains submissions - try: - if validated_data['xform'] and\ - not validated_data['xform'].num_of_submissions > 0: - raise serializers.ValidationError( - f"XForm {validated_data['xform'].title}\ - contains no submissions") - except KeyError: - pass - try: if data_type == XFORM_META_PERMS: metadata = \ diff --git a/onadata/libs/tests/utils/test_csv_builder.py b/onadata/libs/tests/utils/test_csv_builder.py index 99217c40c9..0fc0e432ae 100644 --- a/onadata/libs/tests/utils/test_csv_builder.py +++ b/onadata/libs/tests/utils/test_csv_builder.py @@ -104,7 +104,6 @@ def test_csv_dataframe_export_to(self): "nested_repeats", "01", submission_time=self._submission_time) self._submit_fixture_instance( "nested_repeats", "02", submission_time=self._submission_time) - self.xform.refresh_from_db() csv_df_builder = CSVDataFrameBuilder( self.user.username, self.xform.id_string, include_images=False) From a402434263cb96fed45190e93cd84760a79e1da1 Mon Sep 17 00:00:00 2001 From: WinnyTroy Date: Tue, 7 Sep 2021 19:06:24 +0300 Subject: [PATCH 08/10] Remove update columns from data function --- onadata/libs/tests/utils/test_csv_builder.py | 144 ++++++++----------- onadata/libs/utils/csv_builder.py | 80 ++--------- 2 files changed, 74 insertions(+), 150 deletions(-) diff --git a/onadata/libs/tests/utils/test_csv_builder.py b/onadata/libs/tests/utils/test_csv_builder.py index 0fc0e432ae..26dec26112 100644 --- a/onadata/libs/tests/utils/test_csv_builder.py +++ b/onadata/libs/tests/utils/test_csv_builder.py @@ -454,12 +454,12 @@ def test_csv_export(self): csv_file = open(temp_file.name, 'r') csv_reader = csv.reader(csv_file) header = next(csv_reader) - self.assertEqual(len(header), 17 + len(csv_df_builder.extra_columns)) + self.assertEqual(len(header), 6 + len(csv_df_builder.extra_columns)) rows = [] for row in csv_reader: rows.append(row) self.assertEqual(len(rows), 7) - self.assertEqual(rows[4][16], NA_REP) + self.assertEqual(rows[4][5], NA_REP) # close and delete file csv_file.close() @@ -488,7 +488,7 @@ def test_windows_excel_compatible_csv_export(self): csv_file = open(temp_file.name, 'r') csv_reader = csv.reader(csv_file) header = next(csv_reader) - self.assertEqual(len(header), 17 + len(csv_df_builder.extra_columns)) + self.assertEqual(len(header), 6 + len(csv_df_builder.extra_columns)) self.assertEqual(b'\xef\xbb\xbfname', header[0].encode('utf-8')) # close and delete file csv_file.close() @@ -573,23 +573,21 @@ def test_csv_export_remove_group_name(self): csv_file = open(temp_file.name, 'r') csv_reader = csv.reader(csv_file) header = next(csv_reader) - self.assertEqual(len(header), 17 + len(csv_df_builder.extra_columns)) + self.assertEqual(len(header), 6 + len(csv_df_builder.extra_columns)) expected_header = [ - 'name', 'age', 'has_kids', 'kids_name', 'kids_age', 'kids_name', - 'kids_age', 'gps', '_gps_latitude', '_gps_longitude', - '_gps_altitude', '_gps_precision', 'web_browsers/firefox', - 'web_browsers/chrome', 'web_browsers/ie', 'web_browsers/safari', - 'instanceID', '_id', '_uuid', '_submission_time', '_date_modified', - '_tags', '_notes', '_version', '_duration', '_submitted_by', + 'name', 'age', 'has_kids', 'gps', + 'web_browsers', 'instanceID', '_id', + '_uuid', '_submission_time', '_date_modified', '_tags', + '_notes', '_version', '_duration', '_submitted_by', '_total_media', '_media_count', '_media_all_received', - '_review_status', '_review_comment', '_review_date' - ] + '_review_status', '_review_comment', '_review_date'] + self.assertEqual(expected_header, header) rows = [] for row in csv_reader: rows.append(row) self.assertEqual(len(rows), 7) - self.assertEqual(rows[4][16], NA_REP) + self.assertEqual(rows[4][5], NA_REP) # close and delete file csv_file.close() os.unlink(temp_file.name) @@ -619,38 +617,36 @@ def test_csv_export_with_labels(self): csv_file = open(temp_file.name, 'r') csv_reader = csv.reader(csv_file) header = next(csv_reader) - self.assertEqual(len(header), 17 + len(csv_df_builder.extra_columns)) + self.assertEqual(len(header), 6 + len(csv_df_builder.extra_columns)) expected_header = [ - 'name', 'age', 'has_kids', 'kids_name', 'kids_age', 'kids_name', - 'kids_age', 'gps', '_gps_latitude', '_gps_longitude', - '_gps_altitude', '_gps_precision', 'web_browsers/firefox', - 'web_browsers/chrome', 'web_browsers/ie', 'web_browsers/safari', - 'instanceID', '_id', '_uuid', '_submission_time', '_date_modified', - '_tags', '_notes', '_version', '_duration', '_submitted_by', - '_total_media', '_media_count', '_media_all_received', - '_review_status', '_review_comment', '_review_date' - ] + 'name', + 'age', + 'has_kids', + 'gps', + 'web_browsers', + 'instanceID', + '_id', + '_uuid', + '_submission_time', + '_date_modified', + '_tags', + '_notes', + '_version', + '_duration', + '_submitted_by', + '_total_media', + '_media_count', + '_media_all_received', + '_review_status', + '_review_comment', + '_review_date' + ] self.assertEqual(expected_header, header) - labels = next(csv_reader) - self.assertEqual(len(labels), 17 + len(csv_df_builder.extra_columns)) - expected_labels = [ - 'Name', 'age', 'Do you have kids?', 'Kids Name', 'Kids Age', - 'Kids Name', 'Kids Age', '5. Record your GPS coordinates.', - '_gps_latitude', '_gps_longitude', '_gps_altitude', - '_gps_precision', 'web_browsers/Mozilla Firefox', - 'web_browsers/Google Chrome', 'web_browsers/Internet Explorer', - 'web_browsers/Safari', 'instanceID', '_id', '_uuid', - '_submission_time', '_date_modified', '_tags', '_notes', - '_version', '_duration', '_submitted_by', '_total_media', - '_media_count', '_media_all_received', '_review_status', - '_review_comment', '_review_date' - ] - self.assertEqual(expected_labels, labels) rows = [] for row in csv_reader: rows.append(row) - self.assertEqual(len(rows), 7) - self.assertEqual(rows[4][16], NA_REP) + self.assertEqual(len(rows), 8) + self.assertEqual(rows[4][5], NA_REP) # close and delete file csv_file.close() os.unlink(temp_file.name) @@ -680,25 +676,23 @@ def test_csv_export_with_labels_only(self): csv_file = open(temp_file.name, 'r') csv_reader = csv.reader(csv_file) labels = next(csv_reader) - self.assertEqual(len(labels), 17 + len(csv_df_builder.extra_columns)) + self.assertEqual(len(labels), 6 + len(csv_df_builder.extra_columns)) expected_labels = [ - 'Name', 'age', 'Do you have kids?', 'Kids Name', 'Kids Age', - 'Kids Name', 'Kids Age', '5. Record your GPS coordinates.', - '_gps_latitude', '_gps_longitude', '_gps_altitude', - '_gps_precision', 'web_browsers/Mozilla Firefox', - 'web_browsers/Google Chrome', 'web_browsers/Internet Explorer', - 'web_browsers/Safari', 'instanceID', '_id', '_uuid', - '_submission_time', '_date_modified', '_tags', '_notes', - '_version', '_duration', '_submitted_by', '_total_media', + 'Name', 'age', 'Do you have kids?', + '5. Record your GPS coordinates.', + '6. What web browsers do you use?', + 'instanceID', '_id', '_uuid', '_submission_time', + '_date_modified', '_tags', '_notes', '_version', + '_duration', '_submitted_by', '_total_media', '_media_count', '_media_all_received', '_review_status', - '_review_comment', '_review_date' - ] + '_review_comment', '_review_date'] + self.assertEqual(expected_labels, labels) rows = [] for row in csv_reader: rows.append(row) self.assertEqual(len(rows), 7) - self.assertEqual(rows[4][16], NA_REP) + self.assertEqual(rows[4][5], NA_REP) # close and delete file csv_file.close() os.unlink(temp_file.name) @@ -781,17 +775,15 @@ def test_csv_export_extra_columns(self): csv_file = open(temp_file.name, 'r') csv_reader = csv.reader(csv_file) header = next(csv_reader) - self.assertEqual(len(header), 17 + len(csv_df_builder.extra_columns)) + self.assertEqual(len(header), 6 + len(csv_df_builder.extra_columns)) expected_header = [ - 'name', 'age', 'has_kids', 'kids_name', 'kids_age', 'kids_name', - 'kids_age', 'gps', '_gps_latitude', '_gps_longitude', - '_gps_altitude', '_gps_precision', 'web_browsers/firefox', - 'web_browsers/chrome', 'web_browsers/ie', 'web_browsers/safari', - 'instanceID', '_id', '_uuid', '_submission_time', '_date_modified', - '_tags', '_notes', '_version', '_duration', '_submitted_by', - '_total_media', '_media_count', '_media_all_received', '_xform_id', - '_review_status', '_review_comment', '_review_date' - ] + 'name', 'age', 'has_kids', 'gps', 'web_browsers', + 'instanceID', '_id', '_uuid', '_submission_time', + '_date_modified', '_tags', '_notes', '_version', + '_duration', '_submitted_by', '_total_media', '_media_count', + '_media_all_received', '_xform_id', '_review_status', + '_review_comment', '_review_date'] + self.assertEqual(expected_header, header) # close and delete file csv_file.close() @@ -808,7 +800,7 @@ def test_csv_export_extra_columns(self): csv_file = open(temp_file.name, 'r') csv_reader = csv.reader(csv_file) header = next(csv_reader) - self.assertEqual(len(header), 17 + len(csv_df_builder.extra_columns)) + self.assertEqual(len(header), 6 + len(csv_df_builder.extra_columns)) self.assertEqual(expected_header, header) csv_file.close() os.unlink(temp_file.name) @@ -1246,14 +1238,7 @@ def test_export_data_for_xforms_without_submissions(self): 'info/age', 'kids/has_kids', 'gps', - '_gps_latitude', - '_gps_longitude', - '_gps_altitude', - '_gps_precision', - 'web_browsers/firefox', - 'web_browsers/chrome', - 'web_browsers/ie', - 'web_browsers/safari', + 'web_browsers', 'meta/instanceID', '_id', '_uuid', @@ -1266,7 +1251,8 @@ def test_export_data_for_xforms_without_submissions(self): '_submitted_by', '_total_media', '_media_count', - '_media_all_received'] + '_media_all_received' + ] # Test form headers are present on exported csv file. self.assertEqual(header, expected_header) @@ -1389,24 +1375,12 @@ def test_multiple_repeats_column_order(self, mock_query_data): mock_query_data.return_value = data expected_header = [ - 'food/Apple', - 'food/Orange', - 'food/Banana', - 'food/Pizza', - 'food/Lasgna', - 'food/Cake', - 'food/Chocolate', - 'food/Salad', - 'food/Sandwich', + 'food', 'no_food', 'food_repeat_count', 'no_food_2', 'food_repeat_2_count', 'gps', - '_gps_latitude', - '_gps_longitude', - '_gps_altitude', - '_gps_precision', 'meta/instanceID', '_id', '_uuid', diff --git a/onadata/libs/utils/csv_builder.py b/onadata/libs/utils/csv_builder.py index 0bfcfb98c2..f9148c7517 100644 --- a/onadata/libs/utils/csv_builder.py +++ b/onadata/libs/utils/csv_builder.py @@ -533,53 +533,6 @@ def _build_ordered_columns(cls, survey_element, ordered_columns, # generated when we reindex ordered_columns[child.get_abbreviated_xpath()] = None - def _update_columns_from_data(self, cursor): - """ - Adds ordered columns for select multiples column headers. - Expounds gps/geopoint column headers. - """ - # add ordered columns for select multiples - if self.split_select_multiples: - for key, choices in self.select_multiples.items(): - # HACK to ensure choices are NOT duplicated - if key in self.ordered_columns.keys(): - self.ordered_columns[key] = \ - remove_dups_from_list_maintain_order( - [choice.replace('/' + name, '/' + label) - if self.show_choice_labels else choice - for choice, name, label in choices]) - - # add ordered columns for gps fields - for key in self.gps_fields: - gps_xpaths = self.dd.get_additional_geopoint_xpaths(key) - self.ordered_columns[key] = [key] + gps_xpaths - image_xpaths = [] if not self.include_images \ - else self.dd.get_media_survey_xpaths() - # cursor object for xform without instances - # is a list of xform headers. - if isinstance(cursor, QuerySet): - for record in cursor: - # split select multiples - if self.split_select_multiples: - record = self._split_select_multiples( - record, self.select_multiples, - self.BINARY_SELECT_MULTIPLES, - self.VALUE_SELECT_MULTIPLES, - show_choice_labels=self.show_choice_labels) - # check for gps and split into components - # i.e. latitude, longitude, altitude, precision - self._split_gps_fields(record, self.gps_fields) - self._tag_edit_string(record) - # re index repeats - for (key, value) in iteritems(record): - self._reindex( - key, value, self.ordered_columns, record, self.dd, - include_images=image_xpaths, - split_select_multiples=self.split_select_multiples, - index_tags=self.index_tags, - show_choice_labels=self.show_choice_labels, - language=self.language) - def _format_for_dataframe(self, cursor, is_data=False): """ Adds ordered columns for select multiples data. @@ -590,7 +543,11 @@ def _format_for_dataframe(self, cursor, is_data=False): for (key, choices) in iteritems(self.select_multiples): # HACK to ensure choices are NOT duplicated self.ordered_columns[key] = \ - remove_dups_from_list_maintain_order(choices) + remove_dups_from_list_maintain_order( + [choice.replace('/' + name, '/' + label) + if self.show_choice_labels else choice + for choice, name, label in choices]) + # add ordered columns for gps fields for key in self.gps_fields: gps_xpaths = self.dd.get_additional_geopoint_xpaths(key) @@ -599,7 +556,6 @@ def _format_for_dataframe(self, cursor, is_data=False): else self.dd.get_media_survey_xpaths() flat_dict = {} - # Return empty dict for xforms without instances if is_data: for record in cursor: # split select multiples @@ -616,18 +572,16 @@ def _format_for_dataframe(self, cursor, is_data=False): self._tag_edit_string(record) # re index repeats for (key, value) in iteritems(record): - reindexed = self._reindex( + self._reindex( key, value, self.ordered_columns, record, self.dd, include_images=image_xpaths, split_select_multiples=self.split_select_multiples, index_tags=self.index_tags, show_choice_labels=self.show_choice_labels, language=self.language) - flat_dict.update(reindexed) + flat_dict.update(record) yield flat_dict - else: - return flat_dict def export_to(self, path, dataview=None): self.ordered_columns = OrderedDict() @@ -642,7 +596,6 @@ def export_to(self, path, dataview=None): filter_query=self.filter_query) if isinstance(cursor, QuerySet): cursor = cursor.iterator() - self._update_columns_from_data(cursor) data = self._format_for_dataframe( cursor, is_data=is_form_data) @@ -659,17 +612,6 @@ def export_to(self, path, dataview=None): # Set cursor (XForm data) to xform schema cursor = self.xform.get_headers() - # Unpack xform headers - self._update_columns_from_data(cursor) - - if isinstance(cursor, QuerySet): - cursor = cursor.iterator() - is_form_data = True - - # Unpack xform data - data = self._format_for_dataframe( - cursor, is_data=is_form_data) - columns = list(chain.from_iterable( [[xpath] if cols is None else cols for (xpath, cols) in iteritems(self.ordered_columns)])) @@ -681,6 +623,14 @@ def export_to(self, path, dataview=None): # add extra columns columns += [col for col in self.extra_columns] + if isinstance(cursor, QuerySet): + cursor = cursor.iterator() + is_form_data = True + + # Unpack xform columns and data + data = self._format_for_dataframe( + cursor, is_data=is_form_data) + columns_with_hxl = self.include_hxl and get_columns_with_hxl( self.dd.survey_elements) From 1efdcf0baaf8a784714c02d553642232e1ad194c Mon Sep 17 00:00:00 2001 From: WinnyTroy Date: Thu, 9 Sep 2021 09:17:38 +0300 Subject: [PATCH 09/10] Include functionality to unpack ordered columns, even for forms without submissions. The ordered columns object is used to generate the column headers. --- onadata/libs/tests/utils/test_csv_builder.py | 240 ++++++++++--------- onadata/libs/utils/csv_builder.py | 126 +++++----- 2 files changed, 196 insertions(+), 170 deletions(-) diff --git a/onadata/libs/tests/utils/test_csv_builder.py b/onadata/libs/tests/utils/test_csv_builder.py index 26dec26112..47491404ea 100644 --- a/onadata/libs/tests/utils/test_csv_builder.py +++ b/onadata/libs/tests/utils/test_csv_builder.py @@ -92,8 +92,7 @@ def _csv_data_for_dataframe(self): self.user.username, self.xform.id_string, include_images=False) # pylint: disable=protected-access cursor = csv_df_builder._query_data() - return [d for d in csv_df_builder._format_for_dataframe( - cursor, is_data=True)] + return [d for d in csv_df_builder._format_for_dataframe(cursor)] def test_csv_dataframe_export_to(self): """ @@ -454,7 +453,7 @@ def test_csv_export(self): csv_file = open(temp_file.name, 'r') csv_reader = csv.reader(csv_file) header = next(csv_reader) - self.assertEqual(len(header), 6 + len(csv_df_builder.extra_columns)) + self.assertEqual(len(header), 17 + len(csv_df_builder.extra_columns)) rows = [] for row in csv_reader: rows.append(row) @@ -488,7 +487,7 @@ def test_windows_excel_compatible_csv_export(self): csv_file = open(temp_file.name, 'r') csv_reader = csv.reader(csv_file) header = next(csv_reader) - self.assertEqual(len(header), 6 + len(csv_df_builder.extra_columns)) + self.assertEqual(len(header), 17 + len(csv_df_builder.extra_columns)) self.assertEqual(b'\xef\xbb\xbfname', header[0].encode('utf-8')) # close and delete file csv_file.close() @@ -573,15 +572,17 @@ def test_csv_export_remove_group_name(self): csv_file = open(temp_file.name, 'r') csv_reader = csv.reader(csv_file) header = next(csv_reader) - self.assertEqual(len(header), 6 + len(csv_df_builder.extra_columns)) + self.assertEqual(len(header), 17 + len(csv_df_builder.extra_columns)) expected_header = [ - 'name', 'age', 'has_kids', 'gps', - 'web_browsers', 'instanceID', '_id', - '_uuid', '_submission_time', '_date_modified', '_tags', - '_notes', '_version', '_duration', '_submitted_by', + 'name', 'age', 'has_kids', 'kids_name', 'kids_age', 'kids_name', + 'kids_age', 'gps', '_gps_latitude', '_gps_longitude', + '_gps_altitude', '_gps_precision', 'web_browsers/firefox', + 'web_browsers/chrome', 'web_browsers/ie', 'web_browsers/safari', + 'instanceID', '_id', '_uuid', '_submission_time', '_date_modified', + '_tags', '_notes', '_version', '_duration', '_submitted_by', '_total_media', '_media_count', '_media_all_received', - '_review_status', '_review_comment', '_review_date'] - + '_review_status', '_review_comment', '_review_date' + ] self.assertEqual(expected_header, header) rows = [] for row in csv_reader: @@ -617,35 +618,37 @@ def test_csv_export_with_labels(self): csv_file = open(temp_file.name, 'r') csv_reader = csv.reader(csv_file) header = next(csv_reader) - self.assertEqual(len(header), 6 + len(csv_df_builder.extra_columns)) + self.assertEqual(len(header), 17 + len(csv_df_builder.extra_columns)) expected_header = [ - 'name', - 'age', - 'has_kids', - 'gps', - 'web_browsers', - 'instanceID', - '_id', - '_uuid', - '_submission_time', - '_date_modified', - '_tags', - '_notes', - '_version', - '_duration', - '_submitted_by', - '_total_media', - '_media_count', - '_media_all_received', - '_review_status', - '_review_comment', - '_review_date' - ] + 'name', 'age', 'has_kids', 'kids_name', 'kids_age', 'kids_name', + 'kids_age', 'gps', '_gps_latitude', '_gps_longitude', + '_gps_altitude', '_gps_precision', 'web_browsers/firefox', + 'web_browsers/chrome', 'web_browsers/ie', 'web_browsers/safari', + 'instanceID', '_id', '_uuid', '_submission_time', '_date_modified', + '_tags', '_notes', '_version', '_duration', '_submitted_by', + '_total_media', '_media_count', '_media_all_received', + '_review_status', '_review_comment', '_review_date' + ] self.assertEqual(expected_header, header) + labels = next(csv_reader) + self.assertEqual(len(labels), 17 + len(csv_df_builder.extra_columns)) + expected_labels = [ + 'Name', 'age', 'Do you have kids?', 'Kids Name', 'Kids Age', + 'Kids Name', 'Kids Age', '5. Record your GPS coordinates.', + '_gps_latitude', '_gps_longitude', '_gps_altitude', + '_gps_precision', 'web_browsers/Mozilla Firefox', + 'web_browsers/Google Chrome', 'web_browsers/Internet Explorer', + 'web_browsers/Safari', 'instanceID', '_id', '_uuid', + '_submission_time', '_date_modified', '_tags', '_notes', + '_version', '_duration', '_submitted_by', '_total_media', + '_media_count', '_media_all_received', '_review_status', + '_review_comment', '_review_date' + ] + self.assertEqual(expected_labels, labels) rows = [] for row in csv_reader: rows.append(row) - self.assertEqual(len(rows), 8) + self.assertEqual(len(rows), 7) self.assertEqual(rows[4][5], NA_REP) # close and delete file csv_file.close() @@ -676,17 +679,19 @@ def test_csv_export_with_labels_only(self): csv_file = open(temp_file.name, 'r') csv_reader = csv.reader(csv_file) labels = next(csv_reader) - self.assertEqual(len(labels), 6 + len(csv_df_builder.extra_columns)) + self.assertEqual(len(labels), 17 + len(csv_df_builder.extra_columns)) expected_labels = [ - 'Name', 'age', 'Do you have kids?', - '5. Record your GPS coordinates.', - '6. What web browsers do you use?', - 'instanceID', '_id', '_uuid', '_submission_time', - '_date_modified', '_tags', '_notes', '_version', - '_duration', '_submitted_by', '_total_media', + 'Name', 'age', 'Do you have kids?', 'Kids Name', 'Kids Age', + 'Kids Name', 'Kids Age', '5. Record your GPS coordinates.', + '_gps_latitude', '_gps_longitude', '_gps_altitude', + '_gps_precision', 'web_browsers/Mozilla Firefox', + 'web_browsers/Google Chrome', 'web_browsers/Internet Explorer', + 'web_browsers/Safari', 'instanceID', '_id', '_uuid', + '_submission_time', '_date_modified', '_tags', '_notes', + '_version', '_duration', '_submitted_by', '_total_media', '_media_count', '_media_all_received', '_review_status', - '_review_comment', '_review_date'] - + '_review_comment', '_review_date' + ] self.assertEqual(expected_labels, labels) rows = [] for row in csv_reader: @@ -739,8 +744,7 @@ def test_no_split_select_multiples(self, mock_query_data): include_images=False) # pylint: disable=protected-access cursor = [row for row in csv_df_builder._query_data()] - result = [k for k in csv_df_builder._format_for_dataframe( - cursor, is_data=True)] + result = [k for k in csv_df_builder._format_for_dataframe(cursor)] expected_result = [{ 'name': 'Tom', 'age': 23, @@ -775,15 +779,17 @@ def test_csv_export_extra_columns(self): csv_file = open(temp_file.name, 'r') csv_reader = csv.reader(csv_file) header = next(csv_reader) - self.assertEqual(len(header), 6 + len(csv_df_builder.extra_columns)) + self.assertEqual(len(header), 17 + len(csv_df_builder.extra_columns)) expected_header = [ - 'name', 'age', 'has_kids', 'gps', 'web_browsers', - 'instanceID', '_id', '_uuid', '_submission_time', - '_date_modified', '_tags', '_notes', '_version', - '_duration', '_submitted_by', '_total_media', '_media_count', - '_media_all_received', '_xform_id', '_review_status', - '_review_comment', '_review_date'] - + 'name', 'age', 'has_kids', 'kids_name', 'kids_age', 'kids_name', + 'kids_age', 'gps', '_gps_latitude', '_gps_longitude', + '_gps_altitude', '_gps_precision', 'web_browsers/firefox', + 'web_browsers/chrome', 'web_browsers/ie', 'web_browsers/safari', + 'instanceID', '_id', '_uuid', '_submission_time', '_date_modified', + '_tags', '_notes', '_version', '_duration', '_submitted_by', + '_total_media', '_media_count', '_media_all_received', '_xform_id', + '_review_status', '_review_comment', '_review_date' + ] self.assertEqual(expected_header, header) # close and delete file csv_file.close() @@ -800,7 +806,7 @@ def test_csv_export_extra_columns(self): csv_file = open(temp_file.name, 'r') csv_reader = csv.reader(csv_file) header = next(csv_reader) - self.assertEqual(len(header), 6 + len(csv_df_builder.extra_columns)) + self.assertEqual(len(header), 17 + len(csv_df_builder.extra_columns)) self.assertEqual(expected_header, header) csv_file.close() os.unlink(temp_file.name) @@ -818,8 +824,7 @@ def test_index_tag_replacement(self): self.user.username, self.xform.id_string, include_images=False, index_tags=('_', '_')) cursor = csv_df_builder._query_data() - result = [d for d in csv_df_builder._format_for_dataframe( - cursor, is_data=True)][0] + result = [d for d in csv_df_builder._format_for_dataframe(cursor)][0] # remove dynamic fields ignore_list = [ '_uuid', 'meta/instanceID', 'formhub/uuid', '_submission_time', @@ -899,8 +904,7 @@ def test_show_choice_labels_multi_language(self, mock_query_data): include_images=False, show_choice_labels=True, language='French') # pylint: disable=protected-access cursor = [row for row in csv_df_builder._query_data()] - result = [k for k in csv_df_builder._format_for_dataframe( - cursor, is_data=True)] + result = [k for k in csv_df_builder._format_for_dataframe(cursor)] expected_result = [{ 'name': 'Maria', 'age': 25, @@ -941,8 +945,7 @@ def test_show_choice_labels_multi_language_1(self, mock_query_data): include_images=False, show_choice_labels=True, language='English') # pylint: disable=protected-access cursor = [row for row in csv_df_builder._query_data()] - result = [k for k in csv_df_builder._format_for_dataframe( - cursor, is_data=True)] + result = [k for k in csv_df_builder._format_for_dataframe(cursor)] expected_result = [{ 'name': 'Maria', 'age': 25, @@ -982,8 +985,7 @@ def test_show_choice_labels(self, mock_query_data): include_images=False, show_choice_labels=True) # pylint: disable=protected-access cursor = [row for row in csv_df_builder._query_data()] - result = [k for k in csv_df_builder._format_for_dataframe( - cursor, is_data=True)] + result = [k for k in csv_df_builder._format_for_dataframe(cursor)] expected_result = [{ 'name': 'Maria', 'age': 25, @@ -1023,8 +1025,7 @@ def test_show_choice_labels_select_multiple(self, mock_query_data): include_images=False, show_choice_labels=True) # pylint: disable=protected-access cursor = [row for row in csv_df_builder._query_data()] - result = [k for k in csv_df_builder._format_for_dataframe( - cursor, is_data=True)] + result = [k for k in csv_df_builder._format_for_dataframe(cursor)] expected_result = [{ 'name': 'Maria', 'age': 25, @@ -1066,8 +1067,7 @@ def test_show_choice_labels_select_multiple_language(self, include_images=False, show_choice_labels=True, language='Fr') # pylint: disable=protected-access cursor = [row for row in csv_df_builder._query_data()] - result = [k for k in csv_df_builder._format_for_dataframe( - cursor, is_data=True)] + result = [k for k in csv_df_builder._format_for_dataframe(cursor)] expected_result = [{ 'name': 'Maria', 'age': 25, @@ -1110,8 +1110,7 @@ def test_show_choice_labels_select_multiple_1(self, mock_query_data): include_images=False, show_choice_labels=True) # pylint: disable=protected-access cursor = [row for row in csv_df_builder._query_data()] - result = [k for k in csv_df_builder._format_for_dataframe( - cursor, is_data=True)] + result = [k for k in csv_df_builder._format_for_dataframe(cursor)] expected_result = [{ 'name': 'Maria', 'age': 25, @@ -1157,8 +1156,7 @@ def test_show_choice_labels_select_multiple_1_language(self, include_images=False, show_choice_labels=True, language='Fr') # pylint: disable=protected-access cursor = [row for row in csv_df_builder._query_data()] - result = [k for k in csv_df_builder._format_for_dataframe( - cursor, is_data=True)] + result = [k for k in csv_df_builder._format_for_dataframe(cursor)] expected_result = [{ 'name': 'Maria', 'age': 25, @@ -1202,8 +1200,7 @@ def test_show_choice_labels_select_multiple_2(self, mock_query_data): include_images=False, show_choice_labels=True) # pylint: disable=protected-access cursor = [row for row in csv_df_builder_1._query_data()] - result = [k for k in csv_df_builder_1._format_for_dataframe( - cursor, is_data=True)] + result = [k for k in csv_df_builder_1._format_for_dataframe(cursor)] expected_result = [{ 'name': 'Maria', 'age': 25, @@ -1234,25 +1231,13 @@ def test_export_data_for_xforms_without_submissions(self): header = next(csv_reader) expected_header = [ - 'info/name', - 'info/age', - 'kids/has_kids', - 'gps', - 'web_browsers', - 'meta/instanceID', - '_id', - '_uuid', - '_submission_time', - '_date_modified', - '_tags', - '_notes', - '_version', - '_duration', - '_submitted_by', - '_total_media', - '_media_count', - '_media_all_received' - ] + 'info/name', 'info/age', 'kids/has_kids', 'gps', '_gps_latitude', + '_gps_longitude', '_gps_altitude', '_gps_precision', + 'web_browsers/firefox', 'web_browsers/chrome', 'web_browsers/ie', + 'web_browsers/safari', 'meta/instanceID', '_id', '_uuid', + '_submission_time', '_date_modified', '_tags', '_notes', + '_version', '_duration', '_submitted_by', '_total_media', + '_media_count', '_media_all_received'] # Test form headers are present on exported csv file. self.assertEqual(header, expected_header) @@ -1260,8 +1245,8 @@ def test_export_data_for_xforms_without_submissions(self): def test_export_raises_NoRecordsFound_for_form_without_instances(self): """ - Test xform schema for form with no submission - is successfully exported + Test exporting records for forms without submissions raises + NorecordsFound exception. """ fixture = "new_repeats" # publish form so we have a dd @@ -1313,8 +1298,7 @@ def test_show_choice_labels_select_multiple_3(self, mock_query_data): include_images=False, show_choice_labels=True) # pylint: disable=protected-access cursor = [row for row in csv_df_builder_1._query_data()] - result = [k for k in csv_df_builder_1._format_for_dataframe( - cursor, is_data=True)] + result = [k for k in csv_df_builder_1._format_for_dataframe(cursor)] expected_result = [{ 'name': 'Maria', 'age': 25, @@ -1375,26 +1359,50 @@ def test_multiple_repeats_column_order(self, mock_query_data): mock_query_data.return_value = data expected_header = [ - 'food', - 'no_food', - 'food_repeat_count', - 'no_food_2', - 'food_repeat_2_count', - 'gps', - 'meta/instanceID', - '_id', - '_uuid', - '_submission_time', - '_date_modified', - '_tags', - '_notes', - '_version', - '_duration', - '_submitted_by', - '_total_media', - '_media_count', - '_media_all_received' - ] + 'food/Apple', 'food/Orange', 'food/Banana', 'food/Pizza', + 'food/Lasgna', 'food/Cake', 'food/Chocolate', 'food/Salad', + 'food/Sandwich', 'no_food', 'food_repeat_count', + 'food_repeat[1]/food_group/Apple', + 'food_repeat[1]/food_group/Orange', + 'food_repeat[1]/food_group/Banana', + 'food_repeat[1]/food_group/Pizza', + 'food_repeat[1]/food_group/Lasgna', + 'food_repeat[1]/food_group/Cake', + 'food_repeat[1]/food_group/Chocolate', + 'food_repeat[1]/food_group/Salad', + 'food_repeat[1]/food_group/Sandwich', + 'food_repeat[2]/food_group/Apple', + 'food_repeat[2]/food_group/Orange', + 'food_repeat[2]/food_group/Banana', + 'food_repeat[2]/food_group/Pizza', + 'food_repeat[2]/food_group/Lasgna', + 'food_repeat[2]/food_group/Cake', + 'food_repeat[2]/food_group/Chocolate', + 'food_repeat[2]/food_group/Salad', + 'food_repeat[2]/food_group/Sandwich', 'no_food_2', + 'food_repeat_2_count', 'food_repeat_2[1]/food_group_2/Apple', + 'food_repeat_2[1]/food_group_2/Orange', + 'food_repeat_2[1]/food_group_2/Banana', + 'food_repeat_2[1]/food_group_2/Pizza', + 'food_repeat_2[1]/food_group_2/Lasgna', + 'food_repeat_2[1]/food_group_2/Cake', + 'food_repeat_2[1]/food_group_2/Chocolate', + 'food_repeat_2[1]/food_group_2/Salad', + 'food_repeat_2[1]/food_group_2/Sandwich', + 'food_repeat_2[2]/food_group_2/Apple', + 'food_repeat_2[2]/food_group_2/Orange', + 'food_repeat_2[2]/food_group_2/Banana', + 'food_repeat_2[2]/food_group_2/Pizza', + 'food_repeat_2[2]/food_group_2/Lasgna', + 'food_repeat_2[2]/food_group_2/Cake', + 'food_repeat_2[2]/food_group_2/Chocolate', + 'food_repeat_2[2]/food_group_2/Salad', + 'food_repeat_2[2]/food_group_2/Sandwich', 'gps', + '_gps_latitude', '_gps_longitude', '_gps_altitude', + '_gps_precision', 'meta/instanceID', '_id', '_uuid', + '_submission_time', '_date_modified', '_tags', + '_notes', '_version', '_duration', '_submitted_by', + '_total_media', '_media_count', '_media_all_received'] csv_df_builder = CSVDataFrameBuilder( self.user.username, self.xform.id_string, include_images=False) diff --git a/onadata/libs/utils/csv_builder.py b/onadata/libs/utils/csv_builder.py index f9148c7517..3f7fa29acc 100644 --- a/onadata/libs/utils/csv_builder.py +++ b/onadata/libs/utils/csv_builder.py @@ -1,5 +1,5 @@ -from collections import OrderedDict from itertools import chain +from collections import OrderedDict import unicodecsv as csv from django.conf import settings @@ -533,20 +533,22 @@ def _build_ordered_columns(cls, survey_element, ordered_columns, # generated when we reindex ordered_columns[child.get_abbreviated_xpath()] = None - def _format_for_dataframe(self, cursor, is_data=False): + def _update_ordered_columns_from_data(self, cursor): """ - Adds ordered columns for select multiples data. - Expounds gps/geopoint data. + Populates `self.ordered_columns` object that is + used to generate export column headers for + forms that split select multiple and gps data. """ # add ordered columns for select multiples if self.split_select_multiples: for (key, choices) in iteritems(self.select_multiples): # HACK to ensure choices are NOT duplicated - self.ordered_columns[key] = \ - remove_dups_from_list_maintain_order( - [choice.replace('/' + name, '/' + label) - if self.show_choice_labels else choice - for choice, name, label in choices]) + if key in self.ordered_columns.keys(): + self.ordered_columns[key] = \ + remove_dups_from_list_maintain_order( + [choice.replace('/' + name, '/' + label) + if self.show_choice_labels else choice + for choice, name, label in choices]) # add ordered columns for gps fields for key in self.gps_fields: @@ -555,50 +557,63 @@ def _format_for_dataframe(self, cursor, is_data=False): image_xpaths = [] if not self.include_images \ else self.dd.get_media_survey_xpaths() - flat_dict = {} - if is_data: - for record in cursor: - # split select multiples - if self.split_select_multiples: - record = self._split_select_multiples( - record, self.select_multiples, - self.BINARY_SELECT_MULTIPLES, - self.VALUE_SELECT_MULTIPLES, - show_choice_labels=self.show_choice_labels) - # check for gps and split into - # components i.e. latitude, longitude, - # altitude, precision - self._split_gps_fields(record, self.gps_fields) - self._tag_edit_string(record) - # re index repeats - for (key, value) in iteritems(record): - self._reindex( - key, value, self.ordered_columns, record, self.dd, - include_images=image_xpaths, - split_select_multiples=self.split_select_multiples, - index_tags=self.index_tags, - show_choice_labels=self.show_choice_labels, - language=self.language) - flat_dict.update(record) - - yield flat_dict + # add ordered columns for nested repeat data + for record in cursor: + # re index column repeats + for (key, value) in iteritems(record): + self._reindex( + key, value, self.ordered_columns, record, self.dd, + include_images=image_xpaths, + split_select_multiples=self.split_select_multiples, + index_tags=self.index_tags, + show_choice_labels=self.show_choice_labels, + language=self.language) + + def _format_for_dataframe(self, cursor): + """ + Unpacks nested repeat data for export. + """ + image_xpaths = [] if not self.include_images \ + else self.dd.get_media_survey_xpaths() + for record in cursor: + # split select multiples + if self.split_select_multiples: + record = self._split_select_multiples( + record, self.select_multiples, + self.BINARY_SELECT_MULTIPLES, + self.VALUE_SELECT_MULTIPLES, + show_choice_labels=self.show_choice_labels) + # check for gps and split into + # components i.e. latitude, longitude, + # altitude, precision + self._split_gps_fields(record, self.gps_fields) + self._tag_edit_string(record) + flat_dict = {} + # re index repeats + for (key, value) in iteritems(record): + reindexed = self._reindex( + key, value, self.ordered_columns, record, self.dd, + include_images=image_xpaths, + split_select_multiples=self.split_select_multiples, + index_tags=self.index_tags, + show_choice_labels=self.show_choice_labels, + language=self.language) + flat_dict.update(reindexed) + yield flat_dict def export_to(self, path, dataview=None): self.ordered_columns = OrderedDict() self._build_ordered_columns(self.dd.survey, self.ordered_columns) - is_form_data = False if dataview: - # Export Dataview object data by default - is_form_data = True - cursor = dataview.query_data(dataview, all_data=True, filter_query=self.filter_query) if isinstance(cursor, QuerySet): cursor = cursor.iterator() - data = self._format_for_dataframe( - cursor, is_data=is_form_data) + self._update_ordered_columns_from_data(cursor) + + data = self._format_for_dataframe(cursor) columns = list(chain.from_iterable( [[xpath] if cols is None else cols @@ -609,27 +624,30 @@ def export_to(self, path, dataview=None): try: cursor = self._query_data(self.filter_query) except NoRecordsFoundError: - # Set cursor (XForm data) to xform schema - cursor = self.xform.get_headers() + # Set cursor object to an an empty queryset + cursor = self.xform.instances.none() + # Define export columns using xform schema + columns = self.xform.get_headers() + + self._update_ordered_columns_from_data(cursor) + + if isinstance(cursor, QuerySet): + cursor = cursor.iterator() + + # Unpack xform columns and data + data = self._format_for_dataframe(cursor) columns = list(chain.from_iterable( [[xpath] if cols is None else cols for (xpath, cols) in iteritems(self.ordered_columns)])) + # add extra columns + columns += [col for col in self.extra_columns] + for field in self.dd.get_survey_elements_of_type('osm'): columns += OsmData.get_tag_keys(self.xform, field.get_abbreviated_xpath(), include_prefix=True) - # add extra columns - columns += [col for col in self.extra_columns] - - if isinstance(cursor, QuerySet): - cursor = cursor.iterator() - is_form_data = True - - # Unpack xform columns and data - data = self._format_for_dataframe( - cursor, is_data=is_form_data) columns_with_hxl = self.include_hxl and get_columns_with_hxl( self.dd.survey_elements) From ad7e09f4a9b39bf6701a9edd6ec4a21de5dc75b8 Mon Sep 17 00:00:00 2001 From: WinnyTroy Date: Wed, 22 Sep 2021 09:20:11 +0300 Subject: [PATCH 10/10] Add tests for exports with newer submissions to check that column headers are re-created for newer export --- onadata/libs/tests/utils/test_csv_builder.py | 67 ++++++++++++++++++++ onadata/libs/utils/csv_builder.py | 12 ++-- 2 files changed, 71 insertions(+), 8 deletions(-) diff --git a/onadata/libs/tests/utils/test_csv_builder.py b/onadata/libs/tests/utils/test_csv_builder.py index 47491404ea..684817970b 100644 --- a/onadata/libs/tests/utils/test_csv_builder.py +++ b/onadata/libs/tests/utils/test_csv_builder.py @@ -1243,6 +1243,73 @@ def test_export_data_for_xforms_without_submissions(self): csv_file.close() + def test_export_data_for_xforms_with_newer_submissions(self): + """ + Test xform schema for form with no submission + is successfully exported + """ + fixture = "new_repeats" + # publish form so we have a dd + self._publish_xls_fixture_set_xform(fixture) + + # Confirm form has not submissions so far + self.assertEquals(self.xform.instances.count(), 0) + # Generate csv export for form + csv_df_builder = CSVDataFrameBuilder( + self.user.username, self.xform.id_string, include_images=False) + temp_file = NamedTemporaryFile(suffix=".csv", delete=False) + csv_df_builder.export_to(temp_file.name) + csv_file = open(temp_file.name, 'r') + csv_reader = csv.reader(csv_file) + header = next(csv_reader) + + expected_header = [ + 'info/name', 'info/age', 'kids/has_kids', 'gps', '_gps_latitude', + '_gps_longitude', '_gps_altitude', '_gps_precision', + 'web_browsers/firefox', 'web_browsers/chrome', 'web_browsers/ie', + 'web_browsers/safari', 'meta/instanceID', '_id', '_uuid', + '_submission_time', '_date_modified', '_tags', '_notes', + '_version', '_duration', '_submitted_by', '_total_media', + '_media_count', '_media_all_received'] + # Test form headers are present on exported csv file. + self.assertEqual(header, expected_header) + + # make sibmissions to xform after export was generated + for _ in range(4): + self._submit_fixture_instance("new_repeats", "01") + self._submit_fixture_instance("new_repeats", "02") + # pylint: disable=protected-access + record_count = csv_df_builder._query_data(count=True) + self.assertEqual(record_count, 5) + temp_file = NamedTemporaryFile(suffix=".csv", delete=False) + csv_df_builder.export_to(temp_file.name) + csv_file = open(temp_file.name, 'r') + csv_reader = csv.reader(csv_file) + newer_header = next(csv_reader) + expected_headers = [ + 'info/name', 'info/age', 'kids/has_kids', + 'kids/kids_details[1]/kids_name', 'kids/kids_details[1]/kids_age', + 'kids/kids_details[2]/kids_name', 'kids/kids_details[2]/kids_age', + 'gps', '_gps_latitude', '_gps_longitude', '_gps_altitude', + '_gps_precision', 'web_browsers/firefox', 'web_browsers/chrome', + 'web_browsers/ie', 'web_browsers/safari', 'meta/instanceID', '_id', + '_uuid', '_submission_time', '_date_modified', '_tags', '_notes', + '_version', '_duration', '_submitted_by', '_total_media', + '_media_count', '_media_all_received'] + + # Test export headers are recreated with repeat data. + self.assertEqual(newer_header, expected_headers) + + self.assertEqual(len(header), 13 + len(csv_df_builder.extra_columns)) + rows = [] + for row in csv_reader: + rows.append(row) + self.assertEqual(len(rows), 5) + self.assertEqual(rows[4][5], NA_REP) + + # close and delete file + csv_file.close() + def test_export_raises_NoRecordsFound_for_form_without_instances(self): """ Test exporting records for forms without submissions raises diff --git a/onadata/libs/utils/csv_builder.py b/onadata/libs/utils/csv_builder.py index 3f7fa29acc..089a11e150 100644 --- a/onadata/libs/utils/csv_builder.py +++ b/onadata/libs/utils/csv_builder.py @@ -397,6 +397,8 @@ def __init__(self, username, id_string, filter_query=None, show_choice_labels, include_reviews, language) self.ordered_columns = OrderedDict() + self.image_xpaths = [] if not self.include_images \ + else self.dd.get_media_survey_xpaths() def _setup(self): super(CSVDataFrameBuilder, self)._setup() @@ -554,8 +556,6 @@ def _update_ordered_columns_from_data(self, cursor): for key in self.gps_fields: gps_xpaths = self.dd.get_additional_geopoint_xpaths(key) self.ordered_columns[key] = [key] + gps_xpaths - image_xpaths = [] if not self.include_images \ - else self.dd.get_media_survey_xpaths() # add ordered columns for nested repeat data for record in cursor: @@ -563,7 +563,7 @@ def _update_ordered_columns_from_data(self, cursor): for (key, value) in iteritems(record): self._reindex( key, value, self.ordered_columns, record, self.dd, - include_images=image_xpaths, + include_images=self.image_xpaths, split_select_multiples=self.split_select_multiples, index_tags=self.index_tags, show_choice_labels=self.show_choice_labels, @@ -573,8 +573,6 @@ def _format_for_dataframe(self, cursor): """ Unpacks nested repeat data for export. """ - image_xpaths = [] if not self.include_images \ - else self.dd.get_media_survey_xpaths() for record in cursor: # split select multiples if self.split_select_multiples: @@ -593,7 +591,7 @@ def _format_for_dataframe(self, cursor): for (key, value) in iteritems(record): reindexed = self._reindex( key, value, self.ordered_columns, record, self.dd, - include_images=image_xpaths, + include_images=self.image_xpaths, split_select_multiples=self.split_select_multiples, index_tags=self.index_tags, show_choice_labels=self.show_choice_labels, @@ -626,8 +624,6 @@ def export_to(self, path, dataview=None): except NoRecordsFoundError: # Set cursor object to an an empty queryset cursor = self.xform.instances.none() - # Define export columns using xform schema - columns = self.xform.get_headers() self._update_ordered_columns_from_data(cursor)