From 4f3746110893096b73ee032f7a5a5a1c1b88ef3b Mon Sep 17 00:00:00 2001 From: Davis Muro Date: Tue, 17 Jan 2023 13:30:01 +0300 Subject: [PATCH] Update `sheet_to_csv` function - Ensure headers are not duplicated - Ensure that integers & datetimes are correctly handled. - Ensure empty values are not processed - Utilize PyXForm `xlsx_value_to_str` to convert XLSX values - Update tests --- .../api/tests/viewsets/test_xform_viewset.py | 16 +++--- onadata/apps/viewer/models/data_dictionary.py | 50 ++++++------------- 2 files changed, 22 insertions(+), 44 deletions(-) diff --git a/onadata/apps/api/tests/viewsets/test_xform_viewset.py b/onadata/apps/api/tests/viewsets/test_xform_viewset.py index d782ac536d..dd5bbdbfe2 100644 --- a/onadata/apps/api/tests/viewsets/test_xform_viewset.py +++ b/onadata/apps/api/tests/viewsets/test_xform_viewset.py @@ -5506,15 +5506,14 @@ def test_external_choice_integer_name_xlsform(self): self.assertIsNotNone(metadata) csv_reader = csv.reader(codecs.iterdecode(metadata.data_file, "utf-8")) - header = next(csv_reader) expected_data = [ ["list_name", "name", "label", "state", "county"], - ["states", "1", "Texas"], - ["states", "2", "Washington"], - ["counties", "b1", "King", "2"], - ["counties", "b2", "Pierce", "2"], - ["counties", "b3", "King", "1"], - ["counties", "b4", "Cameron", "1"], + ["states", "1", "Texas", "", ""], + ["states", "2", "Washington", "", ""], + ["counties", "b1", "King", "2", ""], + ["counties", "b2", "Pierce", "2", ""], + ["counties", "b3", "King", "1", ""], + ["counties", "b4", "Cameron", "1", ""], ["cities", "dumont", "Dumont", "1", "b3"], ["cities", "finney", "Finney", "1", "b3"], ["cities", "brownsville", "brownsville", "1", "b4"], @@ -5524,8 +5523,7 @@ def test_external_choice_integer_name_xlsform(self): ["cities", "tacoma", "Tacoma", "2", "b2"], ["cities", "puyallup", "Puyallup", "2", "b2"], ] - self.assertEqual(header, expected_data[0]) - for index, row in enumerate(csv_reader, start=1): + for index, row in 
enumerate(csv_reader): self.assertEqual(row, expected_data[index]) def test_csv_xls_import_errors(self): diff --git a/onadata/apps/viewer/models/data_dictionary.py b/onadata/apps/viewer/models/data_dictionary.py index 4de9a2eeaa..856ea96868 100644 --- a/onadata/apps/viewer/models/data_dictionary.py +++ b/onadata/apps/viewer/models/data_dictionary.py @@ -5,17 +5,19 @@ import os from io import BytesIO, StringIO -import unicodecsv as csv -import openpyxl from django.core.files.uploadedfile import InMemoryUploadedFile from django.db.models.signals import post_save, pre_save from django.utils import timezone from django.utils.translation import gettext as _ + +import openpyxl +import unicodecsv as csv from floip import FloipSurvey from kombu.exceptions import OperationalError from pyxform.builder import create_survey_element_from_dict from pyxform.utils import has_external_choices from pyxform.xls2json import parse_file_to_json +from pyxform.xls2json_backends import xlsx_value_to_str from onadata.apps.logger.models.xform import XForm, check_version_set, check_xform_uuid from onadata.apps.logger.xform_instance_parser import XLSFormError @@ -86,41 +88,19 @@ def sheet_to_csv(xls_content, sheet_name): writer = csv.writer(csv_file, encoding="utf-8", quoting=csv.QUOTE_ALL) mask = [v and len(v.strip()) > 0 for v in list(sheet.values)[0]] - header = [v for v, m in zip(list(sheet.values)[0], mask) if m] - writer.writerow(header) - - name_column = None - try: - name_column = header.index("name") - except ValueError: - pass - - integer_fields = False - date_fields = False - if name_column: - for index in range(1, sheet.max_column): - if sheet.cell(index, name_column).data_type == "n": - integer_fields = True - elif sheet.cell(index, name_column).is_date: - date_fields = True - - for row, value in enumerate(sheet.iter_rows()): - if integer_fields or date_fields: - # convert integers to string/datetime if name has numbers/dates - row_values = [] - for index, val in enumerate(value): 
- if sheet.cell(row, index).data_type == "n": - try: - val = str(float(val) if (float(val) > int(val)) else int(val)) - except ValueError: - pass - elif sheet.cell(row, index).is_date: - val = val.strftime("%Y-%m-%d").isoformat() + for row in sheet.iter_rows(values_only=True): + row_values = [] + try: + for val in row: + if val is not None: + val = xlsx_value_to_str(val) + val = val.strip() row_values.append(val) + except TypeError: + continue + + if not all(v is None for v in row_values): writer.writerow([v for v, m in zip(row_values, mask) if m]) - else: - single_row = [cell.value for cell in value] - writer.writerow([v for v, m in zip(single_row, mask) if m]) return csv_file