From 063618e5ce037f4426df7425f4a55da0d738fa60 Mon Sep 17 00:00:00 2001 From: Davis Muro Date: Tue, 17 Jan 2023 13:30:01 +0300 Subject: [PATCH] Update `sheet_to_csv` function - Ensure headers are not duplicated - Ensure that integers & datetimes are correctly handled. - Ensure empty values are not processed --- onadata/apps/viewer/models/data_dictionary.py | 49 ++++++++----------- 1 file changed, 21 insertions(+), 28 deletions(-) diff --git a/onadata/apps/viewer/models/data_dictionary.py b/onadata/apps/viewer/models/data_dictionary.py index 4de9a2eeaa..73122124df 100644 --- a/onadata/apps/viewer/models/data_dictionary.py +++ b/onadata/apps/viewer/models/data_dictionary.py @@ -5,12 +5,13 @@ import os from io import BytesIO, StringIO -import unicodecsv as csv -import openpyxl from django.core.files.uploadedfile import InMemoryUploadedFile from django.db.models.signals import post_save, pre_save from django.utils import timezone from django.utils.translation import gettext as _ + +import openpyxl +import unicodecsv as csv from floip import FloipSurvey from kombu.exceptions import OperationalError from pyxform.builder import create_survey_element_from_dict @@ -89,38 +90,30 @@ def sheet_to_csv(xls_content, sheet_name): header = [v for v, m in zip(list(sheet.values)[0], mask) if m] writer.writerow(header) - name_column = None - try: - name_column = header.index("name") - except ValueError: - pass - - integer_fields = False - date_fields = False - if name_column: - for index in range(1, sheet.max_column): - if sheet.cell(index, name_column).data_type == "n": - integer_fields = True - elif sheet.cell(index, name_column).is_date: - date_fields = True - - for row, value in enumerate(sheet.iter_rows()): - if integer_fields or date_fields: - # convert integers to string/datetime if name has numbers/dates - row_values = [] - for index, val in enumerate(value): - if sheet.cell(row, index).data_type == "n": + # Start at 1 since the columns & rows in a sheet are 1-based arrays + 
for row, value in enumerate(sheet.iter_rows(values_only=True), start=1): + # Skip first row as it's just the headers + if row > 1: + continue + + row_values = [] + + # Start at 1 since the columns & rows in a sheet are 1-based arrays + for col, val in enumerate(value, start=1): + cell = sheet.cell(row, col) + if val is not None: + # Try and convert cell to string if it's numeric or a date + if cell.data_type == "n": try: val = str(float(val) if (float(val) > int(val)) else int(val)) except ValueError: pass - elif sheet.cell(row, index).is_date: + elif cell.is_date: val = val.strftime("%Y-%m-%d").isoformat() + row_values.append(val) - writer.writerow([v for v, m in zip(row_values, mask) if m]) - else: - single_row = [cell.value for cell in value] - writer.writerow([v for v, m in zip(single_row, mask) if m]) + + writer.writerow([v for v, m in zip(row_values, mask) if m]) return csv_file