Skip to content

Commit

Permalink
Update sheet_to_csv function
Browse files Browse the repository at this point in the history
- Ensure headers are not duplicated
- Ensure that integers & datetimes are correctly handled.
- Ensure empty values are not processed
- Utilize PyXForm `xlsx_value_to_str` to convert XLSX values
- Update tests
  • Loading branch information
Davis Muro committed Jan 19, 2023
1 parent 366f063 commit 4f37461
Show file tree
Hide file tree
Showing 2 changed files with 22 additions and 44 deletions.
16 changes: 7 additions & 9 deletions onadata/apps/api/tests/viewsets/test_xform_viewset.py
Original file line number Diff line number Diff line change
Expand Up @@ -5506,15 +5506,14 @@ def test_external_choice_integer_name_xlsform(self):
self.assertIsNotNone(metadata)

csv_reader = csv.reader(codecs.iterdecode(metadata.data_file, "utf-8"))
header = next(csv_reader)
expected_data = [
["list_name", "name", "label", "state", "county"],
["states", "1", "Texas"],
["states", "2", "Washington"],
["counties", "b1", "King", "2"],
["counties", "b2", "Pierce", "2"],
["counties", "b3", "King", "1"],
["counties", "b4", "Cameron", "1"],
["states", "1", "Texas", "", ""],
["states", "2", "Washington", "", ""],
["counties", "b1", "King", "2", ""],
["counties", "b2", "Pierce", "2", ""],
["counties", "b3", "King", "1", ""],
["counties", "b4", "Cameron", "1", ""],
["cities", "dumont", "Dumont", "1", "b3"],
["cities", "finney", "Finney", "1", "b3"],
["cities", "brownsville", "brownsville", "1", "b4"],
Expand All @@ -5524,8 +5523,7 @@ def test_external_choice_integer_name_xlsform(self):
["cities", "tacoma", "Tacoma", "2", "b2"],
["cities", "puyallup", "Puyallup", "2", "b2"],
]
self.assertEqual(header, expected_data[0])
for index, row in enumerate(csv_reader, start=1):
for index, row in enumerate(csv_reader):
self.assertEqual(row, expected_data[index])

def test_csv_xls_import_errors(self):
Expand Down
50 changes: 15 additions & 35 deletions onadata/apps/viewer/models/data_dictionary.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,17 +5,19 @@
import os
from io import BytesIO, StringIO

import unicodecsv as csv
import openpyxl
from django.core.files.uploadedfile import InMemoryUploadedFile
from django.db.models.signals import post_save, pre_save
from django.utils import timezone
from django.utils.translation import gettext as _

import openpyxl
import unicodecsv as csv
from floip import FloipSurvey
from kombu.exceptions import OperationalError
from pyxform.builder import create_survey_element_from_dict
from pyxform.utils import has_external_choices
from pyxform.xls2json import parse_file_to_json
from pyxform.xls2json_backends import xlsx_value_to_str

from onadata.apps.logger.models.xform import XForm, check_version_set, check_xform_uuid
from onadata.apps.logger.xform_instance_parser import XLSFormError
Expand Down Expand Up @@ -86,41 +88,19 @@ def sheet_to_csv(xls_content, sheet_name):
writer = csv.writer(csv_file, encoding="utf-8", quoting=csv.QUOTE_ALL)
mask = [v and len(v.strip()) > 0 for v in list(sheet.values)[0]]

header = [v for v, m in zip(list(sheet.values)[0], mask) if m]
writer.writerow(header)

name_column = None
try:
name_column = header.index("name")
except ValueError:
pass

integer_fields = False
date_fields = False
if name_column:
for index in range(1, sheet.max_column):
if sheet.cell(index, name_column).data_type == "n":
integer_fields = True
elif sheet.cell(index, name_column).is_date:
date_fields = True

for row, value in enumerate(sheet.iter_rows()):
if integer_fields or date_fields:
# convert integers to string/datetime if name has numbers/dates
row_values = []
for index, val in enumerate(value):
if sheet.cell(row, index).data_type == "n":
try:
val = str(float(val) if (float(val) > int(val)) else int(val))
except ValueError:
pass
elif sheet.cell(row, index).is_date:
val = val.strftime("%Y-%m-%d").isoformat()
for row in sheet.iter_rows(values_only=True):
row_values = []
try:
for val in row:
if val is not None:
val = xlsx_value_to_str(val)
val = val.strip()
row_values.append(val)
except TypeError:
continue

if not all(v is None for v in row_values):
writer.writerow([v for v, m in zip(row_values, mask) if m])
else:
single_row = [cell.value for cell in value]
writer.writerow([v for v, m in zip(single_row, mask) if m])
return csv_file


Expand Down

0 comments on commit 4f37461

Please sign in to comment.