Skip to content

Commit

Permalink
Update sheet_to_csv function
Browse files Browse the repository at this point in the history
- Ensure headers are not duplicated
- Ensure that integers & datetimes are correctly handled.
- Ensure empty values are not processed
  • Loading branch information
Davis Muro committed Jan 17, 2023
1 parent 0a61297 commit 063618e
Showing 1 changed file with 21 additions and 28 deletions.
49 changes: 21 additions & 28 deletions onadata/apps/viewer/models/data_dictionary.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,13 @@
import os
from io import BytesIO, StringIO

import unicodecsv as csv
import openpyxl
from django.core.files.uploadedfile import InMemoryUploadedFile
from django.db.models.signals import post_save, pre_save
from django.utils import timezone
from django.utils.translation import gettext as _

import openpyxl
import unicodecsv as csv
from floip import FloipSurvey
from kombu.exceptions import OperationalError
from pyxform.builder import create_survey_element_from_dict
Expand Down Expand Up @@ -89,38 +90,30 @@ def sheet_to_csv(xls_content, sheet_name):
header = [v for v, m in zip(list(sheet.values)[0], mask) if m]
writer.writerow(header)

name_column = None
try:
name_column = header.index("name")
except ValueError:
pass

integer_fields = False
date_fields = False
if name_column:
for index in range(1, sheet.max_column):
if sheet.cell(index, name_column).data_type == "n":
integer_fields = True
elif sheet.cell(index, name_column).is_date:
date_fields = True

for row, value in enumerate(sheet.iter_rows()):
if integer_fields or date_fields:
# convert integers to string/datetime if name has numbers/dates
row_values = []
for index, val in enumerate(value):
if sheet.cell(row, index).data_type == "n":
# Start at 1 since the columns & rows in a sheet are 1-based arrays
for row, value in enumerate(sheet.iter_rows(values_only=True), start=1):
# Skip first row as it's just the headers
if row > 1:
continue

row_values = []

# Start at 1 since the columns & rows in a sheet are 1-based arrays
for col, val in enumerate(value, start=1):
cell = sheet.cell(row, col)
if val is not None:
# Try and convert cell to string if it's numeric or a date
if cell.data_type == "n":
try:
val = str(float(val) if (float(val) > int(val)) else int(val))
except ValueError:
pass
elif sheet.cell(row, index).is_date:
elif cell.is_date:
val = val.strftime("%Y-%m-%d").isoformat()

row_values.append(val)
writer.writerow([v for v, m in zip(row_values, mask) if m])
else:
single_row = [cell.value for cell in value]
writer.writerow([v for v, m in zip(single_row, mask) if m])

writer.writerow([v for v, m in zip(row_values, mask) if m])
return csv_file


Expand Down

0 comments on commit 063618e

Please sign in to comment.