Skip to content

Commit

Permalink
Update uploader to accommodate sanparks template (#4038)
Browse files Browse the repository at this point in the history
* Update uploader to accommodate sanparks template

* Fix upload issues
  • Loading branch information
dimasciput committed Jul 11, 2024
1 parent d47e091 commit 86ac1f1
Show file tree
Hide file tree
Showing 7 changed files with 175 additions and 57 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
# Generated by Django 4.2.11 on 2024-07-03 14:22

from django.db import migrations, models


class Migration(migrations.Migration):
    """Schema changes for the SANParks uploader template (auto-generated).

    Drops the unused ``additional_observation_sites`` M2M fields and adds
    ``data_type`` / ``date_accuracy`` fields to BiologicalCollectionRecord,
    plus the 'sanparks' site-code generator choice on SiteSetting.
    Generated by Django 4.2.11 — do not hand-edit operation order.
    """

    dependencies = [
        # Must run after the profile-role rename migration.
        ('bims', '0419_rename_role_link_profile_role'),
    ]

    operations = [
        # NOTE: RemoveField on an M2M drops the through table; data in
        # additional_observation_sites is lost and this is not reversible
        # with data intact.
        migrations.RemoveField(
            model_name='biologicalcollectionrecord',
            name='additional_observation_sites',
        ),
        migrations.RemoveField(
            model_name='locationsite',
            name='additional_observation_sites',
        ),
        # Data-sharing level parsed from the SANParks CSV "Data type" column;
        # blank default keeps existing rows valid.
        migrations.AddField(
            model_name='biologicalcollectionrecord',
            name='data_type',
            field=models.CharField(blank=True, choices=[('private', 'Private'), ('public', 'Public'), ('sensitive', 'Sensitive')], default='', help_text='Specify data sharing level of this record', max_length=128),
        ),
        # Whether the sampling date is a real observation date or artificial.
        migrations.AddField(
            model_name='biologicalcollectionrecord',
            name='date_accuracy',
            field=models.CharField(blank=True, choices=[('accurate', 'Accurate'), ('artificial', 'Artificial')], default='', help_text='Indicate if date is accurate or artificial', max_length=64),
        ),
        migrations.AlterField(
            model_name='sitesetting',
            name='iucn_api_key',
            field=models.CharField(blank=True, default='', help_text='Token key for IUCN api', max_length=255),
        ),
        migrations.AlterField(
            model_name='sitesetting',
            name='recaptcha_secret_key',
            field=models.CharField(blank=True, default='', max_length=150),
        ),
        migrations.AlterField(
            model_name='sitesetting',
            name='recaptcha_site_key',
            field=models.CharField(blank=True, default='', max_length=150),
        ),
        # Adds the new 'sanparks' generator scheme alongside bims/fbis/rbis.
        migrations.AlterField(
            model_name='sitesetting',
            name='site_code_generator',
            field=models.CharField(blank=True, choices=[('bims', 'BIMS (2 Site Name + 2 Site Description + Site count)'), ('fbis', 'FBIS (2 Secondary catchment + 4 River + Site count)'), ('rbis', 'RBIS (Catchment + Province ID + District ID + Site count)'), ('sanparks', 'SANPARKS (1st three park name + 1st two site description + site count)')], default='bims', help_text='How site code generated', max_length=50),
        ),
    ]
39 changes: 27 additions & 12 deletions bims/models/biological_collection_record.py
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,17 @@ class BiologicalCollectionRecord(AbstractValidation):
(ABUNDANCE_TYPE_DENSITY_CELLS_ML, 'Density (cells/mL)'),
)

DATA_TYPE_CHOICES = (
('private', 'Private'),
('public', 'Public'),
('sensitive', 'Sensitive'),
)

DATE_ACCURACY_CHOICES = (
('accurate', 'Accurate'),
('artificial', 'Artificial'),
)

site = models.ForeignKey(
LocationSite,
models.CASCADE,
Expand All @@ -182,18 +193,6 @@ class BiologicalCollectionRecord(AbstractValidation):
verbose_name='collector or observer',
)

additional_observation_sites = models.ManyToManyField(
to=Site,
related_name='additional_observation_sites',
blank=True,
help_text="List of sites where this biological occurrence has also been observed. "
"This attribute allows for recording multiple observation locations beyond "
"the primary source site. For instance, if an occurrence is recorded at the "
"main location 'FBIS' and is also observed at 'SanParks', "
"this field facilitates linking the occurrence to 'SanParks' as "
"an additional observation site."
)

notes = models.TextField(
blank=True,
default='',
Expand Down Expand Up @@ -369,6 +368,22 @@ class BiologicalCollectionRecord(AbstractValidation):
null=True
)

data_type = models.CharField(
max_length=128,
blank=True,
choices=DATA_TYPE_CHOICES,
default='',
help_text='Specify data sharing level of this record'
)

date_accuracy = models.CharField(
max_length=64,
blank=True,
choices=DATE_ACCURACY_CHOICES,
default='',
help_text='Indicate if date is accurate or artificial'
)

@property
def data_name(self):
return self.original_species_name
Expand Down
14 changes: 2 additions & 12 deletions bims/models/location_site.py
Original file line number Diff line number Diff line change
Expand Up @@ -210,17 +210,6 @@ class LocationSite(AbstractValidation):
default=HYDROGEOMORPHIC_NONE,
choices=HYDROGEOMORPHIC_CHOICES
)
additional_observation_sites = models.ManyToManyField(
to=Site,
related_name='location_site_additional_observation_sites',
blank=True,
help_text="List of sites where this location site has also been observed. "
"This attribute allows for recording multiple observation locations beyond "
"the primary source site. For instance, if an occurrence is recorded at the "
"main location 'FBIS' and is also observed at 'SanParks', "
"this field facilitates linking the location site to 'SanParks' as "
"an additional observation site."
)

@property
def location_site_identifier(self):
Expand Down Expand Up @@ -596,7 +585,8 @@ def generate_site_code(
site_name_length = 3
if project_name in ['bims', 'sanparks'] and (site_name or site_description):
catchment_site_code = site_name[:site_name_length].upper()
catchment_site_code += site_description[:2].upper()
if project_name == 'bims':
catchment_site_code += site_description[:2].upper()
elif location_site:
catchment_site_code += location_site.name[:site_name_length].upper()
catchment_site_code += location_site.site_description[:4].upper()
Expand Down
6 changes: 6 additions & 0 deletions bims/scripts/collection_csv_keys.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,3 +92,9 @@

# Sanparks
# Column-header keys for the SANParks CSV upload template. Each constant
# must match the template's header text exactly (after header cleaning),
# so these strings are part of the upload contract — do not rename.
PARK_OR_MPA_NAME = 'Park or MPA name'
# Coordinate accuracy in metres; uploader falls back to 100 when blank.
ACCURACY_OF_COORDINATES = 'Accuracy of coordinates'

# NOTE(review): 'Verbatum' is a misspelling of 'Verbatim', but it mirrors
# the header in the SANParks template — fix only together with the template.
VERBATUM_NAME = 'Verbatum Name (Scientific or common)'
CERTAINTY_OF_IDENTIFICATION = 'Certainty of identification'
# Maps to BiologicalCollectionRecord.date_accuracy ('accurate'/'artificial').
DATE_ACCURACY = 'Date Accuracy'
# Maps to BiologicalCollectionRecord.data_type (private/public/sensitive).
DATA_TYPE = 'Data type'
117 changes: 88 additions & 29 deletions bims/scripts/occurrences_upload.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,6 @@ class OccurrenceProcessor(object):

site_ids = []
module_group = None
source_site = None
# Whether the script should also fetch location context after ingesting
# collection data
fetch_location_context = True
Expand Down Expand Up @@ -277,6 +276,12 @@ def location_site(self, record):

park_name = DataCSVUpload.row_value(record, PARK_OR_MPA_NAME)

accuracy_of_coordinates = DataCSVUpload.row_value(
record, ACCURACY_OF_COORDINATES
)
if not accuracy_of_coordinates:
accuracy_of_coordinates = 100

if not longitude and not latitude and park_name:
wfs_url = preferences.SiteSetting.park_wfs_url
layer_name = preferences.SiteSetting.park_wfs_layer_name
Expand All @@ -287,22 +292,39 @@ def location_site(self, record):
latitude = self.park_centroid[park_name][0]
longitude = self.park_centroid[park_name][1]
else:
park_centroid = get_feature_centroid(
wfs_url,
layer_name,
attribute_key=attribute_key,
attribute_value=attribute_value
)
if park_centroid:
latitude = park_centroid[0]
longitude = park_centroid[1]
self.park_centroid[park_name] = park_centroid
# Check if there is already site with the same park name
site = LocationSite.objects.filter(
name=park_name
).first()
if site:
latitude = site.latitude
longitude = site.longitude
self.park_centroid[site.name] = [latitude, longitude]
# Check if site with same park name and accuracy of coordinates exists
site = LocationSite.objects.filter(
name=park_name,
accuracy_of_locality=int(accuracy_of_coordinates)
).exclude(site_code='').first()
if site:
# Return existing site
return site
else:
self.handle_error(
row=record,
message='Park or MPA name does not exist in the database'
park_centroid = get_feature_centroid(
wfs_url,
layer_name,
attribute_key=attribute_key,
attribute_value=attribute_value
)
return None
if park_centroid:
latitude = park_centroid[0]
longitude = park_centroid[1]
self.park_centroid[park_name] = park_centroid
else:
self.handle_error(
row=record,
message='Park or MPA name does not exist in the database'
)
return None

if not longitude or not latitude:
self.handle_error(
Expand All @@ -312,8 +334,8 @@ def location_site(self, record):
return None

try:
latitude = float(DataCSVUpload.row_value(record, LATITUDE))
longitude = float(DataCSVUpload.row_value(record, LONGITUDE))
latitude = float(latitude)
longitude = float(longitude)
except ValueError:
self.handle_error(
row=record,
Expand All @@ -334,6 +356,8 @@ def location_site(self, record):
location_site_name = DataCSVUpload.row_value(record, LOCATION_SITE)
elif wetland_name:
location_site_name = wetland_name
elif park_name:
location_site_name = park_name

# Find existing location site by data source site code
data_source = preferences.SiteSetting.default_data_source.upper()
Expand Down Expand Up @@ -396,9 +420,15 @@ def location_site(self, record):
lat=location_site.latitude,
lon=location_site.longitude,
ecosystem_type=location_site.ecosystem_type,
wetland_name=user_wetland_name
wetland_name=user_wetland_name,
**{
'site_desc': site_description,
'site_name': location_site_name
}
)
location_site.site_code = site_code
if accuracy_of_coordinates:
location_site.accuracy_of_locality = int(accuracy_of_coordinates)
location_site.save()
return location_site

Expand Down Expand Up @@ -758,6 +788,10 @@ def process_data(self, row):
record_type = RecordType.objects.filter(
name__iexact=record_type
).first()
if not record_type:
record_type = RecordType.objects.create(
name=record_type
)
else:
record_type = None
optional_data['record_type'] = record_type
Expand Down Expand Up @@ -795,15 +829,48 @@ def process_data(self, row):
sampling_date
)

species_name = DataCSVUpload.row_value(
row, VERBATUM_NAME
)

if not species_name:
species_name = DataCSVUpload.row_value(
row, SPECIES_NAME
)

certainty_of_identification = DataCSVUpload.row_value(
row, CERTAINTY_OF_IDENTIFICATION
)

date_accuracy = DataCSVUpload.row_value(
row, DATE_ACCURACY
)

data_type = DataCSVUpload.row_value(
row, DATA_TYPE
)
if data_type:
data_type = data_type.lower()
if 'public' in data_type:
data_type = 'public'
elif 'private' in data_type:
data_type = 'private'
elif 'sensitive' in data_type:
data_type = 'sensitive'
else:
data_type = ''

record = None
fields = {
'site': location_site,
'original_species_name': DataCSVUpload.row_value(
row, SPECIES_NAME),
'original_species_name': species_name,
'collection_date': sampling_date,
'taxonomy': taxonomy,
'collector_user': collector,
'validated': True
'validated': True,
'accuracy_of_identification': certainty_of_identification,
'date_accuracy': date_accuracy.lower() if date_accuracy else '',
'data_type': data_type
}
if uuid_value:
uuid_without_hyphen = uuid_value.replace('-', '')
Expand Down Expand Up @@ -856,13 +923,6 @@ def process_data(self, row):
record.additional_data = json.dumps(row)
record.validated = True

# -- Assigning source site
if not record.source_site and self.source_site:
record.source_site = self.source_site
elif record.source_site and self.source_site:
record.additional_observation_sites.add(
self.source_site.id)

record.save()

if not str(record.site.id) in self.site_ids:
Expand All @@ -884,7 +944,6 @@ def process_ended(self):

def process_row(self, row):
self.module_group = self.upload_session.module_group
self.source_site = self.upload_session.source_site
self.process_data(row)

def handle_error(self, row, message):
Expand Down
4 changes: 1 addition & 3 deletions bims/tasks/collections_upload.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
from celery import shared_task
FILE_HEADERS = [
'UUID',
'Site description',
'Latitude',
'Longitude',
'Sampling Date',
Expand Down Expand Up @@ -45,8 +44,7 @@ def check_and_clean_headers(_csv_file_path):
cleaned_headers = [clean_header(header) for header in original_headers]

cleaned_header_row = ','.join(cleaned_headers) + '\n'
if not all(header in cleaned_headers for header in FILE_HEADERS) or \
not any(header in cleaned_headers for header in FILE_HEADERS_USER_SITE_CODE):
if not all(header in cleaned_headers for header in FILE_HEADERS):
error_message = (
'Header row does not follow the correct format'
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,6 @@ def handle(self, *args, **options):
id=dupe['site_id']
)
bio = BiologicalCollectionRecord.objects.filter(
Q(source_site_id=site_id) | Q(additional_observation_sites=site_id),
survey__in=surveys,
)
if bio.count() == 0:
Expand Down

0 comments on commit 86ac1f1

Please sign in to comment.