Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update uploader to accommodate sanparks template #4038

Merged
merged 2 commits into from
Jul 4, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
# Generated by Django 4.2.11 on 2024-07-03 14:22

from django.db import migrations, models


class Migration(migrations.Migration):
    """Schema changes for the SANParks occurrence-upload template.

    Drops the (unused) ``additional_observation_sites`` M2M fields and adds
    ``data_type`` / ``date_accuracy`` fields to BiologicalCollectionRecord,
    plus auto-detected help_text/choices tweaks on SiteSetting.
    Auto-generated by Django 4.2.11; do not edit operations by hand.
    """

    # Must run after the profile-role rename migration in the same app.
    dependencies = [
        ('bims', '0419_rename_role_link_profile_role'),
    ]

    operations = [
        # The additional-observation-sites feature is removed from both
        # models (the source-site bookkeeping it supported was dropped
        # from the upload pipeline in the same change).
        migrations.RemoveField(
            model_name='biologicalcollectionrecord',
            name='additional_observation_sites',
        ),
        migrations.RemoveField(
            model_name='locationsite',
            name='additional_observation_sites',
        ),
        # New optional sharing-level flag; blank default keeps existing
        # rows valid without a data backfill.
        migrations.AddField(
            model_name='biologicalcollectionrecord',
            name='data_type',
            field=models.CharField(blank=True, choices=[('private', 'Private'), ('public', 'Public'), ('sensitive', 'Sensitive')], default='', help_text='Specify data sharing level of this record', max_length=128),
        ),
        # New optional flag recording whether the collection date is real
        # or an artificial placeholder.
        migrations.AddField(
            model_name='biologicalcollectionrecord',
            name='date_accuracy',
            field=models.CharField(blank=True, choices=[('accurate', 'Accurate'), ('artificial', 'Artificial')], default='', help_text='Indicate if date is accurate or artificial', max_length=64),
        ),
        # The AlterField operations below were auto-detected by
        # makemigrations (help_text/choices drift on SiteSetting); they do
        # not change the underlying column types except for choice lists.
        migrations.AlterField(
            model_name='sitesetting',
            name='iucn_api_key',
            field=models.CharField(blank=True, default='', help_text='Token key for IUCN api', max_length=255),
        ),
        migrations.AlterField(
            model_name='sitesetting',
            name='recaptcha_secret_key',
            field=models.CharField(blank=True, default='', max_length=150),
        ),
        migrations.AlterField(
            model_name='sitesetting',
            name='recaptcha_site_key',
            field=models.CharField(blank=True, default='', max_length=150),
        ),
        # Adds the new 'sanparks' site-code generator option
        # (1st three park-name chars + 1st two site-description chars + count).
        migrations.AlterField(
            model_name='sitesetting',
            name='site_code_generator',
            field=models.CharField(blank=True, choices=[('bims', 'BIMS (2 Site Name + 2 Site Description + Site count)'), ('fbis', 'FBIS (2 Secondary catchment + 4 River + Site count)'), ('rbis', 'RBIS (Catchment + Province ID + District ID + Site count)'), ('sanparks', 'SANPARKS (1st three park name + 1st two site description + site count)')], default='bims', help_text='How site code generated', max_length=50),
        ),
    ]
39 changes: 27 additions & 12 deletions bims/models/biological_collection_record.py
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,17 @@ class BiologicalCollectionRecord(AbstractValidation):
(ABUNDANCE_TYPE_DENSITY_CELLS_ML, 'Density (cells/mL)'),
)

DATA_TYPE_CHOICES = (
('private', 'Private'),
('public', 'Public'),
('sensitive', 'Sensitive'),
)

DATE_ACCURACY_CHOICES = (
('accurate', 'Accurate'),
('artificial', 'Artificial'),
)

site = models.ForeignKey(
LocationSite,
models.CASCADE,
Expand All @@ -182,18 +193,6 @@ class BiologicalCollectionRecord(AbstractValidation):
verbose_name='collector or observer',
)

additional_observation_sites = models.ManyToManyField(
to=Site,
related_name='additional_observation_sites',
blank=True,
help_text="List of sites where this biological occurrence has also been observed. "
"This attribute allows for recording multiple observation locations beyond "
"the primary source site. For instance, if an occurrence is recorded at the "
"main location 'FBIS' and is also observed at 'SanParks', "
"this field facilitates linking the occurrence to 'SanParks' as "
"an additional observation site."
)

notes = models.TextField(
blank=True,
default='',
Expand Down Expand Up @@ -369,6 +368,22 @@ class BiologicalCollectionRecord(AbstractValidation):
null=True
)

data_type = models.CharField(
max_length=128,
blank=True,
choices=DATA_TYPE_CHOICES,
default='',
help_text='Specify data sharing level of this record'
)

date_accuracy = models.CharField(
max_length=64,
blank=True,
choices=DATE_ACCURACY_CHOICES,
default='',
help_text='Indicate if date is accurate or artificial'
)

@property
def data_name(self):
return self.original_species_name
Expand Down
14 changes: 2 additions & 12 deletions bims/models/location_site.py
Original file line number Diff line number Diff line change
Expand Up @@ -210,17 +210,6 @@ class LocationSite(AbstractValidation):
default=HYDROGEOMORPHIC_NONE,
choices=HYDROGEOMORPHIC_CHOICES
)
additional_observation_sites = models.ManyToManyField(
to=Site,
related_name='location_site_additional_observation_sites',
blank=True,
help_text="List of sites where this location site has also been observed. "
"This attribute allows for recording multiple observation locations beyond "
"the primary source site. For instance, if an occurrence is recorded at the "
"main location 'FBIS' and is also observed at 'SanParks', "
"this field facilitates linking the location site to 'SanParks' as "
"an additional observation site."
)

@property
def location_site_identifier(self):
Expand Down Expand Up @@ -596,7 +585,8 @@ def generate_site_code(
site_name_length = 3
if project_name in ['bims', 'sanparks'] and (site_name or site_description):
catchment_site_code = site_name[:site_name_length].upper()
catchment_site_code += site_description[:2].upper()
if project_name == 'bims':
catchment_site_code += site_description[:2].upper()
elif location_site:
catchment_site_code += location_site.name[:site_name_length].upper()
catchment_site_code += location_site.site_description[:4].upper()
Expand Down
6 changes: 6 additions & 0 deletions bims/scripts/collection_csv_keys.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,3 +92,9 @@

# Sanparks
PARK_OR_MPA_NAME = 'Park or MPA name'
ACCURACY_OF_COORDINATES = 'Accuracy of coordinates'

VERBATUM_NAME = 'Verbatum Name (Scientific or common)'
CERTAINTY_OF_IDENTIFICATION = 'Certainty of identification'
DATE_ACCURACY = 'Date Accuracy'
DATA_TYPE = 'Data type'
117 changes: 88 additions & 29 deletions bims/scripts/occurrences_upload.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,6 @@ class OccurrenceProcessor(object):

site_ids = []
module_group = None
source_site = None
# Whether the script should also fetch location context after ingesting
# collection data
fetch_location_context = True
Expand Down Expand Up @@ -277,6 +276,12 @@ def location_site(self, record):

park_name = DataCSVUpload.row_value(record, PARK_OR_MPA_NAME)

accuracy_of_coordinates = DataCSVUpload.row_value(
record, ACCURACY_OF_COORDINATES
)
if not accuracy_of_coordinates:
accuracy_of_coordinates = 100

if not longitude and not latitude and park_name:
wfs_url = preferences.SiteSetting.park_wfs_url
layer_name = preferences.SiteSetting.park_wfs_layer_name
Expand All @@ -287,22 +292,39 @@ def location_site(self, record):
latitude = self.park_centroid[park_name][0]
longitude = self.park_centroid[park_name][1]
else:
park_centroid = get_feature_centroid(
wfs_url,
layer_name,
attribute_key=attribute_key,
attribute_value=attribute_value
)
if park_centroid:
latitude = park_centroid[0]
longitude = park_centroid[1]
self.park_centroid[park_name] = park_centroid
# Check if there is already site with the same park name
site = LocationSite.objects.filter(
name=park_name
).first()
if site:
latitude = site.latitude
longitude = site.longitude
self.park_centroid[site.name] = [latitude, longitude]
# Check if site with same park name and accuracy of coordinates exists
site = LocationSite.objects.filter(
name=park_name,
accuracy_of_locality=int(accuracy_of_coordinates)
).exclude(site_code='').first()
if site:
# Return existing site
return site
else:
self.handle_error(
row=record,
message='Park or MPA name does not exist in the database'
park_centroid = get_feature_centroid(
wfs_url,
layer_name,
attribute_key=attribute_key,
attribute_value=attribute_value
)
return None
if park_centroid:
latitude = park_centroid[0]
longitude = park_centroid[1]
self.park_centroid[park_name] = park_centroid
else:
self.handle_error(
row=record,
message='Park or MPA name does not exist in the database'
)
return None

if not longitude or not latitude:
self.handle_error(
Expand All @@ -312,8 +334,8 @@ def location_site(self, record):
return None

try:
latitude = float(DataCSVUpload.row_value(record, LATITUDE))
longitude = float(DataCSVUpload.row_value(record, LONGITUDE))
latitude = float(latitude)
longitude = float(longitude)
except ValueError:
self.handle_error(
row=record,
Expand All @@ -334,6 +356,8 @@ def location_site(self, record):
location_site_name = DataCSVUpload.row_value(record, LOCATION_SITE)
elif wetland_name:
location_site_name = wetland_name
elif park_name:
location_site_name = park_name

# Find existing location site by data source site code
data_source = preferences.SiteSetting.default_data_source.upper()
Expand Down Expand Up @@ -396,9 +420,15 @@ def location_site(self, record):
lat=location_site.latitude,
lon=location_site.longitude,
ecosystem_type=location_site.ecosystem_type,
wetland_name=user_wetland_name
wetland_name=user_wetland_name,
**{
'site_desc': site_description,
'site_name': location_site_name
}
)
location_site.site_code = site_code
if accuracy_of_coordinates:
location_site.accuracy_of_locality = int(accuracy_of_coordinates)
location_site.save()
return location_site

Expand Down Expand Up @@ -758,6 +788,10 @@ def process_data(self, row):
record_type = RecordType.objects.filter(
name__iexact=record_type
).first()
if not record_type:
record_type = RecordType.objects.create(
name=record_type
)
else:
record_type = None
optional_data['record_type'] = record_type
Expand Down Expand Up @@ -795,15 +829,48 @@ def process_data(self, row):
sampling_date
)

species_name = DataCSVUpload.row_value(
row, VERBATUM_NAME
)

if not species_name:
species_name = DataCSVUpload.row_value(
row, SPECIES_NAME
)

certainty_of_identification = DataCSVUpload.row_value(
row, CERTAINTY_OF_IDENTIFICATION
)

date_accuracy = DataCSVUpload.row_value(
row, DATE_ACCURACY
)

data_type = DataCSVUpload.row_value(
row, DATA_TYPE
)
if data_type:
data_type = data_type.lower()
if 'public' in data_type:
data_type = 'public'
elif 'private' in data_type:
data_type = 'private'
elif 'sensitive' in data_type:
data_type = 'sensitive'
else:
data_type = ''

record = None
fields = {
'site': location_site,
'original_species_name': DataCSVUpload.row_value(
row, SPECIES_NAME),
'original_species_name': species_name,
'collection_date': sampling_date,
'taxonomy': taxonomy,
'collector_user': collector,
'validated': True
'validated': True,
'accuracy_of_identification': certainty_of_identification,
'date_accuracy': date_accuracy.lower() if date_accuracy else '',
'data_type': data_type
}
if uuid_value:
uuid_without_hyphen = uuid_value.replace('-', '')
Expand Down Expand Up @@ -856,13 +923,6 @@ def process_data(self, row):
record.additional_data = json.dumps(row)
record.validated = True

# -- Assigning source site
if not record.source_site and self.source_site:
record.source_site = self.source_site
elif record.source_site and self.source_site:
record.additional_observation_sites.add(
self.source_site.id)

record.save()

if not str(record.site.id) in self.site_ids:
Expand All @@ -884,7 +944,6 @@ def process_ended(self):

def process_row(self, row):
self.module_group = self.upload_session.module_group
self.source_site = self.upload_session.source_site
self.process_data(row)

def handle_error(self, row, message):
Expand Down
4 changes: 1 addition & 3 deletions bims/tasks/collections_upload.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
from celery import shared_task
FILE_HEADERS = [
'UUID',
'Site description',
'Latitude',
'Longitude',
'Sampling Date',
Expand Down Expand Up @@ -45,8 +44,7 @@ def check_and_clean_headers(_csv_file_path):
cleaned_headers = [clean_header(header) for header in original_headers]

cleaned_header_row = ','.join(cleaned_headers) + '\n'
if not all(header in cleaned_headers for header in FILE_HEADERS) or \
not any(header in cleaned_headers for header in FILE_HEADERS_USER_SITE_CODE):
if not all(header in cleaned_headers for header in FILE_HEADERS):
error_message = (
'Header row does not follow the correct format'
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,6 @@ def handle(self, *args, **options):
id=dupe['site_id']
)
bio = BiologicalCollectionRecord.objects.filter(
Q(source_site_id=site_id) | Q(additional_observation_sites=site_id),
survey__in=surveys,
)
if bio.count() == 0:
Expand Down