From 86ac1f110cc85021ab5ef57f5259769de6607d6d Mon Sep 17 00:00:00 2001 From: Dimas Ciputra Date: Thu, 4 Jul 2024 09:48:31 +0100 Subject: [PATCH] Update uploader to accommodate sanparks template (#4038) * Update uploader to accommodate sanparks template * Fix upload issues --- ...d_additional_observation_sites_and_more.py | 51 ++++++++ bims/models/biological_collection_record.py | 39 ++++-- bims/models/location_site.py | 14 +-- bims/scripts/collection_csv_keys.py | 6 + bims/scripts/occurrences_upload.py | 117 +++++++++++++----- bims/tasks/collections_upload.py | 4 +- .../combine_duplicated_site_visits.py | 1 - 7 files changed, 175 insertions(+), 57 deletions(-) create mode 100644 bims/migrations/0420_remove_biologicalcollectionrecord_additional_observation_sites_and_more.py diff --git a/bims/migrations/0420_remove_biologicalcollectionrecord_additional_observation_sites_and_more.py b/bims/migrations/0420_remove_biologicalcollectionrecord_additional_observation_sites_and_more.py new file mode 100644 index 000000000..fe9fb4251 --- /dev/null +++ b/bims/migrations/0420_remove_biologicalcollectionrecord_additional_observation_sites_and_more.py @@ -0,0 +1,51 @@ +# Generated by Django 4.2.11 on 2024-07-03 14:22 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('bims', '0419_rename_role_link_profile_role'), + ] + + operations = [ + migrations.RemoveField( + model_name='biologicalcollectionrecord', + name='additional_observation_sites', + ), + migrations.RemoveField( + model_name='locationsite', + name='additional_observation_sites', + ), + migrations.AddField( + model_name='biologicalcollectionrecord', + name='data_type', + field=models.CharField(blank=True, choices=[('private', 'Private'), ('public', 'Public'), ('sensitive', 'Sensitive')], default='', help_text='Specify data sharing level of this record', max_length=128), + ), + migrations.AddField( + model_name='biologicalcollectionrecord', + name='date_accuracy', + field=models.CharField(blank=True, choices=[('accurate', 'Accurate'), ('artificial', 'Artificial')], default='', help_text='Indicate if date is accurate or artificial', max_length=64), + ), + migrations.AlterField( + model_name='sitesetting', + name='iucn_api_key', + field=models.CharField(blank=True, default='', help_text='Token key for IUCN api', max_length=255), + ), + migrations.AlterField( + model_name='sitesetting', + name='recaptcha_secret_key', + field=models.CharField(blank=True, default='', max_length=150), + ), + migrations.AlterField( + model_name='sitesetting', + name='recaptcha_site_key', + field=models.CharField(blank=True, default='', max_length=150), + ), + migrations.AlterField( + model_name='sitesetting', + name='site_code_generator', + field=models.CharField(blank=True, choices=[('bims', 'BIMS (2 Site Name + 2 Site Description + Site count)'), ('fbis', 'FBIS (2 Secondary catchment + 4 River + Site count)'), ('rbis', 'RBIS (Catchment + Province ID + District ID + Site count)'), ('sanparks', 'SANPARKS (1st three park name + 1st two site description + site count)')], default='bims', help_text='How site code generated', max_length=50), + ), + ] diff --git a/bims/models/biological_collection_record.py b/bims/models/biological_collection_record.py index 54730e2e8..45d4d99f9 100644 --- a/bims/models/biological_collection_record.py +++ b/bims/models/biological_collection_record.py @@ -157,6 +157,17 @@ class BiologicalCollectionRecord(AbstractValidation): (ABUNDANCE_TYPE_DENSITY_CELLS_ML, 'Density (cells/mL)'), ) + DATA_TYPE_CHOICES = ( + ('private', 'Private'), + ('public', 'Public'), + ('sensitive', 'Sensitive'), + ) + + DATE_ACCURACY_CHOICES = ( + ('accurate', 'Accurate'), + ('artificial', 'Artificial'), + ) + site = models.ForeignKey( LocationSite, models.CASCADE, @@ -182,18 +193,6 @@ class BiologicalCollectionRecord(AbstractValidation): verbose_name='collector or observer', ) - additional_observation_sites = models.ManyToManyField( - to=Site, - related_name='additional_observation_sites', - blank=True, - help_text="List of sites where this biological occurrence has also been observed. " - "This attribute allows for recording multiple observation locations beyond " - "the primary source site. For instance, if an occurrence is recorded at the " - "main location 'FBIS' and is also observed at 'SanParks', " - "this field facilitates linking the occurrence to 'SanParks' as " - "an additional observation site." - ) - notes = models.TextField( blank=True, default='', @@ -369,6 +368,22 @@ class BiologicalCollectionRecord(AbstractValidation): null=True ) + data_type = models.CharField( + max_length=128, + blank=True, + choices=DATA_TYPE_CHOICES, + default='', + help_text='Specify data sharing level of this record' + ) + + date_accuracy = models.CharField( + max_length=64, + blank=True, + choices=DATE_ACCURACY_CHOICES, + default='', + help_text='Indicate if date is accurate or artificial' + ) + @property def data_name(self): return self.original_species_name diff --git a/bims/models/location_site.py b/bims/models/location_site.py index 1ad0921ba..9964231a9 100644 --- a/bims/models/location_site.py +++ b/bims/models/location_site.py @@ -210,17 +210,6 @@ class LocationSite(AbstractValidation): default=HYDROGEOMORPHIC_NONE, choices=HYDROGEOMORPHIC_CHOICES ) - additional_observation_sites = models.ManyToManyField( - to=Site, - related_name='location_site_additional_observation_sites', - blank=True, - help_text="List of sites where this location site has also been observed. " - "This attribute allows for recording multiple observation locations beyond " - "the primary source site. For instance, if an occurrence is recorded at the " - "main location 'FBIS' and is also observed at 'SanParks', " - "this field facilitates linking the location site to 'SanParks' as " - "an additional observation site." - ) @property def location_site_identifier(self): @@ -596,7 +585,8 @@ def generate_site_code( site_name_length = 3 if project_name in ['bims', 'sanparks'] and (site_name or site_description): catchment_site_code = site_name[:site_name_length].upper() - catchment_site_code += site_description[:2].upper() + if project_name == 'bims': + catchment_site_code += site_description[:2].upper() elif location_site: catchment_site_code += location_site.name[:site_name_length].upper() catchment_site_code += location_site.site_description[:4].upper() diff --git a/bims/scripts/collection_csv_keys.py b/bims/scripts/collection_csv_keys.py index b6e2f5a32..840a51207 100644 --- a/bims/scripts/collection_csv_keys.py +++ b/bims/scripts/collection_csv_keys.py @@ -92,3 +92,9 @@ # Sanparks PARK_OR_MPA_NAME = 'Park or MPA name' +ACCURACY_OF_COORDINATES = 'Accuracy of coordinates' + +VERBATUM_NAME = 'Verbatum Name (Scientific or common)' +CERTAINTY_OF_IDENTIFICATION = 'Certainty of identification' +DATE_ACCURACY = 'Date Accuracy' +DATA_TYPE = 'Data type' diff --git a/bims/scripts/occurrences_upload.py b/bims/scripts/occurrences_upload.py index 231f6a994..a42498067 100644 --- a/bims/scripts/occurrences_upload.py +++ b/bims/scripts/occurrences_upload.py @@ -67,7 +67,6 @@ class OccurrenceProcessor(object): site_ids = [] module_group = None - source_site = None # Whether the script should also fetch location context after ingesting # collection data fetch_location_context = True @@ -277,6 +276,12 @@ def location_site(self, record): park_name = DataCSVUpload.row_value(record, PARK_OR_MPA_NAME) + accuracy_of_coordinates = DataCSVUpload.row_value( + record, ACCURACY_OF_COORDINATES + ) + if not accuracy_of_coordinates: + accuracy_of_coordinates = 100 + if not longitude and not latitude and park_name: wfs_url = preferences.SiteSetting.park_wfs_url layer_name = preferences.SiteSetting.park_wfs_layer_name @@ -287,22 +292,39 @@ def location_site(self, record): latitude = self.park_centroid[park_name][0] longitude = self.park_centroid[park_name][1] else: - park_centroid = get_feature_centroid( - wfs_url, - layer_name, - attribute_key=attribute_key, - attribute_value=attribute_value - ) - if park_centroid: - latitude = park_centroid[0] - longitude = park_centroid[1] - self.park_centroid[park_name] = park_centroid + # Check if there is already site with the same park name + site = LocationSite.objects.filter( + name=park_name + ).first() + if site: + latitude = site.latitude + longitude = site.longitude + self.park_centroid[site.name] = [latitude, longitude] + # Check if site with same park name and accuracy of coordinates exists + site = LocationSite.objects.filter( + name=park_name, + accuracy_of_locality=int(accuracy_of_coordinates) + ).exclude(site_code='').first() + if site: + # Return existing site + return site else: - self.handle_error( - row=record, - message='Park or MPA name does not exist in the database' + park_centroid = get_feature_centroid( + wfs_url, + layer_name, + attribute_key=attribute_key, + attribute_value=attribute_value ) - return None + if park_centroid: + latitude = park_centroid[0] + longitude = park_centroid[1] + self.park_centroid[park_name] = park_centroid + else: + self.handle_error( + row=record, + message='Park or MPA name does not exist in the database' + ) + return None if not longitude or not latitude: self.handle_error( @@ -312,8 +334,8 @@ def location_site(self, record): return None try: - latitude = float(DataCSVUpload.row_value(record, LATITUDE)) - longitude = float(DataCSVUpload.row_value(record, LONGITUDE)) + latitude = float(latitude) + longitude = float(longitude) except ValueError: self.handle_error( row=record, @@ -334,6 +356,8 @@ def location_site(self, record): location_site_name = DataCSVUpload.row_value(record, LOCATION_SITE) elif wetland_name: location_site_name = wetland_name + elif park_name: + location_site_name = park_name # Find existing location site by data source site code data_source = preferences.SiteSetting.default_data_source.upper() @@ -396,9 +420,15 @@ def location_site(self, record): lat=location_site.latitude, lon=location_site.longitude, ecosystem_type=location_site.ecosystem_type, - wetland_name=user_wetland_name + wetland_name=user_wetland_name, + **{ + 'site_desc': site_description, + 'site_name': location_site_name + } ) location_site.site_code = site_code + if accuracy_of_coordinates: + location_site.accuracy_of_locality = int(accuracy_of_coordinates) location_site.save() return location_site @@ -758,6 +788,10 @@ def process_data(self, row): record_type = RecordType.objects.filter( name__iexact=record_type ).first() + if not record_type: + record_type = RecordType.objects.create( + name=record_type + ) else: record_type = None optional_data['record_type'] = record_type @@ -795,15 +829,48 @@ def process_data(self, row): sampling_date ) + species_name = DataCSVUpload.row_value( + row, VERBATUM_NAME + ) + + if not species_name: + species_name = DataCSVUpload.row_value( + row, SPECIES_NAME + ) + + certainty_of_identification = DataCSVUpload.row_value( + row, CERTAINTY_OF_IDENTIFICATION + ) + + date_accuracy = DataCSVUpload.row_value( + row, DATE_ACCURACY + ) + + data_type = DataCSVUpload.row_value( + row, DATA_TYPE + ) + if data_type: + data_type = data_type.lower() + if 'public' in data_type: + data_type = 'public' + elif 'private' in data_type: + data_type = 'private' + elif 'sensitive' in data_type: + data_type = 'sensitive' + else: + data_type = '' + record = None fields = { 'site': location_site, - 'original_species_name': DataCSVUpload.row_value( - row, SPECIES_NAME), + 'original_species_name': species_name, 'collection_date': sampling_date, 'taxonomy': taxonomy, 'collector_user': collector, - 'validated': True + 'validated': True, + 'accuracy_of_identification': certainty_of_identification, + 'date_accuracy': date_accuracy.lower() if date_accuracy else '', + 'data_type': data_type } if uuid_value: uuid_without_hyphen = uuid_value.replace('-', '') @@ -856,13 +923,6 @@ def process_data(self, row): record.additional_data = json.dumps(row) record.validated = True - # -- Assigning source site - if not record.source_site and self.source_site: - record.source_site = self.source_site - elif record.source_site and self.source_site: - record.additional_observation_sites.add( - self.source_site.id) - record.save() if not str(record.site.id) in self.site_ids: @@ -884,7 +944,6 @@ def process_ended(self): def process_row(self, row): self.module_group = self.upload_session.module_group - self.source_site = self.upload_session.source_site self.process_data(row) def handle_error(self, row, message): diff --git a/bims/tasks/collections_upload.py b/bims/tasks/collections_upload.py index 05418073e..ba89f88d3 100644 --- a/bims/tasks/collections_upload.py +++ b/bims/tasks/collections_upload.py @@ -3,7 +3,6 @@ from celery import shared_task FILE_HEADERS = [ 'UUID', - 'Site description', 'Latitude', 'Longitude', 'Sampling Date', @@ -45,8 +44,7 @@ def check_and_clean_headers(_csv_file_path): cleaned_headers = [clean_header(header) for header in original_headers] cleaned_header_row = ','.join(cleaned_headers) + '\n' - if not all(header in cleaned_headers for header in FILE_HEADERS) or \ - not any(header in cleaned_headers for header in FILE_HEADERS_USER_SITE_CODE): + if not all(header in cleaned_headers for header in FILE_HEADERS): error_message = ( 'Header row does not follow the correct format' ) diff --git a/scripts/management/commands/combine_duplicated_site_visits.py b/scripts/management/commands/combine_duplicated_site_visits.py index 61f18a0e5..3d3aeea00 100644 --- a/scripts/management/commands/combine_duplicated_site_visits.py +++ b/scripts/management/commands/combine_duplicated_site_visits.py @@ -59,7 +59,6 @@ def handle(self, *args, **options): id=dupe['site_id'] ) bio = BiologicalCollectionRecord.objects.filter( - Q(source_site_id=site_id) | Q(additional_observation_sites=site_id), survey__in=surveys, ) if bio.count() == 0: