Skip to content

Commit

Permalink
Update uploader to accommodate sanparks template (#4038)
Browse files Browse the repository at this point in the history
* Update uploader to accommodate sanparks template

* Fix upload issues
  • Loading branch information
dimasciput committed Jul 11, 2024
1 parent d47e091 commit 86ac1f1
Show file tree
Hide file tree
Showing 7 changed files with 175 additions and 57 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
# Generated by Django 4.2.11 on 2024-07-03 14:22

from django.db import migrations, models


class Migration(migrations.Migration):
    """Schema changes for the SANParks uploader template (auto-generated).

    Drops the unused ``additional_observation_sites`` M2M fields and adds
    ``data_type`` / ``date_accuracy`` fields to BiologicalCollectionRecord,
    plus the 'sanparks' site-code generator choice on SiteSetting.
    Generated by Django 4.2.11 — do not hand-edit operation order.
    """

    dependencies = [
        # Must run after the profile-role rename migration.
        ('bims', '0419_rename_role_link_profile_role'),
    ]

    operations = [
        # NOTE: RemoveField on an M2M drops the through table; data in
        # additional_observation_sites is lost and this is not reversible
        # with data intact.
        migrations.RemoveField(
            model_name='biologicalcollectionrecord',
            name='additional_observation_sites',
        ),
        migrations.RemoveField(
            model_name='locationsite',
            name='additional_observation_sites',
        ),
        # Data-sharing level parsed from the SANParks CSV "Data type" column;
        # blank default keeps existing rows valid.
        migrations.AddField(
            model_name='biologicalcollectionrecord',
            name='data_type',
            field=models.CharField(blank=True, choices=[('private', 'Private'), ('public', 'Public'), ('sensitive', 'Sensitive')], default='', help_text='Specify data sharing level of this record', max_length=128),
        ),
        # Whether the sampling date is a real observation date or artificial.
        migrations.AddField(
            model_name='biologicalcollectionrecord',
            name='date_accuracy',
            field=models.CharField(blank=True, choices=[('accurate', 'Accurate'), ('artificial', 'Artificial')], default='', help_text='Indicate if date is accurate or artificial', max_length=64),
        ),
        migrations.AlterField(
            model_name='sitesetting',
            name='iucn_api_key',
            field=models.CharField(blank=True, default='', help_text='Token key for IUCN api', max_length=255),
        ),
        migrations.AlterField(
            model_name='sitesetting',
            name='recaptcha_secret_key',
            field=models.CharField(blank=True, default='', max_length=150),
        ),
        migrations.AlterField(
            model_name='sitesetting',
            name='recaptcha_site_key',
            field=models.CharField(blank=True, default='', max_length=150),
        ),
        # Adds the new 'sanparks' generator scheme alongside bims/fbis/rbis.
        migrations.AlterField(
            model_name='sitesetting',
            name='site_code_generator',
            field=models.CharField(blank=True, choices=[('bims', 'BIMS (2 Site Name + 2 Site Description + Site count)'), ('fbis', 'FBIS (2 Secondary catchment + 4 River + Site count)'), ('rbis', 'RBIS (Catchment + Province ID + District ID + Site count)'), ('sanparks', 'SANPARKS (1st three park name + 1st two site description + site count)')], default='bims', help_text='How site code generated', max_length=50),
        ),
    ]
39 changes: 27 additions & 12 deletions bims/models/biological_collection_record.py
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,17 @@ class BiologicalCollectionRecord(AbstractValidation):
(ABUNDANCE_TYPE_DENSITY_CELLS_ML, 'Density (cells/mL)'),
)

DATA_TYPE_CHOICES = (
('private', 'Private'),
('public', 'Public'),
('sensitive', 'Sensitive'),
)

DATE_ACCURACY_CHOICES = (
('accurate', 'Accurate'),
('artificial', 'Artificial'),
)

site = models.ForeignKey(
LocationSite,
models.CASCADE,
Expand All @@ -182,18 +193,6 @@ class BiologicalCollectionRecord(AbstractValidation):
verbose_name='collector or observer',
)

additional_observation_sites = models.ManyToManyField(
to=Site,
related_name='additional_observation_sites',
blank=True,
help_text="List of sites where this biological occurrence has also been observed. "
"This attribute allows for recording multiple observation locations beyond "
"the primary source site. For instance, if an occurrence is recorded at the "
"main location 'FBIS' and is also observed at 'SanParks', "
"this field facilitates linking the occurrence to 'SanParks' as "
"an additional observation site."
)

notes = models.TextField(
blank=True,
default='',
Expand Down Expand Up @@ -369,6 +368,22 @@ class BiologicalCollectionRecord(AbstractValidation):
null=True
)

data_type = models.CharField(
max_length=128,
blank=True,
choices=DATA_TYPE_CHOICES,
default='',
help_text='Specify data sharing level of this record'
)

date_accuracy = models.CharField(
max_length=64,
blank=True,
choices=DATE_ACCURACY_CHOICES,
default='',
help_text='Indicate if date is accurate or artificial'
)

@property
def data_name(self):
return self.original_species_name
Expand Down
14 changes: 2 additions & 12 deletions bims/models/location_site.py
Original file line number Diff line number Diff line change
Expand Up @@ -210,17 +210,6 @@ class LocationSite(AbstractValidation):
default=HYDROGEOMORPHIC_NONE,
choices=HYDROGEOMORPHIC_CHOICES
)
additional_observation_sites = models.ManyToManyField(
to=Site,
related_name='location_site_additional_observation_sites',
blank=True,
help_text="List of sites where this location site has also been observed. "
"This attribute allows for recording multiple observation locations beyond "
"the primary source site. For instance, if an occurrence is recorded at the "
"main location 'FBIS' and is also observed at 'SanParks', "
"this field facilitates linking the location site to 'SanParks' as "
"an additional observation site."
)

@property
def location_site_identifier(self):
Expand Down Expand Up @@ -596,7 +585,8 @@ def generate_site_code(
site_name_length = 3
if project_name in ['bims', 'sanparks'] and (site_name or site_description):
catchment_site_code = site_name[:site_name_length].upper()
catchment_site_code += site_description[:2].upper()
if project_name == 'bims':
catchment_site_code += site_description[:2].upper()
elif location_site:
catchment_site_code += location_site.name[:site_name_length].upper()
catchment_site_code += location_site.site_description[:4].upper()
Expand Down
6 changes: 6 additions & 0 deletions bims/scripts/collection_csv_keys.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,3 +92,9 @@

# Sanparks
# Column-header keys for the SANParks CSV upload template. Each constant
# must match the template's header text exactly (after header cleaning),
# so these strings are part of the upload contract — do not rename.
PARK_OR_MPA_NAME = 'Park or MPA name'
# Coordinate accuracy in metres; uploader falls back to 100 when blank.
ACCURACY_OF_COORDINATES = 'Accuracy of coordinates'

# NOTE(review): 'Verbatum' is a misspelling of 'Verbatim', but it mirrors
# the header in the SANParks template — fix only together with the template.
VERBATUM_NAME = 'Verbatum Name (Scientific or common)'
CERTAINTY_OF_IDENTIFICATION = 'Certainty of identification'
# Maps to BiologicalCollectionRecord.date_accuracy ('accurate'/'artificial').
DATE_ACCURACY = 'Date Accuracy'
# Maps to BiologicalCollectionRecord.data_type (private/public/sensitive).
DATA_TYPE = 'Data type'
117 changes: 88 additions & 29 deletions bims/scripts/occurrences_upload.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,6 @@ class OccurrenceProcessor(object):

site_ids = []
module_group = None
source_site = None
# Whether the script should also fetch location context after ingesting
# collection data
fetch_location_context = True
Expand Down Expand Up @@ -277,6 +276,12 @@ def location_site(self, record):

park_name = DataCSVUpload.row_value(record, PARK_OR_MPA_NAME)

accuracy_of_coordinates = DataCSVUpload.row_value(
record, ACCURACY_OF_COORDINATES
)
if not accuracy_of_coordinates:
accuracy_of_coordinates = 100

if not longitude and not latitude and park_name:
wfs_url = preferences.SiteSetting.park_wfs_url
layer_name = preferences.SiteSetting.park_wfs_layer_name
Expand All @@ -287,22 +292,39 @@ def location_site(self, record):
latitude = self.park_centroid[park_name][0]
longitude = self.park_centroid[park_name][1]
else:
park_centroid = get_feature_centroid(
wfs_url,
layer_name,
attribute_key=attribute_key,
attribute_value=attribute_value
)
if park_centroid:
latitude = park_centroid[0]
longitude = park_centroid[1]
self.park_centroid[park_name] = park_centroid
# Check if there is already site with the same park name
site = LocationSite.objects.filter(
name=park_name
).first()
if site:
latitude = site.latitude
longitude = site.longitude
self.park_centroid[site.name] = [latitude, longitude]
# Check if site with same park name and accuracy of coordinates exists
site = LocationSite.objects.filter(
name=park_name,
accuracy_of_locality=int(accuracy_of_coordinates)
).exclude(site_code='').first()
if site:
# Return existing site
return site
else:
self.handle_error(
row=record,
message='Park or MPA name does not exist in the database'
park_centroid = get_feature_centroid(
wfs_url,
layer_name,
attribute_key=attribute_key,
attribute_value=attribute_value
)
return None
if park_centroid:
latitude = park_centroid[0]
longitude = park_centroid[1]
self.park_centroid[park_name] = park_centroid
else:
self.handle_error(
row=record,
message='Park or MPA name does not exist in the database'
)
return None

if not longitude or not latitude:
self.handle_error(
Expand All @@ -312,8 +334,8 @@ def location_site(self, record):
return None

try:
latitude = float(DataCSVUpload.row_value(record, LATITUDE))
longitude = float(DataCSVUpload.row_value(record, LONGITUDE))
latitude = float(latitude)
longitude = float(longitude)
except ValueError:
self.handle_error(
row=record,
Expand All @@ -334,6 +356,8 @@ def location_site(self, record):
location_site_name = DataCSVUpload.row_value(record, LOCATION_SITE)
elif wetland_name:
location_site_name = wetland_name
elif park_name:
location_site_name = park_name

# Find existing location site by data source site code
data_source = preferences.SiteSetting.default_data_source.upper()
Expand Down Expand Up @@ -396,9 +420,15 @@ def location_site(self, record):
lat=location_site.latitude,
lon=location_site.longitude,
ecosystem_type=location_site.ecosystem_type,
wetland_name=user_wetland_name
wetland_name=user_wetland_name,
**{
'site_desc': site_description,
'site_name': location_site_name
}
)
location_site.site_code = site_code
if accuracy_of_coordinates:
location_site.accuracy_of_locality = int(accuracy_of_coordinates)
location_site.save()
return location_site

Expand Down Expand Up @@ -758,6 +788,10 @@ def process_data(self, row):
record_type = RecordType.objects.filter(
name__iexact=record_type
).first()
if not record_type:
record_type = RecordType.objects.create(
name=record_type
)
else:
record_type = None
optional_data['record_type'] = record_type
Expand Down Expand Up @@ -795,15 +829,48 @@ def process_data(self, row):
sampling_date
)

species_name = DataCSVUpload.row_value(
row, VERBATUM_NAME
)

if not species_name:
species_name = DataCSVUpload.row_value(
row, SPECIES_NAME
)

certainty_of_identification = DataCSVUpload.row_value(
row, CERTAINTY_OF_IDENTIFICATION
)

date_accuracy = DataCSVUpload.row_value(
row, DATE_ACCURACY
)

data_type = DataCSVUpload.row_value(
row, DATA_TYPE
)
if data_type:
data_type = data_type.lower()
if 'public' in data_type:
data_type = 'public'
elif 'private' in data_type:
data_type = 'private'
elif 'sensitive' in data_type:
data_type = 'sensitive'
else:
data_type = ''

record = None
fields = {
'site': location_site,
'original_species_name': DataCSVUpload.row_value(
row, SPECIES_NAME),
'original_species_name': species_name,
'collection_date': sampling_date,
'taxonomy': taxonomy,
'collector_user': collector,
'validated': True
'validated': True,
'accuracy_of_identification': certainty_of_identification,
'date_accuracy': date_accuracy.lower() if date_accuracy else '',
'data_type': data_type
}
if uuid_value:
uuid_without_hyphen = uuid_value.replace('-', '')
Expand Down Expand Up @@ -856,13 +923,6 @@ def process_data(self, row):
record.additional_data = json.dumps(row)
record.validated = True

# -- Assigning source site
if not record.source_site and self.source_site:
record.source_site = self.source_site
elif record.source_site and self.source_site:
record.additional_observation_sites.add(
self.source_site.id)

record.save()

if not str(record.site.id) in self.site_ids:
Expand All @@ -884,7 +944,6 @@ def process_ended(self):

def process_row(self, row):
self.module_group = self.upload_session.module_group
self.source_site = self.upload_session.source_site
self.process_data(row)

def handle_error(self, row, message):
Expand Down
4 changes: 1 addition & 3 deletions bims/tasks/collections_upload.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
from celery import shared_task
FILE_HEADERS = [
'UUID',
'Site description',
'Latitude',
'Longitude',
'Sampling Date',
Expand Down Expand Up @@ -45,8 +44,7 @@ def check_and_clean_headers(_csv_file_path):
cleaned_headers = [clean_header(header) for header in original_headers]

cleaned_header_row = ','.join(cleaned_headers) + '\n'
if not all(header in cleaned_headers for header in FILE_HEADERS) or \
not any(header in cleaned_headers for header in FILE_HEADERS_USER_SITE_CODE):
if not all(header in cleaned_headers for header in FILE_HEADERS):
error_message = (
'Header row does not follow the correct format'
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,6 @@ def handle(self, *args, **options):
id=dupe['site_id']
)
bio = BiologicalCollectionRecord.objects.filter(
Q(source_site_id=site_id) | Q(additional_observation_sites=site_id),
survey__in=surveys,
)
if bio.count() == 0:
Expand Down

0 comments on commit 86ac1f1

Please sign in to comment.