Merge pull request #51 from Imageomics/feature/long

Recognize `long` as well as `lon` for longitude. Also remove case-sensitivity of column names: all column names are normalized with `str.capitalize()` (first letter upper-case, the rest lower-case).
Imageomics · Sep 1, 2023 · ee5c877
2 parents c14bd04 + 71cfc40
commit ee5c877
Show file tree

Hide file tree

Showing 11 changed files with 81 additions and 54 deletions.
diff --git a/README.md b/README.md
@@ -12,10 +12,11 @@ For full dashboard functionality, upload a CSV or XLS file with the following co
 - `Sex`: Sex of each sample.
 - `hybrid_stat`: Hybrid status of each sample (eg., 'valid_subspecies', 'subspecies_synonym', or 'unknown').
 - `lat`*: Latitude at which image was taken or specimen was collected: number in [-90,90].
-- `lon`*:  Longitude at which image was taken or specimen was collected: number in [-180,180].
+- `lon`*:  Longitude at which image was taken or specimen was collected: number in [-180,180]. `long` will also be accepted.
 - `file_url`*: URL to access file.
 
 ***Note:** 
+- Column names are **not** case-sensitive.
 - `lat` and `lon` columns are not required to utilize the dashboard, but there will be no map view if they are not included. Blank (or null) entries are recorded as `unknown`, and thus excluded from map view.
 - `Image_filename` and `file_url` are not required, but there will be no sample images option if either one is not included.
 - `locality` may be provided, otherwise it will take on the value `lat|lon` or `unknown` if these are not provided.

diff --git a/components/divs.py b/components/divs.py
@@ -18,8 +18,8 @@
                 {'label': 'Subspecies', 'value': 'Subspecies'},
                 {'label':'View', 'value': 'View'},
                 {'label': 'Sex', 'value': 'Sex'},
-                {'label': 'Hybrid Status', 'value':'hybrid_stat'}, 
-                {'label': 'Locality', 'value': 'locality'}
+                {'label': 'Hybrid Status', 'value':'Hybrid_stat'}, 
+                {'label': 'Locality', 'value': 'Locality'}
                 ]
 DOCS_URL = "https://github.com/Imageomics/dashboard-prototype#how-it-works"
 DOCS_LINK = html.A("documentation",
@@ -196,8 +196,8 @@ def get_img_div(df, all_species, img_url):
                             style = QUARTER_DIV_STYLE
                             ),
                         html.Div([
-                            dcc.Checklist(df.hybrid_stat.unique(), 
-                                            df.hybrid_stat.unique()[0:2],
+                            dcc.Checklist(df.Hybrid_stat.unique(), 
+                                            df.Hybrid_stat.unique()[0:2],
                                             id = 'hybrid?')],
                             style = QUARTER_DIV_STYLE
                             ),

diff --git a/components/graphs.py b/components/graphs.py
@@ -54,8 +54,8 @@ def make_map(df, color_by):
     # only use entries that have valid lat & lon for mapping
     df = df.loc[df['lat-lon'].str.contains('unknown') == False]
     fig = px.scatter_mapbox(df,
-                        lat = "lat",
-                        lon = "lon",
+                        lat = "Lat",
+                        lon = "Lon",
                         #projection = "natural earth",
                         custom_data = ["Samples_at_locality", "Species_at_locality", "Subspecies_at_locality"],
                         size = "Samples_at_locality",
@@ -64,8 +64,8 @@ def make_map(df, color_by):
                         title = "Distribution of Samples")
 
     fig.update_traces(hovertemplate = 
-                        "Latitude: %{lat}<br>"+
-                        "Longitude: %{lon}<br>" +
+                        "Latitude: %{Lat}<br>"+
+                        "Longitude: %{Lon}<br>" +
                         "Samples at lat/lon: %{customdata[0]}<br>" +
                         "Species at lat/lon: %{customdata[1]}<br>" +
                         "Subspecies at lat/lon: %{customdata[2]}<br>"

diff --git a/components/query.py b/components/query.py
@@ -17,7 +17,7 @@ def get_data(df, mapping, features):
     df - DataFrame of the data to visualize.
     mapping - Boolean. True when lat/lon are given in dataset.
     features - List of features (columns) included in the DataFrame. This is a subset of the suggested columns: 
-                'Species', 'Subspecies', 'View', 'Sex', 'hybrid_stat', 'lat', 'lon', 'file_url', 'Image_filename'
+                'Species', 'Subspecies', 'View', 'Sex', 'Hybrid_stat', 'Lat', 'Lon', 'File_url', 'Image_filename'
             
     Returns:
     --------
@@ -29,24 +29,24 @@ def get_data(df, mapping, features):
     # Will likely choose to calculate and return this in later instance    
     cat_list = [{'label': 'Species', 'value': 'Species'},
                 {'label': 'Subspecies', 'value': 'Subspecies'},
-                {'label':'View', 'value': 'View'},
+                {'label': 'View', 'value': 'View'},
                 {'label': 'Sex', 'value': 'Sex'},
-                {'label': 'Hybrid Status', 'value':'hybrid_stat'},
-                {'label': 'Locality', 'value': 'locality'}
+                {'label': 'Hybrid Status', 'value':'Hybrid_stat'},
+                {'label': 'Locality', 'value': 'Locality'}
     ]
 
     df = df.copy()
     df = df.fillna('unknown')
-    features.append('locality')
+    features.append('Locality')
 
     # If we don't have lat/lon, just return DataFrame with otherwise required features.
     if not mapping:
-        if 'locality' not in df.columns:
-            df['locality'] = 'unknown'
+        if 'Locality' not in df.columns:
+            df['Locality'] = 'unknown'
         return df[features], cat_list      
 
     # else lat and lon are in dataset, so process locality information
-    df['lat-lon'] = df['lat'].astype(str) + '|' + df['lon'].astype(str)
+    df['lat-lon'] = df['Lat'].astype(str) + '|' + df['Lon'].astype(str)
     df["Samples_at_locality"] = df['lat-lon'].map(df['lat-lon'].value_counts()) # will duplicate if multiple views of same sample
 
     # Count and record number of species and subspecies at each lat-lon
@@ -56,8 +56,8 @@ def get_data(df, mapping, features):
         df.loc[df['lat-lon'] == lat_lon, "Species_at_locality"] = ", ".join(species_list)
         df.loc[df['lat-lon'] == lat_lon, "Subspecies_at_locality"] = ", ".join(subspecies_list)
 
-    if 'locality' not in df.columns:
-        df['locality'] = df['lat-lon'] # contains "unknown" if lat or lon null
+    if 'Locality' not in df.columns:
+        df['Locality'] = df['lat-lon'] # contains "unknown" if lat or lon null
 
     new_features = ['lat-lon', "Samples_at_locality", "Species_at_locality", "Subspecies_at_locality"]
     for feature in new_features:
@@ -157,7 +157,7 @@ def get_filenames(df, subspecies, view, sex, hybrid, num_images):
         df_sub = df.loc[df.Subspecies.isin(subspecies)].copy()
     df_sub = df_sub.loc[df_sub.View.isin(view)]
     df_sub = df_sub.loc[df_sub.Sex.isin(sex)]
-    df_sub = df_sub.loc[df_sub.hybrid_stat.isin(hybrid)]
+    df_sub = df_sub.loc[df_sub.Hybrid_stat.isin(hybrid)]
 
     num_entries = len(df_sub)
     # Filter out any entries that have missing filenames or URLs:

diff --git a/dashboard.py b/dashboard.py
@@ -81,13 +81,21 @@ def parse_contents(contents, filename):
     # If no image urls, disable sample image options
     mapping = True
     img_urls = True
-    features = ['Species', 'Subspecies', 'View', 'Sex', 'hybrid_stat', 'lat', 'lon', 'file_url', 'Image_filename']
+    features = ['Species', 'Subspecies', 'View', 'Sex', 'Hybrid_stat', 'Lat', 'Lon', 'File_url', 'Image_filename']
     included_features = []
+    df.columns = df.columns.str.capitalize()
     for feature in features:
         if feature not in list(df.columns):
-            if feature == 'lat' or feature == 'lon':
-                mapping = False
-            elif feature == 'file_url':
+            if feature == 'Lat' or feature == 'Lon':
+                if feature == 'Lon':
+                    if 'Long' not in list(df.columns):
+                        mapping = False
+                    else:
+                        df = df.rename(columns = {"Long": "Lon"})
+                        included_features.append('Lon')
+                else:
+                    mapping = False
+            elif feature == 'File_url':
                 img_urls = False
             elif feature == 'Image_filename':
                 # If 'Image_filename' missing, return missing column if 'file_url' is included.
@@ -102,10 +110,10 @@ def parse_contents(contents, filename):
     if mapping:
         try:
             # Check lat and lon within appropriate ranges (lat: [-90, 90], lon: [-180, 180])
-            valid_lat = df['lat'].astype(float).between(-90, 90)
-            df.loc[~valid_lat, 'lat'] = 'unknown'
-            valid_lon = df['lon'].astype(float).between(-180, 180)
-            df.loc[~valid_lon, 'lon'] = 'unknown'
+            valid_lat = df['Lat'].astype(float).between(-90, 90)
+            df.loc[~valid_lat, 'Lat'] = 'unknown'
+            valid_lon = df['Lon'].astype(float).between(-180, 180)
+            df.loc[~valid_lon, 'Lon'] = 'unknown'
         except ValueError as e:
             print(e)
             return json.dumps({'error': {'mapping': str(e)}})

diff --git a/test_data/HCGSD_test_latLong.csv b/test_data/HCGSD_test_latLong.csv
@@ -0,0 +1,11 @@
+NHM_Specimen,Image_filename,View,Species,Subspecies,Sex,addit_taxa_info,type_stat,hybrid_stat,in_reduced,locality,lat,long,speciesdesig,file_url
+10429021,10429021_V_lowres.png,,erato,notabilis,,f._notabilis,,subspecies synonym,1,,-1.583333333,-77.75,e. notabilis,https://github.com/Imageomics/dashboard-prototype/raw/main/test_data/images/ventral_images/
+10428972,10428972_V_lowres.png,ventral,erato,petiverana,male,petiverana,,valid subspecies,1,Songolica (= Zongolica) MEX VC,18.66666667,-96.98333333,e. petiverana,
+10429172,,ventral,,petiverana,male,petiverana,,valid subspecies,1,San Ramon NIC ZE,89,-84.68333333,e. petiverana,https://github.com/Imageomics/dashboard-prototype/raw/main/test_data/images/ventral_images/
+10428595,10428595_D_lowres.png,dorsal,erato,phyllis,male,f._phyllis,,subspecies synonym,1,Resistencia ARG CH,-27.45,-58.98333333,e. phyllis,https://github.com/Imageomics/dashboard-prototype/raw/main/test_data/images/dorsal_images/
+10428140,10428140_V_lowres.png,ventral,,plesseni,male,plesseni,,valid subspecies,1,Banos ECD TU,-1.4,-74,m. plesseni,https://github.com/Imageomics/dashboard-prototype/raw/main/test_data/images/ventral_images/
+10428250,10428250_V_lowres.png,ventral,melpomene,,male,ab._rubra,,subspecies synonym,1,Caradoc (Hda) PER CU,-13.36666667,-70.95,m. schunkei,https://github.com/Imageomics/dashboard-prototype/raw/main/test_data/images/ventral_images/
+10427979,,dorsal,melpomene,rosina_S,male,rosina_S,,valid subspecies,1,Turrialba CRI CA,9.883333333,-83.63333333,m. rosina,https://github.com/Imageomics/dashboard-prototype/raw/main/test_data/images/dorsal_images/
+10428803,10428803_D_lowres.png,dorsal,erato,guarica,female,guarica,,valid subspecies,1,Fusagasuga COL CN,4.35,-74.36666667,e. guarica,
+10428169,10428169_V_lowres.png,ventral,melpomene,plesseni,male,f._pura,ST,subspecies synonym,1,Canelos ECD PA,-1.583333333,73,m. plesseni,https://github.com/Imageomics/dashboard-prototype/raw/main/test_data/images/ventral_images/
+10428321,10428321_D_lowres.png,,melpomene,nanna,male,nanna,ST,valid subspecies,1,Espirito Santo BRA ES,-20.33333333,-40.28333333,m. nanna,https://github.com/Imageomics/dashboard-prototype/raw/main/test_data/images/dorsal_images/
diff --git a/tests/components/test_divs.py b/tests/components/test_divs.py
@@ -29,7 +29,7 @@ def test_get_img_div():
             'Subspecies': ['subspecies1', 'subspecies2', 'subspecies4'],
             'View': ['ventral', 'ventral', 'dorsal'],
             'Sex': ['male', 'female', 'female'],
-            'hybrid_stat': ['subspecies synonym', 'valid subspecies', 'subspecies synonym']
+            'Hybrid_stat': ['subspecies synonym', 'valid subspecies', 'subspecies synonym']
         }
     df = pd.DataFrame(data = data)
 

diff --git a/tests/components/test_graphs.py b/tests/components/test_graphs.py
@@ -4,7 +4,9 @@
 
 # Define test data
 df = pd.read_csv("test_data/HCGSD_full_testNA.csv")
-included_features = ['Species', 'Subspecies', 'View', 'Sex', 'hybrid_stat', 'lat', 'lon', 'file_url', 'Image_filename']
+# Update columns since not running through parse
+df.columns = df.columns.str.capitalize()
+included_features = ['Species', 'Subspecies', 'View', 'Sex', 'Hybrid_stat', 'Lat', 'Lon', 'File_url', 'Image_filename']
 processed_df, cat_list = get_data(df, True, included_features)
 
 def test_make_hist_plot():

diff --git a/tests/components/test_query.py b/tests/components/test_query.py
@@ -23,23 +23,23 @@ def test_get_data(self):
         data = {
             'Species': ['melpomene', 'melpomene', 'erato', 'melpomene', 'erato', 'species3'],
             'Subspecies': ['schunkei', 'nanna', 'erato', 'rosina_N', 'guarica', None],
-            'lat': [-13.43, 5.25, 5.25, 9.9, 5.25, 9.9],
-            'lon': [-70.38,  -55.25, -55.25, -83.73, -55.25, -55.25]
+            'Lat': [-13.43, 5.25, 5.25, 9.9, 5.25, 9.9],
+            'Lon': [-70.38,  -55.25, -55.25, -83.73, -55.25, -55.25]
         }
         cat_list = [{'label': 'Species', 'value': 'Species'},
                     {'label': 'Subspecies', 'value': 'Subspecies'},
                     {'label':'View', 'value': 'View'},
                     {'label': 'Sex', 'value': 'Sex'},
-                    {'label': 'Hybrid Status', 'value':'hybrid_stat'},
-                    {'label': 'Locality', 'value': 'locality'}]
-        features = ['Species', 'Subspecies', 'lat', 'lon']
+                    {'label': 'Hybrid Status', 'value':'Hybrid_stat'},
+                    {'label': 'Locality', 'value': 'Locality'}]
+        features = ['Species', 'Subspecies', 'Lat', 'Lon']
         locality = ['-13.43|-70.38', '5.25|-55.25', '5.25|-55.25', '9.9|-83.73','5.25|-55.25', '9.9|-55.25']
 
         # Test with mapping = True (location data)
         df = pd.DataFrame(data = data)
         result_df, result_list = get_data(df, True, features)
         self.assertEqual(result_df['lat-lon'].tolist(), locality)
-        self.assertEqual(result_df['locality'].tolist(), locality)
+        self.assertEqual(result_df['Locality'].tolist(), locality)
         self.assertEqual(result_df["Samples_at_locality"].tolist(), [1,3,3,1,3,1])
         self.assertEqual(result_df["Species_at_locality"].tolist(), ['melpomene', 'melpomene, erato', 'melpomene, erato', 'melpomene', 'melpomene, erato', 'species3'])
         self.assertEqual(result_df["Subspecies_at_locality"].tolist(), ['schunkei', 'nanna, erato, guarica', 'nanna, erato, guarica', 'rosina_N', 'nanna, erato, guarica', 'unknown'])
@@ -48,8 +48,7 @@ def test_get_data(self):
         # Test with mapping = False (no location data)
         df2 = pd.DataFrame(data = {key: data[key] for key in ['Species', 'Subspecies']})
         result_df2, result2_list = get_data(df2, False, features[:2])
-        #self.assertEqual('locality' not in result_df2.columns, True)
-        self.assertEqual(result_df2['locality'].tolist(), ['unknown' for i in range(len(locality))])
+        self.assertEqual(result_df2['Locality'].tolist(), ['unknown' for i in range(len(locality))])
         self.assertEqual(result_df2["Species"].tolist(), ['melpomene', 'melpomene', 'erato', 'melpomene', 'erato', 'species3'])
         self.assertEqual(result_df2["Subspecies"].tolist(), ['schunkei', 'nanna', 'erato', 'rosina_N', 'guarica', 'unknown'])
         self.assertEqual(result2_list, cat_list)
@@ -62,7 +61,7 @@ def test_get_filenames(self):
             'Subspecies': ['schunkei', 'nanna', 'erato', 'rosina_N', 'guarica', 'subspecies6', 'subspecies6'],
             'View': ['ventral', 'ventral', 'ventral', 'dorsal', 'dorsal', 'ventral', 'dorsal'],
             'Sex': ['male', 'female', 'female', 'male', 'female', 'male', 'female'],
-            'hybrid_stat': ['subspecies synonym', 
+            'Hybrid_stat': ['subspecies synonym', 
                             'valid subspecies', 
                             'subspecies synonym', 
                             'valid subspecies', 

diff --git a/tests/test_app_callbacks.py b/tests/test_app_callbacks.py
@@ -3,7 +3,7 @@
 from dashboard import update_dist_view, update_dist_plot, update_pie_plot, set_subspecies_options, update_display
 
 # Define test data
-data = {'processed_df': '{"columns":["Species","Subspecies","View","Sex","hybrid_stat","lat","lon","lat-lon","Samples_at_locality","Species_at_locality","Subspecies_at_locality"],"index":[0,1,2,3,4,5,6,7,8,9],"data":[["erato","notabilis","unknown","unknown","subspecies synonym",-1.583333333,-77.75,"-1.583333333|-77.75",1,"erato","notabilis"],["erato","petiverana","ventral","male","valid subspecies",18.66666667,-96.98333333,"18.66666667|-96.98333333",1,"erato","petiverana"],["unknown","petiverana","ventral","male","valid subspecies","unknown",-84.68333333,"unknown|-84.68333333",1,"unknown","petiverana"],["erato","phyllis","dorsal","male","subspecies synonym",-27.45,-58.98333333,"-27.45|-58.98333333",1,"erato","phyllis"],["unknown","plesseni","ventral","male","valid subspecies",-1.4,"unknown","-1.4|unknown",1,"unknown","plesseni"],["melpomene","unknown","ventral","male","subspecies synonym",-13.36666667,-70.95,"-13.36666667|-70.95",1,"melpomene","unknown"],["melpomene","rosina_S","dorsal","male","valid subspecies",9.883333333,-83.63333333,"9.883333333|-83.63333333",1,"melpomene","rosina_S"],["erato","guarica","dorsal","female","valid subspecies",4.35,-74.36666667,"4.35|-74.36666667",1,"erato","guarica"],["melpomene","plesseni","ventral","male","subspecies synonym",-1.583333333,"unknown","-1.583333333|unknown",1,"melpomene","plesseni"],["melpomene","nanna","unknown","male","valid subspecies",-20.33333333,-40.28333333,"-20.33333333|-40.28333333",1,"melpomene","nanna"]]}',
+data = {'processed_df': '{"columns":["Species","Subspecies","View","Sex","Hybrid_stat","Lat","Lon","lat-lon","Samples_at_locality","Species_at_locality","Subspecies_at_locality"],"index":[0,1,2,3,4,5,6,7,8,9],"data":[["erato","notabilis","unknown","unknown","subspecies synonym",-1.583333333,-77.75,"-1.583333333|-77.75",1,"erato","notabilis"],["erato","petiverana","ventral","male","valid subspecies",18.66666667,-96.98333333,"18.66666667|-96.98333333",1,"erato","petiverana"],["unknown","petiverana","ventral","male","valid subspecies","unknown",-84.68333333,"unknown|-84.68333333",1,"unknown","petiverana"],["erato","phyllis","dorsal","male","subspecies synonym",-27.45,-58.98333333,"-27.45|-58.98333333",1,"erato","phyllis"],["unknown","plesseni","ventral","male","valid subspecies",-1.4,"unknown","-1.4|unknown",1,"unknown","plesseni"],["melpomene","unknown","ventral","male","subspecies synonym",-13.36666667,-70.95,"-13.36666667|-70.95",1,"melpomene","unknown"],["melpomene","rosina_S","dorsal","male","valid subspecies",9.883333333,-83.63333333,"9.883333333|-83.63333333",1,"melpomene","rosina_S"],["erato","guarica","dorsal","female","valid subspecies",4.35,-74.36666667,"4.35|-74.36666667",1,"erato","guarica"],["melpomene","plesseni","ventral","male","subspecies synonym",-1.583333333,"unknown","-1.583333333|unknown",1,"melpomene","plesseni"],["melpomene","nanna","unknown","male","valid subspecies",-20.33333333,-40.28333333,"-20.33333333|-40.28333333",1,"melpomene","nanna"]]}',
         'all_species': {'Erato': ['Any-Erato', 'notabilis', 'petiverana', 'phyllis', 'guarica'], 'Unknown': ['Any-Unknown', 'petiverana', 'plesseni'], 'Melpomene': ['Any-Melpomene', 'unknown', 'rosina_S', 'plesseni', 'nanna'], 'Any': ['Any', 'notabilis', 'petiverana', 'phyllis', 'plesseni', 'unknown', 'rosina_S', 'guarica', 'nanna']}, 
         'mapping': True, 
         'images': True}