Skip to content

Commit

Permalink
Merge pull request #51 from Imageomics/feature/long
Browse files Browse the repository at this point in the history
Recognize both `long` and `lon` as the longitude column; also remove case-sensitivity of column names (all column names are converted to their capitalized form).
  • Loading branch information
egrace479 authored Sep 1, 2023
2 parents c14bd04 + 71cfc40 commit ee5c877
Show file tree
Hide file tree
Showing 11 changed files with 81 additions and 54 deletions.
3 changes: 2 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,11 @@ For full dashboard functionality, upload a CSV or XLS file with the following co
- `Sex`: Sex of each sample.
- `hybrid_stat`: Hybrid status of each sample (e.g., 'valid_subspecies', 'subspecies_synonym', or 'unknown').
- `lat`*: Latitude at which image was taken or specimen was collected: number in [-90,90].
- `lon`*: Longitude at which image was taken or specimen was collected: number in [-180,180].
- `lon`*: Longitude at which image was taken or specimen was collected: number in [-180,180]. `long` will also be accepted.
- `file_url`*: URL to access file.

***Note:**
- Column names are **not** case-sensitive.
- `lat` and `lon` columns are not required to utilize the dashboard, but there will be no map view if they are not included. Blank (or null) entries are recorded as `unknown`, and thus excluded from map view.
- `Image_filename` and `file_url` are not required, but there will be no sample images option if either one is not included.
- `locality` may be provided; if it is not, it will take on the value `lat|lon`, or `unknown` if `lat` and `lon` are not provided.
Expand Down
8 changes: 4 additions & 4 deletions components/divs.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,8 @@
{'label': 'Subspecies', 'value': 'Subspecies'},
{'label':'View', 'value': 'View'},
{'label': 'Sex', 'value': 'Sex'},
{'label': 'Hybrid Status', 'value':'hybrid_stat'},
{'label': 'Locality', 'value': 'locality'}
{'label': 'Hybrid Status', 'value':'Hybrid_stat'},
{'label': 'Locality', 'value': 'Locality'}
]
DOCS_URL = "https://github.com/Imageomics/dashboard-prototype#how-it-works"
DOCS_LINK = html.A("documentation",
Expand Down Expand Up @@ -196,8 +196,8 @@ def get_img_div(df, all_species, img_url):
style = QUARTER_DIV_STYLE
),
html.Div([
dcc.Checklist(df.hybrid_stat.unique(),
df.hybrid_stat.unique()[0:2],
dcc.Checklist(df.Hybrid_stat.unique(),
df.Hybrid_stat.unique()[0:2],
id = 'hybrid?')],
style = QUARTER_DIV_STYLE
),
Expand Down
8 changes: 4 additions & 4 deletions components/graphs.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,8 +54,8 @@ def make_map(df, color_by):
# only use entries that have valid lat & lon for mapping
df = df.loc[df['lat-lon'].str.contains('unknown') == False]
fig = px.scatter_mapbox(df,
lat = "lat",
lon = "lon",
lat = "Lat",
lon = "Lon",
#projection = "natural earth",
custom_data = ["Samples_at_locality", "Species_at_locality", "Subspecies_at_locality"],
size = "Samples_at_locality",
Expand All @@ -64,8 +64,8 @@ def make_map(df, color_by):
title = "Distribution of Samples")

fig.update_traces(hovertemplate =
"Latitude: %{lat}<br>"+
"Longitude: %{lon}<br>" +
"Latitude: %{Lat}<br>"+
"Longitude: %{Lon}<br>" +
"Samples at lat/lon: %{customdata[0]}<br>" +
"Species at lat/lon: %{customdata[1]}<br>" +
"Subspecies at lat/lon: %{customdata[2]}<br>"
Expand Down
22 changes: 11 additions & 11 deletions components/query.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ def get_data(df, mapping, features):
df - DataFrame of the data to visualize.
mapping - Boolean. True when lat/lon are given in dataset.
features - List of features (columns) included in the DataFrame. This is a subset of the suggested columns:
'Species', 'Subspecies', 'View', 'Sex', 'hybrid_stat', 'lat', 'lon', 'file_url', 'Image_filename'
'Species', 'Subspecies', 'View', 'Sex', 'Hybrid_stat', 'Lat', 'Lon', 'File_url', 'Image_filename'
Returns:
--------
Expand All @@ -29,24 +29,24 @@ def get_data(df, mapping, features):
# Will likely choose to calculate and return this in later instance
cat_list = [{'label': 'Species', 'value': 'Species'},
{'label': 'Subspecies', 'value': 'Subspecies'},
{'label':'View', 'value': 'View'},
{'label': 'View', 'value': 'View'},
{'label': 'Sex', 'value': 'Sex'},
{'label': 'Hybrid Status', 'value':'hybrid_stat'},
{'label': 'Locality', 'value': 'locality'}
{'label': 'Hybrid Status', 'value':'Hybrid_stat'},
{'label': 'Locality', 'value': 'Locality'}
]

df = df.copy()
df = df.fillna('unknown')
features.append('locality')
features.append('Locality')

# If we don't have lat/lon, just return DataFrame with otherwise required features.
if not mapping:
if 'locality' not in df.columns:
df['locality'] = 'unknown'
if 'Locality' not in df.columns:
df['Locality'] = 'unknown'
return df[features], cat_list

# else lat and lon are in dataset, so process locality information
df['lat-lon'] = df['lat'].astype(str) + '|' + df['lon'].astype(str)
df['lat-lon'] = df['Lat'].astype(str) + '|' + df['Lon'].astype(str)
df["Samples_at_locality"] = df['lat-lon'].map(df['lat-lon'].value_counts()) # will duplicate if multiple views of same sample

# Count and record number of species and subspecies at each lat-lon
Expand All @@ -56,8 +56,8 @@ def get_data(df, mapping, features):
df.loc[df['lat-lon'] == lat_lon, "Species_at_locality"] = ", ".join(species_list)
df.loc[df['lat-lon'] == lat_lon, "Subspecies_at_locality"] = ", ".join(subspecies_list)

if 'locality' not in df.columns:
df['locality'] = df['lat-lon'] # contains "unknown" if lat or lon null
if 'Locality' not in df.columns:
df['Locality'] = df['lat-lon'] # contains "unknown" if lat or lon null

new_features = ['lat-lon', "Samples_at_locality", "Species_at_locality", "Subspecies_at_locality"]
for feature in new_features:
Expand Down Expand Up @@ -157,7 +157,7 @@ def get_filenames(df, subspecies, view, sex, hybrid, num_images):
df_sub = df.loc[df.Subspecies.isin(subspecies)].copy()
df_sub = df_sub.loc[df_sub.View.isin(view)]
df_sub = df_sub.loc[df_sub.Sex.isin(sex)]
df_sub = df_sub.loc[df_sub.hybrid_stat.isin(hybrid)]
df_sub = df_sub.loc[df_sub.Hybrid_stat.isin(hybrid)]

num_entries = len(df_sub)
# Filter out any entries that have missing filenames or URLs:
Expand Down
24 changes: 16 additions & 8 deletions dashboard.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,13 +81,21 @@ def parse_contents(contents, filename):
# If no image urls, disable sample image options
mapping = True
img_urls = True
features = ['Species', 'Subspecies', 'View', 'Sex', 'hybrid_stat', 'lat', 'lon', 'file_url', 'Image_filename']
features = ['Species', 'Subspecies', 'View', 'Sex', 'Hybrid_stat', 'Lat', 'Lon', 'File_url', 'Image_filename']
included_features = []
df.columns = df.columns.str.capitalize()
for feature in features:
if feature not in list(df.columns):
if feature == 'lat' or feature == 'lon':
mapping = False
elif feature == 'file_url':
if feature == 'Lat' or feature == 'Lon':
if feature == 'Lon':
if 'Long' not in list(df.columns):
mapping = False
else:
df = df.rename(columns = {"Long": "Lon"})
included_features.append('Lon')
else:
mapping = False
elif feature == 'File_url':
img_urls = False
elif feature == 'Image_filename':
# If 'Image_filename' missing, return missing column if 'file_url' is included.
Expand All @@ -102,10 +110,10 @@ def parse_contents(contents, filename):
if mapping:
try:
# Check lat and lon within appropriate ranges (lat: [-90, 90], lon: [-180, 180])
valid_lat = df['lat'].astype(float).between(-90, 90)
df.loc[~valid_lat, 'lat'] = 'unknown'
valid_lon = df['lon'].astype(float).between(-180, 180)
df.loc[~valid_lon, 'lon'] = 'unknown'
valid_lat = df['Lat'].astype(float).between(-90, 90)
df.loc[~valid_lat, 'Lat'] = 'unknown'
valid_lon = df['Lon'].astype(float).between(-180, 180)
df.loc[~valid_lon, 'Lon'] = 'unknown'
except ValueError as e:
print(e)
return json.dumps({'error': {'mapping': str(e)}})
Expand Down
11 changes: 11 additions & 0 deletions test_data/HCGSD_test_latLong.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
NHM_Specimen,Image_filename,View,Species,Subspecies,Sex,addit_taxa_info,type_stat,hybrid_stat,in_reduced,locality,lat,long,speciesdesig,file_url
10429021,10429021_V_lowres.png,,erato,notabilis,,f._notabilis,,subspecies synonym,1,,-1.583333333,-77.75,e. notabilis,https://github.com/Imageomics/dashboard-prototype/raw/main/test_data/images/ventral_images/
10428972,10428972_V_lowres.png,ventral,erato,petiverana,male,petiverana,,valid subspecies,1,Songolica (= Zongolica) MEX VC,18.66666667,-96.98333333,e. petiverana,
10429172,,ventral,,petiverana,male,petiverana,,valid subspecies,1,San Ramon NIC ZE,89,-84.68333333,e. petiverana,https://github.com/Imageomics/dashboard-prototype/raw/main/test_data/images/ventral_images/
10428595,10428595_D_lowres.png,dorsal,erato,phyllis,male,f._phyllis,,subspecies synonym,1,Resistencia ARG CH,-27.45,-58.98333333,e. phyllis,https://github.com/Imageomics/dashboard-prototype/raw/main/test_data/images/dorsal_images/
10428140,10428140_V_lowres.png,ventral,,plesseni,male,plesseni,,valid subspecies,1,Banos ECD TU,-1.4,-74,m. plesseni,https://github.com/Imageomics/dashboard-prototype/raw/main/test_data/images/ventral_images/
10428250,10428250_V_lowres.png,ventral,melpomene,,male,ab._rubra,,subspecies synonym,1,Caradoc (Hda) PER CU,-13.36666667,-70.95,m. schunkei,https://github.com/Imageomics/dashboard-prototype/raw/main/test_data/images/ventral_images/
10427979,,dorsal,melpomene,rosina_S,male,rosina_S,,valid subspecies,1,Turrialba CRI CA,9.883333333,-83.63333333,m. rosina,https://github.com/Imageomics/dashboard-prototype/raw/main/test_data/images/dorsal_images/
10428803,10428803_D_lowres.png,dorsal,erato,guarica,female,guarica,,valid subspecies,1,Fusagasuga COL CN,4.35,-74.36666667,e. guarica,
10428169,10428169_V_lowres.png,ventral,melpomene,plesseni,male,f._pura,ST,subspecies synonym,1,Canelos ECD PA,-1.583333333,73,m. plesseni,https://github.com/Imageomics/dashboard-prototype/raw/main/test_data/images/ventral_images/
10428321,10428321_D_lowres.png,,melpomene,nanna,male,nanna,ST,valid subspecies,1,Espirito Santo BRA ES,-20.33333333,-40.28333333,m. nanna,https://github.com/Imageomics/dashboard-prototype/raw/main/test_data/images/dorsal_images/
2 changes: 1 addition & 1 deletion tests/components/test_divs.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ def test_get_img_div():
'Subspecies': ['subspecies1', 'subspecies2', 'subspecies4'],
'View': ['ventral', 'ventral', 'dorsal'],
'Sex': ['male', 'female', 'female'],
'hybrid_stat': ['subspecies synonym', 'valid subspecies', 'subspecies synonym']
'Hybrid_stat': ['subspecies synonym', 'valid subspecies', 'subspecies synonym']
}
df = pd.DataFrame(data = data)

Expand Down
4 changes: 3 additions & 1 deletion tests/components/test_graphs.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,9 @@

# Define test data
df = pd.read_csv("test_data/HCGSD_full_testNA.csv")
included_features = ['Species', 'Subspecies', 'View', 'Sex', 'hybrid_stat', 'lat', 'lon', 'file_url', 'Image_filename']
# Update columns since not running through parse
df.columns = df.columns.str.capitalize()
included_features = ['Species', 'Subspecies', 'View', 'Sex', 'Hybrid_stat', 'Lat', 'Lon', 'File_url', 'Image_filename']
processed_df, cat_list = get_data(df, True, included_features)

def test_make_hist_plot():
Expand Down
17 changes: 8 additions & 9 deletions tests/components/test_query.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,23 +23,23 @@ def test_get_data(self):
data = {
'Species': ['melpomene', 'melpomene', 'erato', 'melpomene', 'erato', 'species3'],
'Subspecies': ['schunkei', 'nanna', 'erato', 'rosina_N', 'guarica', None],
'lat': [-13.43, 5.25, 5.25, 9.9, 5.25, 9.9],
'lon': [-70.38, -55.25, -55.25, -83.73, -55.25, -55.25]
'Lat': [-13.43, 5.25, 5.25, 9.9, 5.25, 9.9],
'Lon': [-70.38, -55.25, -55.25, -83.73, -55.25, -55.25]
}
cat_list = [{'label': 'Species', 'value': 'Species'},
{'label': 'Subspecies', 'value': 'Subspecies'},
{'label':'View', 'value': 'View'},
{'label': 'Sex', 'value': 'Sex'},
{'label': 'Hybrid Status', 'value':'hybrid_stat'},
{'label': 'Locality', 'value': 'locality'}]
features = ['Species', 'Subspecies', 'lat', 'lon']
{'label': 'Hybrid Status', 'value':'Hybrid_stat'},
{'label': 'Locality', 'value': 'Locality'}]
features = ['Species', 'Subspecies', 'Lat', 'Lon']
locality = ['-13.43|-70.38', '5.25|-55.25', '5.25|-55.25', '9.9|-83.73','5.25|-55.25', '9.9|-55.25']

# Test with mapping = True (location data)
df = pd.DataFrame(data = data)
result_df, result_list = get_data(df, True, features)
self.assertEqual(result_df['lat-lon'].tolist(), locality)
self.assertEqual(result_df['locality'].tolist(), locality)
self.assertEqual(result_df['Locality'].tolist(), locality)
self.assertEqual(result_df["Samples_at_locality"].tolist(), [1,3,3,1,3,1])
self.assertEqual(result_df["Species_at_locality"].tolist(), ['melpomene', 'melpomene, erato', 'melpomene, erato', 'melpomene', 'melpomene, erato', 'species3'])
self.assertEqual(result_df["Subspecies_at_locality"].tolist(), ['schunkei', 'nanna, erato, guarica', 'nanna, erato, guarica', 'rosina_N', 'nanna, erato, guarica', 'unknown'])
Expand All @@ -48,8 +48,7 @@ def test_get_data(self):
# Test with mapping = False (no location data)
df2 = pd.DataFrame(data = {key: data[key] for key in ['Species', 'Subspecies']})
result_df2, result2_list = get_data(df2, False, features[:2])
#self.assertEqual('locality' not in result_df2.columns, True)
self.assertEqual(result_df2['locality'].tolist(), ['unknown' for i in range(len(locality))])
self.assertEqual(result_df2['Locality'].tolist(), ['unknown' for i in range(len(locality))])
self.assertEqual(result_df2["Species"].tolist(), ['melpomene', 'melpomene', 'erato', 'melpomene', 'erato', 'species3'])
self.assertEqual(result_df2["Subspecies"].tolist(), ['schunkei', 'nanna', 'erato', 'rosina_N', 'guarica', 'unknown'])
self.assertEqual(result2_list, cat_list)
Expand All @@ -62,7 +61,7 @@ def test_get_filenames(self):
'Subspecies': ['schunkei', 'nanna', 'erato', 'rosina_N', 'guarica', 'subspecies6', 'subspecies6'],
'View': ['ventral', 'ventral', 'ventral', 'dorsal', 'dorsal', 'ventral', 'dorsal'],
'Sex': ['male', 'female', 'female', 'male', 'female', 'male', 'female'],
'hybrid_stat': ['subspecies synonym',
'Hybrid_stat': ['subspecies synonym',
'valid subspecies',
'subspecies synonym',
'valid subspecies',
Expand Down
2 changes: 1 addition & 1 deletion tests/test_app_callbacks.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from dashboard import update_dist_view, update_dist_plot, update_pie_plot, set_subspecies_options, update_display

# Define test data
data = {'processed_df': '{"columns":["Species","Subspecies","View","Sex","hybrid_stat","lat","lon","lat-lon","Samples_at_locality","Species_at_locality","Subspecies_at_locality"],"index":[0,1,2,3,4,5,6,7,8,9],"data":[["erato","notabilis","unknown","unknown","subspecies synonym",-1.583333333,-77.75,"-1.583333333|-77.75",1,"erato","notabilis"],["erato","petiverana","ventral","male","valid subspecies",18.66666667,-96.98333333,"18.66666667|-96.98333333",1,"erato","petiverana"],["unknown","petiverana","ventral","male","valid subspecies","unknown",-84.68333333,"unknown|-84.68333333",1,"unknown","petiverana"],["erato","phyllis","dorsal","male","subspecies synonym",-27.45,-58.98333333,"-27.45|-58.98333333",1,"erato","phyllis"],["unknown","plesseni","ventral","male","valid subspecies",-1.4,"unknown","-1.4|unknown",1,"unknown","plesseni"],["melpomene","unknown","ventral","male","subspecies synonym",-13.36666667,-70.95,"-13.36666667|-70.95",1,"melpomene","unknown"],["melpomene","rosina_S","dorsal","male","valid subspecies",9.883333333,-83.63333333,"9.883333333|-83.63333333",1,"melpomene","rosina_S"],["erato","guarica","dorsal","female","valid subspecies",4.35,-74.36666667,"4.35|-74.36666667",1,"erato","guarica"],["melpomene","plesseni","ventral","male","subspecies synonym",-1.583333333,"unknown","-1.583333333|unknown",1,"melpomene","plesseni"],["melpomene","nanna","unknown","male","valid subspecies",-20.33333333,-40.28333333,"-20.33333333|-40.28333333",1,"melpomene","nanna"]]}',
data = {'processed_df': '{"columns":["Species","Subspecies","View","Sex","Hybrid_stat","Lat","Lon","lat-lon","Samples_at_locality","Species_at_locality","Subspecies_at_locality"],"index":[0,1,2,3,4,5,6,7,8,9],"data":[["erato","notabilis","unknown","unknown","subspecies synonym",-1.583333333,-77.75,"-1.583333333|-77.75",1,"erato","notabilis"],["erato","petiverana","ventral","male","valid subspecies",18.66666667,-96.98333333,"18.66666667|-96.98333333",1,"erato","petiverana"],["unknown","petiverana","ventral","male","valid subspecies","unknown",-84.68333333,"unknown|-84.68333333",1,"unknown","petiverana"],["erato","phyllis","dorsal","male","subspecies synonym",-27.45,-58.98333333,"-27.45|-58.98333333",1,"erato","phyllis"],["unknown","plesseni","ventral","male","valid subspecies",-1.4,"unknown","-1.4|unknown",1,"unknown","plesseni"],["melpomene","unknown","ventral","male","subspecies synonym",-13.36666667,-70.95,"-13.36666667|-70.95",1,"melpomene","unknown"],["melpomene","rosina_S","dorsal","male","valid subspecies",9.883333333,-83.63333333,"9.883333333|-83.63333333",1,"melpomene","rosina_S"],["erato","guarica","dorsal","female","valid subspecies",4.35,-74.36666667,"4.35|-74.36666667",1,"erato","guarica"],["melpomene","plesseni","ventral","male","subspecies synonym",-1.583333333,"unknown","-1.583333333|unknown",1,"melpomene","plesseni"],["melpomene","nanna","unknown","male","valid subspecies",-20.33333333,-40.28333333,"-20.33333333|-40.28333333",1,"melpomene","nanna"]]}',
'all_species': {'Erato': ['Any-Erato', 'notabilis', 'petiverana', 'phyllis', 'guarica'], 'Unknown': ['Any-Unknown', 'petiverana', 'plesseni'], 'Melpomene': ['Any-Melpomene', 'unknown', 'rosina_S', 'plesseni', 'nanna'], 'Any': ['Any', 'notabilis', 'petiverana', 'phyllis', 'plesseni', 'unknown', 'rosina_S', 'guarica', 'nanna']},
'mapping': True,
'images': True}
Expand Down
Loading

0 comments on commit ee5c877

Please sign in to comment.