From 6fec4343aa96c27b9455593b8bfd758c456b09b3 Mon Sep 17 00:00:00 2001 From: Geoff Ower Date: Mon, 3 Jul 2023 16:01:37 -0500 Subject: [PATCH 1/4] Add modified and modifiedBy (close #3464) --- lib/export/coldp.rb | 29 ++++++-- lib/export/coldp/files/description.rb | 10 ++- lib/export/coldp/files/name.rb | 86 ++++++++++++----------- lib/export/coldp/files/reference.rb | 14 ++-- lib/export/coldp/files/synonym.rb | 20 +++--- lib/export/coldp/files/taxon.rb | 40 ++++++----- lib/export/coldp/files/vernacular_name.rb | 24 ++++--- 7 files changed, 134 insertions(+), 89 deletions(-) diff --git a/lib/export/coldp.rb b/lib/export/coldp.rb index a2a16db55d..2fe51dd5b9 100644 --- a/lib/export/coldp.rb +++ b/lib/export/coldp.rb @@ -27,6 +27,26 @@ def self.otus(otu_id) .where('(otus.name IS NULL) OR (otus.name = taxon_names.cached)') end + def self.project_members(project_id) + project_members = {} + ProjectMember.where(project_id: project_id).each do |pm| + if pm.user.orcid.nil? + project_members[pm.user_id] = pm.user.name + else + project_members[pm.user_id] = pm.user.orcid.gsub('orcid.org/', '') # orcid.org domain likely not expected by the checklistbank importer? + end + end + project_members + end + + def self.modified(updated_at) + updated_at.iso8601() + end + + def self.modified_by(updated_by_id, project_members) + project_members[updated_by_id] + end + def self.export(otu_id, prefer_unlabelled_otus: true) otus = otus(otu_id) @@ -35,6 +55,7 @@ def self.export(otu_id, prefer_unlabelled_otus: true) otu = ::Otu.find(otu_id) project = ::Project.find(otu.project_id) + project_members = project_members(otu.project_id) # TODO: This will likely have to change, it is renamed on serving the file. 
zip_file_path = "/tmp/_#{SecureRandom.hex(8)}_coldp.zip" @@ -56,19 +77,19 @@ def self.export(otu_id, prefer_unlabelled_otus: true) Zip::File.open(zip_file_path, Zip::File::CREATE) do |zipfile| (FILETYPES - ['Name']).each do |ft| m = "Export::Coldp::Files::#{ft}".safe_constantize - zipfile.get_output_stream("#{ft}.csv") { |f| f.write m.generate(otus, ref_csv) } + zipfile.get_output_stream("#{ft}.csv") { |f| f.write m.generate(otus, project_members, ref_csv) } end - zipfile.get_output_stream('Name.csv') { |f| f.write Export::Coldp::Files::Name.generate(otu, ref_csv) } + zipfile.get_output_stream('Name.csv') { |f| f.write Export::Coldp::Files::Name.generate(otu, project_members, ref_csv) } zipfile.get_output_stream('Taxon.csv') do |f| - f.write Export::Coldp::Files::Taxon.generate(otus, otu_id, ref_csv, prefer_unlabelled_otus: prefer_unlabelled_otus) + f.write Export::Coldp::Files::Taxon.generate(otus, project_members, otu_id, ref_csv, prefer_unlabelled_otus: prefer_unlabelled_otus) end # Sort the refs by full citation string sorted_refs = ref_csv.values.sort{|a,b| a[1] <=> b[1]} d = CSV.generate(col_sep: "\t") do |csv| - csv << %w{ID citation doi} # author year source details + csv << %w{ID citation doi modified modifiedBy} # author year source details sorted_refs.each do |r| csv << r end diff --git a/lib/export/coldp/files/description.rb b/lib/export/coldp/files/description.rb index 843a4f6db3..7e48ce8d51 100644 --- a/lib/export/coldp/files/description.rb +++ b/lib/export/coldp/files/description.rb @@ -15,7 +15,7 @@ def self.reference_id(content) nil end - def self.generate(otus, reference_csv = nil ) + def self.generate(otus, project_members, reference_csv = nil ) CSV.generate(col_sep: "\t") do |csv| csv << %w{ @@ -24,6 +24,8 @@ def self.generate(otus, reference_csv = nil ) description language referenceID + modified + modifiedBy } otus.joins(:contents).each do |o| @@ -35,10 +37,12 @@ def self.generate(otus, reference_csv = nil ) c.topic_id, # TODO: refence EOL or related unitified topic DOIs c.text, 
c.language&.alpha_3_bibliographic, - sources.collect{|a| a.id}.join(',') + sources.collect{|a| a.id}.join(','), + Export::Coldp.modified(c[:updated_at]), # modified + Export::Coldp.modified_by(c[:updated_by_id], project_members) # modifiedBy ] - Export::Coldp::Files::Reference.add_reference_rows(sources, reference_csv) if reference_csv + Export::Coldp::Files::Reference.add_reference_rows(sources, reference_csv, project_members) if reference_csv end end end diff --git a/lib/export/coldp/files/name.rb b/lib/export/coldp/files/name.rb index 86aececac1..c627071738 100644 --- a/lib/export/coldp/files/name.rb +++ b/lib/export/coldp/files/name.rb @@ -58,7 +58,7 @@ def self.nom_status_field(taxon_name) # Invalid Protonyms are rendered only as their original Combination # @param t [Protonym] # only place that var./frm can be handled. - def self.add_original_combination(t, csv, origin_citation, name_remarks_vocab_id) + def self.add_original_combination(t, csv, origin_citation, name_remarks_vocab_id, project_members) e = t.original_combination_elements infraspecific_element = t.original_combination_infraspecific_element(e) @@ -116,24 +116,26 @@ def self.add_original_combination(t, csv, origin_citation, name_remarks_vocab_id end csv << [ - id, # ID - basionym_id, # basionymID - clean_sic(t.cached_original_combination), # scientificName - authorship_field(t, true), # authorship - rank, # rank - uninomial, # uninomial - genus, # genus - subgenus, # subgenus (no parens) - species, # species - infraspecific_element ? infraspecific_element.last : nil, # infraspecificEpithet - origin_citation&.source_id, # referenceID | - origin_citation&.pages, # publishedInPage | !! 
All origin citations get added to reference_csv via the main loop, not here - t.year_of_publication, # publishedInYear | - true, # original - code_field(t), # code - nil, # status https://api.checklistbank.org/vocab/nomStatus - nil, # link (probably TW public or API) - remarks(t, name_remarks_vocab_id), # remarks + id, # ID + basionym_id, # basionymID + clean_sic(t.cached_original_combination), # scientificName + authorship_field(t, true), # authorship + rank, # rank + uninomial, # uninomial + genus, # genus + subgenus, # subgenus (no parens) + species, # species + infraspecific_element ? infraspecific_element.last : nil, # infraspecificEpithet + origin_citation&.source_id, # referenceID | + origin_citation&.pages, # publishedInPage | !! All origin citations get added to reference_csv via the main loop, not here + t.year_of_publication, # publishedInYear | + true, # original + code_field(t), # code + nil, # status https://api.checklistbank.org/vocab/nomStatus + nil, # link (probably TW public or API) + remarks(t, name_remarks_vocab_id), # remarks + Export::Coldp.modified(t[:updated_at]), # modified + Export::Coldp.modified_by(t[:updated_by_id], project_members) # modifiedBy ] end @@ -143,7 +145,7 @@ def self.clean_sic(name) # @params otu [Otu] # the top level OTU - def self.generate(otu, reference_csv = nil) + def self.generate(otu, project_members, reference_csv = nil) name_total = 0 CSV.generate(col_sep: "\t") do |csv| csv << %w{ @@ -165,6 +167,8 @@ def self.generate(otu, reference_csv = nil) status link remarks + modified + modifiedBy } Current.project_id = otu.project_id @@ -229,34 +233,36 @@ def self.generate(otu, reference_csv = nil) # Set is: no original combination OR (valid or invalid higher, valid lower, past combinations) if t.cached_original_combination.blank? || higher || t.is_valid? || t.is_combination? 
csv << [ - t.id, # ID - basionym_id, # basionymID - name_string, # scientificName # should just be t.cached - t.cached_author_year, # authorship - rank, # rank - uninomial, # uninomial <- if genus here - generic_epithet, # genus and below - IIF species or lower - infrageneric_epithet, # infragenericEpithet - specific_epithet, # specificEpithet - infraspecific_epithet, # infraspecificEpithet - origin_citation&.source_id, # publishedInID - origin_citation&.pages, # publishedInPage - t.year_of_publication, # publishedInYear - original, # original - code_field(t), # code - nom_status_field(t), # nomStatus - nil, # link (probably TW public or API) - remarks(t, name_remarks_vocab_id), # remarks + t.id, # ID + basionym_id, # basionymID + name_string, # scientificName # should just be t.cached + t.cached_author_year, # authorship + rank, # rank + uninomial, # uninomial <- if genus here + generic_epithet, # genus and below - IIF species or lower + infrageneric_epithet, # infragenericEpithet + specific_epithet, # specificEpithet + infraspecific_epithet, # infraspecificEpithet + origin_citation&.source_id, # publishedInID + origin_citation&.pages, # publishedInPage + t.year_of_publication, # publishedInYear + original, # original + code_field(t), # code + nom_status_field(t), # nomStatus + nil, # link (probably TW public or API) + remarks(t, name_remarks_vocab_id), # remarks + Export::Coldp.modified(t[:updated_at]), # modified + Export::Coldp.modified_by(t[:updated_by_id], project_members) # modifiedBy ] end # Here we truly want no higher if !t.cached_original_combination.blank? && (is_genus_species && !t.is_combination? && (!t.is_valid? 
|| t.has_alternate_original?)) name_total += 1 - add_original_combination(t, csv, origin_citation, name_remarks_vocab_id) + add_original_combination(t, csv, origin_citation, name_remarks_vocab_id, project_members) end - Export::Coldp::Files::Reference.add_reference_rows([origin_citation.source].compact, reference_csv) if reference_csv && origin_citation + Export::Coldp::Files::Reference.add_reference_rows([origin_citation.source].compact, reference_csv, project_members) if reference_csv && origin_citation end end end diff --git a/lib/export/coldp/files/reference.rb b/lib/export/coldp/files/reference.rb index 3e29e686a4..5b73b03ba7 100644 --- a/lib/export/coldp/files/reference.rb +++ b/lib/export/coldp/files/reference.rb @@ -7,21 +7,21 @@ module Export::Coldp::Files::Reference # # !! It is not integrated yet. # - def self.generate(project_id) + def self.generate(project_id, project_members) CSV.generate do |csv| Source.joins(:project_sources).where(project_sources: {project_id: project_id} ).each do |source| - csv << ref_row(source) + csv << ref_row(source, project_members) end end end - def self.add_reference_rows(sources = [], reference_csv) + def self.add_reference_rows(sources = [], reference_csv, project_members) sources.each do |s| - reference_csv[s.id] = ref_row(s) + reference_csv[s.id] = ref_row(s, project_members) end end - def self.ref_row(source) + def self.ref_row(source, project_members) [ source.id, source.cached, @@ -29,7 +29,9 @@ def self.ref_row(source) # source.year, # source.journal, # source.source # reference_details(source), # details (pages, volume, year) - source.doi + source.doi, + Export::Coldp.modified(source[:updated_at]), # modified + Export::Coldp.modified_by(source[:updated_by_id], project_members) # modifiedBy ] end diff --git a/lib/export/coldp/files/synonym.rb b/lib/export/coldp/files/synonym.rb index 371224afd1..00ddec402b 100644 --- a/lib/export/coldp/files/synonym.rb +++ b/lib/export/coldp/files/synonym.rb @@ -26,10 +26,10 @@ def 
self.reference_id_field(otu) end # This is currently factored to use *no* ActiveRecord instances - def self.generate(otus, reference_csv = nil) + def self.generate(otus, project_members, reference_csv = nil) CSV.generate(col_sep: "\t") do |csv| - csv << %w{taxonID nameID status remarks referenceID} + csv << %w{taxonID nameID status remarks referenceID modified modifiedBy} # Only valid otus with taxon names, see lib/export/coldp.rb#otus otus.select('otus.id id, taxon_names.cached cached, otus.taxon_name_id taxon_name_id') @@ -60,7 +60,7 @@ def self.generate(otus, reference_csv = nil) # .where(cached_valid_taxon_name_id: o[2]) # == .historical_taxon_names # .where("( ((taxon_names.id != taxon_names.cached_valid_taxon_name_id) OR ((taxon_names.cached_original_combination != taxon_names.cached))) AND NOT (taxon_names.type = 'Combination' AND taxon_names.cached = ?))", o[1]) # see name.rb - c.pluck(:id, :cached, :cached_original_combination, :type, :rank_class, :cached_secondary_homonym) + c.pluck(:id, :cached, :cached_original_combination, :type, :rank_class, :cached_secondary_homonym, :updated_at, :updated_by_id) .each do |t| reified_id = ::Export::Coldp.reified_id(t[0], t[1], t[2]) @@ -88,11 +88,13 @@ def self.generate(otus, reference_csv = nil) end csv << [ - o[0], # taxonID attached to the current valid concept - reified_id, # nameID - nil, # Status TODO def status(taxon_name_id) - remarks_field, - nil, # Unclear what this means in TW + o[0], # taxonID attached to the current valid concept + reified_id, # nameID + nil, # status TODO: def status(taxon_name_id) + remarks_field, # remarks + nil, # referenceID Unclear what this means in TW + Export::Coldp.modified(t[6]), # modified + Export::Coldp.modified_by(t[7], project_members) # modifiedBy ] end end @@ -100,5 +102,5 @@ def self.generate(otus, reference_csv = nil) end # It is unclear what the relationship beyond "used" means. 
We likely need a sensu style model to record these assertions - # Export::Coldp::Files::Reference.add_reference_rows([], reference_csv) if reference_csv + # Export::Coldp::Files::Reference.add_reference_rows([], reference_csv, project_members) if reference_csv end diff --git a/lib/export/coldp/files/taxon.rb b/lib/export/coldp/files/taxon.rb index 16ea7dddcb..8e8c2a4ac3 100644 --- a/lib/export/coldp/files/taxon.rb +++ b/lib/export/coldp/files/taxon.rb @@ -110,7 +110,7 @@ def self.reference_id(sources) nil end - def self.generate(otus, root_otu_id = nil, reference_csv = nil, prefer_unlabelled_otus: true) + def self.generate(otus, project_members, root_otu_id = nil, reference_csv = nil, prefer_unlabelled_otus: true) # Until we have RC5 articulations we are simplifying handling the fact # that one taxon name can be used for many OTUs. Track to see that @@ -140,6 +140,8 @@ def self.generate(otus, root_otu_id = nil, reference_csv = nil, prefer_unlabelle environment link remarks + modified + modifiedBy } taxon_remarks_vocab_id = Predicate.find_by(uri: 'https://github.com/catalogueoflife/coldp#Taxon.remarks', @@ -182,25 +184,27 @@ def self.generate(otus, root_otu_id = nil, reference_csv = nil, prefer_unlabelle parent_id = (root_otu_id == o.id ? 
nil : parent_id ) csv << [ - o.id, # ID (Taxon) - parent_id, # parentID (Taxon) - o.taxon_name.id, # nameID (Name) - name_phrase(o, name_phrase_vocab_id), # namePhrase - provisional(o), # provisional - according_to_id(o), # accordingToID - scrutinizer(o), # scrutinizer - scrutinizer_id(o), # scrutinizerID - scrutinizer_date(o), # scrutizinerDate - reference_id(sources), # referenceID - predicate_value(o, :extinct), # extinct - predicate_value(o, :temporal_range_start), # temporalRangeStart - predicate_value(o, :temporal_range_end), # temporalRangeEnd - predicate_value(o, :lifezone), # environment (formerly named lifezone) - link(o), # link - remarks(o, taxon_remarks_vocab_id) # remarks + o.id, # ID (Taxon) + parent_id, # parentID (Taxon) + o.taxon_name.id, # nameID (Name) + name_phrase(o, name_phrase_vocab_id), # namePhrase + provisional(o), # provisional + according_to_id(o), # accordingToID + scrutinizer(o), # scrutinizer + scrutinizer_id(o), # scrutinizerID + scrutinizer_date(o), # scrutizinerDate + reference_id(sources), # referenceID + predicate_value(o, :extinct), # extinct + predicate_value(o, :temporal_range_start), # temporalRangeStart + predicate_value(o, :temporal_range_end), # temporalRangeEnd + predicate_value(o, :lifezone), # environment (formerly named lifezone) + link(o), # link + remarks(o, taxon_remarks_vocab_id), # remarks + Export::Coldp.modified(o[:updated_at]), # modified + Export::Coldp.modified_by(o[:updated_by_id], project_members) # modifiedBy ] - Export::Coldp::Files::Reference.add_reference_rows(sources, reference_csv) if reference_csv + Export::Coldp::Files::Reference.add_reference_rows(sources, reference_csv, project_members) if reference_csv end end end diff --git a/lib/export/coldp/files/vernacular_name.rb b/lib/export/coldp/files/vernacular_name.rb index 5d01e77a13..39784b17cc 100644 --- a/lib/export/coldp/files/vernacular_name.rb +++ b/lib/export/coldp/files/vernacular_name.rb @@ -6,6 +6,8 @@ # area # sex # reference_id +# modified 
+# modifiedBy # module Export::Coldp::Files::VernacularName @@ -39,7 +41,7 @@ def self.reference_id(common_name) nil end - def self.generate(otus, reference_csv = nil ) + def self.generate(otus, project_members, reference_csv = nil ) CSV.generate(col_sep: "\t") do |csv| # TODO: Biocuration attributes on these two @@ -53,6 +55,8 @@ def self.generate(otus, reference_csv = nil ) country area referenceID + modified + modifiedBy } otus.joins(:common_names).each do |o| @@ -60,16 +64,18 @@ def self.generate(otus, reference_csv = nil ) sources = n.sources.load csv << [ - o.id, - n.name, - transliteration(n), - n.language&.alpha_3_bibliographic, - n.geographic_area&.level0&.iso_3166_a2, - area(n), - sources.collect{|a| a.id}.join(',') # reference_id + o.id, # taxon_id + n.name, # name + transliteration(n), # transliteration + n.language&.alpha_3_bibliographic, # language + n.geographic_area&.level0&.iso_3166_a2, # country + area(n), # area + sources.collect{|a| a.id}.join(','), # reference_id + Export::Coldp.modified(n[:updated_at]), # modified + Export::Coldp.modified_by(n[:updated_by_id], project_members) # modified_by ] - Export::Coldp::Files::Reference.add_reference_rows(sources, reference_csv) if reference_csv && sources.any? + Export::Coldp::Files::Reference.add_reference_rows(sources, reference_csv, project_members) if reference_csv && sources.any? 
end end end From a58be4e79e858809175d113e453c2afe7a478ec8 Mon Sep 17 00:00:00 2001 From: Geoff Ower Date: Mon, 3 Jul 2023 16:02:04 -0500 Subject: [PATCH 2/4] Update CHANGELOG.md --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5fcd2c4ed3..06119dc62c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,7 @@ This project does not yet adheres to [Semantic Versioning](https://semv ## [unreleased] ### Added +- modified and modifiedBy to the COL data package exporter [#3464] - DataAttribute columns for CollectingEvent and TaxonName filters - Added ranks for viruses - CachedMap framework - compute low-resolution maps quickly [#3010] From e740914353b8192d03af491ef0ade3d8bb3a3cdb Mon Sep 17 00:00:00 2001 From: Geoff Ower Date: Mon, 3 Jul 2023 16:02:47 -0500 Subject: [PATCH 3/4] Use identifier instead of cached --- app/models/person.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/models/person.rb b/app/models/person.rb index 43b21573a2..f0b93a753a 100644 --- a/app/models/person.rb +++ b/app/models/person.rb @@ -180,7 +180,7 @@ def full_last_name # Return [String, nil] # convenience, maybe a delegate: candidate def orcid - identifiers.where(type: 'Identifier::Global::Orcid').first&.cached + identifiers.where(type: 'Identifier::Global::Orcid').first&.identifier end # @param [Integer] person_id From 937f38baaed4ad9d1e86ce51c0a2c0bb8d210e61 Mon Sep 17 00:00:00 2001 From: Geoff Ower Date: Wed, 5 Jul 2023 13:39:16 -0500 Subject: [PATCH 4/4] Review fixes --- app/models/person.rb | 2 +- lib/export/coldp.rb | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/app/models/person.rb b/app/models/person.rb index f0b93a753a..43b21573a2 100644 --- a/app/models/person.rb +++ b/app/models/person.rb @@ -180,7 +180,7 @@ def full_last_name # Return [String, nil] # convenience, maybe a delegate: candidate def orcid - identifiers.where(type: 'Identifier::Global::Orcid').first&.identifier + 
identifiers.where(type: 'Identifier::Global::Orcid').first&.cached end # @param [Integer] person_id diff --git a/lib/export/coldp.rb b/lib/export/coldp.rb index 2fe51dd5b9..5c41f82ffc 100644 --- a/lib/export/coldp.rb +++ b/lib/export/coldp.rb @@ -33,14 +33,14 @@ def self.project_members(project_id) if pm.user.orcid.nil? project_members[pm.user_id] = pm.user.name else - project_members[pm.user_id] = pm.user.orcid.gsub('orcid.org/', '') # orcid.org domain likely not expected by the checklistbank importer? + project_members[pm.user_id] = pm.user.orcid end end project_members end def self.modified(updated_at) - updated_at.iso8601() + updated_at.iso8601 end def self.modified_by(updated_by_id, project_members)