Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

3464 add modified/modifiedBy to CoLDP exporter #3479

Merged
merged 6 commits into from
Jul 5, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ This project <em>does not yet</em> adhere to [Semantic Versioning](https://semv
## [unreleased]

### Added
- modified and modifiedBy to the COL data package exporter [#3464]
- Pagination to Labels and TypeMaterial .json endpoints [#3472]
- DataAttribute columns for CollectingEvent and TaxonName filters
- Added ranks for viruses
Expand Down Expand Up @@ -46,7 +47,7 @@ This project <em>does not yet</em> adhere to [Semantic Versioning](https://semv
- Fixed URL hostname string matching in some places.
- Matrix Column Coder throws an error after autosave ends and observation to be saved no longer exists


[#3464]: https://github.com/SpeciesFileGroup/taxonworks/issues/3464
[#3438]: https://github.com/SpeciesFileGroup/taxonworks/issues/3438
[#3472]: https://github.com/SpeciesFileGroup/taxonworks/issues/3472
[#3452]: https://github.com/SpeciesFileGroup/taxonworks/issues/3452
Expand Down
29 changes: 25 additions & 4 deletions lib/export/coldp.rb
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,26 @@ def self.otus(otu_id)
.where('(otus.name IS NULL) OR (otus.name = taxon_names.cached)')
end

# Builds a lookup of the identifier to report for each member of a project.
#
# @param project_id [Integer] id of the project whose members are collected
# @return [Hash] user_id => the member's ORCID, or the member's name when
#   no ORCID is recorded
def self.project_members(project_id)
  # Eager-load users so the loop does not issue one users query per member.
  members = ProjectMember.where(project_id: project_id).includes(:user)

  members.each_with_object({}) do |pm, lookup|
    user = pm.user
    # Prefer the ORCID; fall back to the display name.
    lookup[pm.user_id] = user.orcid || user.name
  end
end

# Formats a record's update timestamp for the `modified` column.
#
# @param updated_at [#iso8601, nil] timestamp of the last update
# @return [String, nil] the ISO 8601 representation, or nil when no
#   timestamp is available (so the cell is left empty instead of raising)
def self.modified(updated_at)
  updated_at&.iso8601
end

# Resolves the value for the `modifiedBy` column.
#
# @param updated_by_id [Integer] id of the user who last updated the record
# @param project_members [Hash] lookup keyed by user id, as built by
#   `project_members`
# @return [Object, nil] the entry stored for that user id, or nil when the
#   id is not present in the lookup
def self.modified_by(updated_by_id, project_members)
  identifier = project_members[updated_by_id]
  identifier
end

def self.export(otu_id, prefer_unlabelled_otus: true)
otus = otus(otu_id)

Expand All @@ -35,6 +55,7 @@ def self.export(otu_id, prefer_unlabelled_otus: true)

otu = ::Otu.find(otu_id)
project = ::Project.find(otu.project_id)
project_members = project_members(otu.project_id)

# TODO: This will likely have to change, it is renamed on serving the file.
zip_file_path = "/tmp/_#{SecureRandom.hex(8)}_coldp.zip"
Expand All @@ -56,19 +77,19 @@ def self.export(otu_id, prefer_unlabelled_otus: true)
Zip::File.open(zip_file_path, Zip::File::CREATE) do |zipfile|
(FILETYPES - ['Name']).each do |ft|
m = "Export::Coldp::Files::#{ft}".safe_constantize
zipfile.get_output_stream("#{ft}.csv") { |f| f.write m.generate(otus, ref_csv) }
zipfile.get_output_stream("#{ft}.csv") { |f| f.write m.generate(otus, project_members, ref_csv) }
end

zipfile.get_output_stream('Name.csv') { |f| f.write Export::Coldp::Files::Name.generate(otu, ref_csv) }
zipfile.get_output_stream('Name.csv') { |f| f.write Export::Coldp::Files::Name.generate(otu, project_members, ref_csv) }
zipfile.get_output_stream('Taxon.csv') do |f|
f.write Export::Coldp::Files::Taxon.generate(otus, otu_id, ref_csv, prefer_unlabelled_otus: prefer_unlabelled_otus)
f.write Export::Coldp::Files::Taxon.generate(otus, project_members, otu_id, ref_csv)
end

# Sort the refs by full citation string
sorted_refs = ref_csv.values.sort{|a,b| a[1] <=> b[1]}

d = CSV.generate(col_sep: "\t") do |csv|
csv << %w{ID citation doi} # author year source details
csv << %w{ID citation doi modified modifiedBy} # author year source details
sorted_refs.each do |r|
csv << r
end
Expand Down
10 changes: 7 additions & 3 deletions lib/export/coldp/files/description.rb
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ def self.reference_id(content)
nil
end

def self.generate(otus, reference_csv = nil )
def self.generate(otus, project_members, reference_csv = nil )
CSV.generate(col_sep: "\t") do |csv|

csv << %w{
Expand All @@ -24,6 +24,8 @@ def self.generate(otus, reference_csv = nil )
description
language
referenceID
modified
modifiedBy
}

otus.joins(:contents).each do |o|
Expand All @@ -35,10 +37,12 @@ def self.generate(otus, reference_csv = nil )
c.topic_id, # TODO: refence EOL or related unitified topic DOIs
c.text,
c.language&.alpha_3_bibliographic,
sources.collect{|a| a.id}.join(',')
sources.collect{|a| a.id}.join(','),
Export::Coldp.modified(c[:updated_at]), # modified
Export::Coldp.modified_by(c[:updated_by_id], project_members) # modifiedBy
]

Export::Coldp::Files::Reference.add_reference_rows(sources, reference_csv) if reference_csv
Export::Coldp::Files::Reference.add_reference_rows(sources, reference_csv, project_members) if reference_csv
end
end
end
Expand Down
86 changes: 46 additions & 40 deletions lib/export/coldp/files/name.rb
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ def self.nom_status_field(taxon_name)
# Invalid Protonyms are rendered only as their original Combination
# @param t [Protonym]
# only place that var./frm can be handled.
def self.add_original_combination(t, csv, origin_citation, name_remarks_vocab_id)
def self.add_original_combination(t, csv, origin_citation, name_remarks_vocab_id, project_members)
e = t.original_combination_elements

infraspecific_element = t.original_combination_infraspecific_element(e)
Expand Down Expand Up @@ -116,24 +116,26 @@ def self.add_original_combination(t, csv, origin_citation, name_remarks_vocab_id
end

csv << [
id, # ID
basionym_id, # basionymID
clean_sic(t.cached_original_combination), # scientificName
authorship_field(t, true), # authorship
rank, # rank
uninomial, # uninomial
genus, # genus
subgenus, # subgenus (no parens)
species, # species
infraspecific_element ? infraspecific_element.last : nil, # infraspecificEpithet
origin_citation&.source_id, # referenceID |
origin_citation&.pages, # publishedInPage | !! All origin citations get added to reference_csv via the main loop, not here
t.year_of_publication, # publishedInYear |
true, # original
code_field(t), # code
nil, # status https://api.checklistbank.org/vocab/nomStatus
nil, # link (probably TW public or API)
remarks(t, name_remarks_vocab_id), # remarks
id, # ID
basionym_id, # basionymID
clean_sic(t.cached_original_combination), # scientificName
authorship_field(t, true), # authorship
rank, # rank
uninomial, # uninomial
genus, # genus
subgenus, # subgenus (no parens)
species, # species
infraspecific_element ? infraspecific_element.last : nil, # infraspecificEpithet
origin_citation&.source_id, # referenceID |
origin_citation&.pages, # publishedInPage | !! All origin citations get added to reference_csv via the main loop, not here
t.year_of_publication, # publishedInYear |
true, # original
code_field(t), # code
nil, # status https://api.checklistbank.org/vocab/nomStatus
nil, # link (probably TW public or API)
remarks(t, name_remarks_vocab_id), # remarks
Export::Coldp.modified(t[:updated_at]), # modified
Export::Coldp.modified_by(t[:updated_by_id], project_members) # modifiedBy
]
end

Expand All @@ -143,7 +145,7 @@ def self.clean_sic(name)

# @params otu [Otu]
# the top level OTU
def self.generate(otu, reference_csv = nil)
def self.generate(otu, project_members, reference_csv = nil)
name_total = 0
CSV.generate(col_sep: "\t") do |csv|
csv << %w{
Expand All @@ -165,6 +167,8 @@ def self.generate(otu, reference_csv = nil)
status
link
remarks
modified
modifiedBy
}

Current.project_id = otu.project_id
Expand Down Expand Up @@ -229,34 +233,36 @@ def self.generate(otu, reference_csv = nil)
# Set is: no original combination OR (valid or invalid higher, valid lower, past combinations)
if t.cached_original_combination.blank? || higher || t.is_valid? || t.is_combination?
csv << [
t.id, # ID
basionym_id, # basionymID
name_string, # scientificName # should just be t.cached
t.cached_author_year, # authorship
rank, # rank
uninomial, # uninomial <- if genus here
generic_epithet, # genus and below - IIF species or lower
infrageneric_epithet, # infragenericEpithet
specific_epithet, # specificEpithet
infraspecific_epithet, # infraspecificEpithet
origin_citation&.source_id, # publishedInID
origin_citation&.pages, # publishedInPage
t.year_of_publication, # publishedInYear
original, # original
code_field(t), # code
nom_status_field(t), # nomStatus
nil, # link (probably TW public or API)
remarks(t, name_remarks_vocab_id), # remarks
t.id, # ID
basionym_id, # basionymID
name_string, # scientificName # should just be t.cached
t.cached_author_year, # authorship
rank, # rank
uninomial, # uninomial <- if genus here
generic_epithet, # genus and below - IIF species or lower
infrageneric_epithet, # infragenericEpithet
specific_epithet, # specificEpithet
infraspecific_epithet, # infraspecificEpithet
origin_citation&.source_id, # publishedInID
origin_citation&.pages, # publishedInPage
t.year_of_publication, # publishedInYear
original, # original
code_field(t), # code
nom_status_field(t), # nomStatus
nil, # link (probably TW public or API)
remarks(t, name_remarks_vocab_id), # remarks
Export::Coldp.modified(t[:updated_at]), # modified
Export::Coldp.modified_by(t[:updated_by_id], project_members) # modifiedBy
]
end

# Here we truly want no higher
if !t.cached_original_combination.blank? && (is_genus_species && !t.is_combination? && (!t.is_valid? || t.has_alternate_original?))
name_total += 1
add_original_combination(t, csv, origin_citation, name_remarks_vocab_id)
add_original_combination(t, csv, origin_citation, name_remarks_vocab_id, project_members)
end

Export::Coldp::Files::Reference.add_reference_rows([origin_citation.source].compact, reference_csv) if reference_csv && origin_citation
Export::Coldp::Files::Reference.add_reference_rows([origin_citation.source].compact, reference_csv, project_members) if reference_csv && origin_citation
end
end
end
Expand Down
14 changes: 8 additions & 6 deletions lib/export/coldp/files/reference.rb
Original file line number Diff line number Diff line change
Expand Up @@ -7,29 +7,31 @@ module Export::Coldp::Files::Reference
#
# !! It is not integrated yet.
#
def self.generate(project_id)
def self.generate(project_id, project_members)
CSV.generate do |csv|
Source.joins(:project_sources).where(project_sources: {project_id: project_id} ).each do |source|
csv << ref_row(source)
csv << ref_row(source, project_members)
end
end
end

def self.add_reference_rows(sources = [], reference_csv)
def self.add_reference_rows(sources = [], reference_csv, project_members)
sources.each do |s|
reference_csv[s.id] = ref_row(s)
reference_csv[s.id] = ref_row(s, project_members)
end
end

def self.ref_row(source)
def self.ref_row(source, project_members)
[
source.id,
source.cached,
# source.cached_author_string,
# source.year,
# source.journal, # source.source
# reference_details(source), # details (pages, volume, year)
source.doi
source.doi,
Export::Coldp.modified(source[:updated_at]), # modified
Export::Coldp.modified_by(source[:updated_by_id], project_members) # modifiedBy
]
end

Expand Down
20 changes: 11 additions & 9 deletions lib/export/coldp/files/synonym.rb
Original file line number Diff line number Diff line change
Expand Up @@ -26,10 +26,10 @@ def self.reference_id_field(otu)
end

# This is currently factored to use *no* ActiveRecord instances
def self.generate(otus, reference_csv = nil)
def self.generate(otus, project_members, reference_csv = nil)
CSV.generate(col_sep: "\t") do |csv|

csv << %w{taxonID nameID status remarks referenceID}
csv << %w{taxonID nameID status remarks referenceID modified modifiedBy}

# Only valid otus with taxon names, see lib/export/coldp.rb#otus
otus.select('otus.id id, taxon_names.cached cached, otus.taxon_name_id taxon_name_id')
Expand Down Expand Up @@ -60,7 +60,7 @@ def self.generate(otus, reference_csv = nil)
# .where(cached_valid_taxon_name_id: o[2]) # == .historical_taxon_names
# .where("( ((taxon_names.id != taxon_names.cached_valid_taxon_name_id) OR ((taxon_names.cached_original_combination != taxon_names.cached))) AND NOT (taxon_names.type = 'Combination' AND taxon_names.cached = ?))", o[1]) # see name.rb

c.pluck(:id, :cached, :cached_original_combination, :type, :rank_class, :cached_secondary_homonym)
c.pluck(:id, :cached, :cached_original_combination, :type, :rank_class, :cached_secondary_homonym, :updated_at, :updated_by_id)
.each do |t|
reified_id = ::Export::Coldp.reified_id(t[0], t[1], t[2])

Expand Down Expand Up @@ -88,17 +88,19 @@ def self.generate(otus, reference_csv = nil)
end

csv << [
o[0], # taxonID attached to the current valid concept
reified_id, # nameID
nil, # Status TODO def status(taxon_name_id)
remarks_field,
nil, # Unclear what this means in TW
o[0], # taxonID attached to the current valid concept
reified_id, # nameID
nil, # status TODO: def status(taxon_name_id)
remarks_field, # remarks
nil, # referenceID Unclear what this means in TW
Export::Coldp.modified(t[6]), # modified
Export::Coldp.modified_by(t[7], project_members) # modifiedBy
]
end
end
end
end

# It is unclear what the relationship beyond "used" means. We likely need a sensu style model to record these assertions
# Export::Coldp::Files::Reference.add_reference_rows([], reference_csv) if reference_csv
# Export::Coldp::Files::Reference.add_reference_rows([], reference_csv, project_members) if reference_csv
end
Loading
Loading