SpeciesFileGroup · mjy · Jul 5, 2023 · Jul 3, 2023 · Jul 3, 2023 · Jul 3, 2023
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -8,6 +8,7 @@ This project <em>does not yet</em> adheres to [Semantic Versioning](https://semv
 ## [unreleased]
 
 ### Added
+- `modified` and `modifiedBy` columns to the COL data package exporter [#3464]
 - Pagination to Labels and TypeMaterial .json endpoints [#3472]
 - DataAttribute columns for CollectingEvent and TaxonName filters
 - Added ranks for viruses
@@ -46,7 +47,7 @@ This project <em>does not yet</em> adheres to [Semantic Versioning](https://semv
 - Fixed URL hostname string matching in some places.
 - Matrix Column Coder throws an error after autosave ends and observation to be saved no longer exists
 
-
+[#3464]: https://github.com/SpeciesFileGroup/taxonworks/issues/3464
 [#3438]: https://github.com/SpeciesFileGroup/taxonworks/issues/3438
 [#3472]: https://github.com/SpeciesFileGroup/taxonworks/issues/3472
 [#3452]: https://github.com/SpeciesFileGroup/taxonworks/issues/3452

diff --git a/lib/export/coldp.rb b/lib/export/coldp.rb
@@ -27,6 +27,26 @@ def self.otus(otu_id)
         .where('(otus.name IS NULL) OR (otus.name = taxon_names.cached)')
     end
 
+    def self.project_members(project_id)
+      project_members = {}
+      ProjectMember.where(project_id: project_id).each do |pm|
+        if pm.user.orcid.nil?
+          project_members[pm.user_id] = pm.user.name
+        else
+          project_members[pm.user_id] = pm.user.orcid
+        end
+      end
+      project_members
+    end
+
+    def self.modified(updated_at)
+      updated_at.iso8601
+    end
+
+    def self.modified_by(updated_by_id, project_members) # value for the `modifiedBy` column
+      project_members[updated_by_id] # looked up by id; presumably nil when the id is not a key (assumes a plain Hash; confirm)
+    end
+
     def self.export(otu_id, prefer_unlabelled_otus: true)
       otus = otus(otu_id)
 
@@ -35,6 +55,7 @@ def self.export(otu_id, prefer_unlabelled_otus: true)
 
       otu = ::Otu.find(otu_id)
       project = ::Project.find(otu.project_id)
+      project_members = project_members(otu.project_id)
 
       # TODO: This will likely have to change, it is renamed on serving the file.
       zip_file_path = "/tmp/_#{SecureRandom.hex(8)}_coldp.zip"
@@ -56,19 +77,19 @@ def self.export(otu_id, prefer_unlabelled_otus: true)
       Zip::File.open(zip_file_path, Zip::File::CREATE) do |zipfile|
         (FILETYPES - ['Name']).each do |ft|
           m = "Export::Coldp::Files::#{ft}".safe_constantize
-          zipfile.get_output_stream("#{ft}.csv") { |f| f.write m.generate(otus, ref_csv) }
+          zipfile.get_output_stream("#{ft}.csv") { |f| f.write m.generate(otus, project_members, ref_csv) }
         end
 
-        zipfile.get_output_stream('Name.csv') { |f| f.write Export::Coldp::Files::Name.generate(otu, ref_csv) }
+        zipfile.get_output_stream('Name.csv') { |f| f.write Export::Coldp::Files::Name.generate(otu, project_members, ref_csv) }
         zipfile.get_output_stream('Taxon.csv') do |f|
-          f.write Export::Coldp::Files::Taxon.generate(otus, otu_id, ref_csv, prefer_unlabelled_otus: prefer_unlabelled_otus)
+          f.write Export::Coldp::Files::Taxon.generate(otus, project_members, otu_id, ref_csv)
         end
 
         # Sort the refs by full citation string
         sorted_refs = ref_csv.values.sort{|a,b| a[1] <=> b[1]}
 
         d = CSV.generate(col_sep: "\t") do |csv|
-          csv << %w{ID citation	doi} # author year source details
+          csv << %w{ID citation	doi modified modifiedBy} # author year source details
           sorted_refs.each do |r|
             csv << r
           end

diff --git a/lib/export/coldp/files/description.rb b/lib/export/coldp/files/description.rb
@@ -15,7 +15,7 @@ def self.reference_id(content)
     nil
   end
 
-  def self.generate(otus, reference_csv = nil )
+  def self.generate(otus, project_members, reference_csv = nil )
     CSV.generate(col_sep: "\t") do |csv|
 
       csv << %w{ 
@@ -24,6 +24,8 @@ def self.generate(otus, reference_csv = nil )
         description
         language
         referenceID
+        modified
+        modifiedBy
       }
 
       otus.joins(:contents).each do |o|
@@ -35,10 +37,12 @@ def self.generate(otus, reference_csv = nil )
             c.topic_id, # TODO: refence EOL or related unitified topic DOIs
             c.text,
             c.language&.alpha_3_bibliographic,
-            sources.collect{|a| a.id}.join(',')
+            sources.collect{|a| a.id}.join(','),
+            Export::Coldp.modified(c[:updated_at]),                            # modified
+            Export::Coldp.modified_by(c[:updated_by_id], project_members)      # modifiedBy
           ]
 
-          Export::Coldp::Files::Reference.add_reference_rows(sources, reference_csv) if reference_csv
+          Export::Coldp::Files::Reference.add_reference_rows(sources, reference_csv, project_members) if reference_csv
         end
       end
     end

diff --git a/lib/export/coldp/files/name.rb b/lib/export/coldp/files/name.rb
@@ -58,7 +58,7 @@ def self.nom_status_field(taxon_name)
   # Invalid Protonyms are rendered only as their original Combination
   # @param t [Protonym]
   #    only place that var./frm can be handled.
-  def self.add_original_combination(t, csv, origin_citation, name_remarks_vocab_id)
+  def self.add_original_combination(t, csv, origin_citation, name_remarks_vocab_id, project_members)
     e = t.original_combination_elements
 
     infraspecific_element = t.original_combination_infraspecific_element(e)
@@ -116,24 +116,26 @@ def self.add_original_combination(t, csv, origin_citation, name_remarks_vocab_id
     end
 
     csv << [
-      id,                                                        # ID
-      basionym_id,                                               # basionymID
-      clean_sic(t.cached_original_combination),                  # scientificName
-      authorship_field(t, true),                                 # authorship
-      rank,                                                      # rank
-      uninomial,                                                 # uninomial
-      genus,                                                     # genus
-      subgenus,                                                  # subgenus (no parens)
-      species,                                                   # species
-      infraspecific_element ? infraspecific_element.last : nil,  # infraspecificEpithet
-      origin_citation&.source_id,                                # referenceID    |
-      origin_citation&.pages,                                    # publishedInPage  | !! All origin citations get added to reference_csv via the main loop, not here
-      t.year_of_publication,                                     # publishedInYear  |
-      true,                                                      # original
-      code_field(t),                                             # code
-      nil,                                                       # status https://api.checklistbank.org/vocab/nomStatus
-      nil,                                                       # link (probably TW public or API)
-      remarks(t, name_remarks_vocab_id),                         # remarks
+      id,                                                            # ID
+      basionym_id,                                                   # basionymID
+      clean_sic(t.cached_original_combination),                      # scientificName
+      authorship_field(t, true),                                     # authorship
+      rank,                                                          # rank
+      uninomial,                                                     # uninomial
+      genus,                                                         # genus
+      subgenus,                                                      # subgenus (no parens)
+      species,                                                       # species
+      infraspecific_element ? infraspecific_element.last : nil,      # infraspecificEpithet
+      origin_citation&.source_id,                                    # referenceID    |
+      origin_citation&.pages,                                        # publishedInPage  | !! All origin citations get added to reference_csv via the main loop, not here
+      t.year_of_publication,                                         # publishedInYear  |
+      true,                                                          # original
+      code_field(t),                                                 # code
+      nil,                                                           # status https://api.checklistbank.org/vocab/nomStatus
+      nil,                                                           # link (probably TW public or API)
+      remarks(t, name_remarks_vocab_id),                             # remarks
+      Export::Coldp.modified(t[:updated_at]),                        # modified
+      Export::Coldp.modified_by(t[:updated_by_id], project_members)  # modifiedBy
     ]
   end
 
@@ -143,7 +145,7 @@ def self.clean_sic(name)
 
   # @params otu [Otu]
   #   the top level OTU
-  def self.generate(otu, reference_csv = nil)
+  def self.generate(otu, project_members, reference_csv = nil)
      name_total = 0
     CSV.generate(col_sep: "\t") do |csv|
       csv << %w{
@@ -165,6 +167,8 @@ def self.generate(otu, reference_csv = nil)
         status
         link
         remarks
+        modified
+        modifiedBy
       }
 
       Current.project_id = otu.project_id
@@ -229,34 +233,36 @@ def self.generate(otu, reference_csv = nil)
           # Set is: no original combination OR (valid or invalid higher, valid lower, past combinations)
           if t.cached_original_combination.blank? || higher || t.is_valid? || t.is_combination?
             csv << [
-              t.id,                                     # ID
-              basionym_id,                              # basionymID
-              name_string,                              # scientificName  # should just be t.cached
-              t.cached_author_year,                     # authorship
-              rank,                                     # rank
-              uninomial,                                # uninomial   <- if genus here
-              generic_epithet,                          # genus and below - IIF species or lower
-              infrageneric_epithet,                     # infragenericEpithet
-              specific_epithet,                         # specificEpithet
-              infraspecific_epithet,                    # infraspecificEpithet
-              origin_citation&.source_id,               # publishedInID
-              origin_citation&.pages,                   # publishedInPage
-              t.year_of_publication,                    # publishedInYear
-              original,                                 # original
-              code_field(t),                            # code
-              nom_status_field(t),                      # nomStatus
-              nil,                                      # link (probably TW public or API)
-              remarks(t, name_remarks_vocab_id),        # remarks
+              t.id,                                                          # ID
+              basionym_id,                                                   # basionymID
+              name_string,                                                   # scientificName  # should just be t.cached
+              t.cached_author_year,                                          # authorship
+              rank,                                                          # rank
+              uninomial,                                                     # uninomial   <- if genus here
+              generic_epithet,                                               # genus and below - IIF species or lower
+              infrageneric_epithet,                                          # infragenericEpithet
+              specific_epithet,                                              # specificEpithet
+              infraspecific_epithet,                                         # infraspecificEpithet
+              origin_citation&.source_id,                                    # publishedInID
+              origin_citation&.pages,                                        # publishedInPage
+              t.year_of_publication,                                         # publishedInYear
+              original,                                                      # original
+              code_field(t),                                                 # code
+              nom_status_field(t),                                           # nomStatus
+              nil,                                                           # link (probably TW public or API)
+              remarks(t, name_remarks_vocab_id),                             # remarks
+              Export::Coldp.modified(t[:updated_at]),                        # modified
+              Export::Coldp.modified_by(t[:updated_by_id], project_members)  # modifiedBy
             ]
           end
 
           # Here we truly want no higher
           if !t.cached_original_combination.blank? && (is_genus_species && !t.is_combination? && (!t.is_valid? || t.has_alternate_original?))
             name_total += 1
-            add_original_combination(t, csv, origin_citation, name_remarks_vocab_id)
+            add_original_combination(t, csv, origin_citation, name_remarks_vocab_id, project_members)
           end
 
-          Export::Coldp::Files::Reference.add_reference_rows([origin_citation.source].compact, reference_csv) if reference_csv && origin_citation
+          Export::Coldp::Files::Reference.add_reference_rows([origin_citation.source].compact, reference_csv, project_members) if reference_csv && origin_citation
         end
       end
     end

diff --git a/lib/export/coldp/files/reference.rb b/lib/export/coldp/files/reference.rb
@@ -7,29 +7,31 @@ module Export::Coldp::Files::Reference
   #
   # !! It is not integrated yet.
   # 
-  def self.generate(project_id)
+  def self.generate(project_id, project_members) # project_members is handed to ref_row for each Source joined to the project
     CSV.generate do |csv|
       Source.joins(:project_sources).where(project_sources: {project_id: project_id} ).each do |source|
-        csv << ref_row(source)
+        csv << ref_row(source, project_members)
       end
     end
   end
 
-  def self.add_reference_rows(sources = [], reference_csv)
+  def self.add_reference_rows(sources = [], reference_csv, project_members) # stores one ref_row per source in reference_csv, keyed by source id
     sources.each do |s|
-      reference_csv[s.id] = ref_row(s)   
+      reference_csv[s.id] = ref_row(s, project_members)
     end 
   end
 
-  def self.ref_row(source)
+  def self.ref_row(source, project_members) # returns [id, cached, doi, modified, modifiedBy] for one source
     [
       source.id,
       source.cached,
 #     source.cached_author_string,
 #     source.year,
 #     source.journal,                # source.source
 #     reference_details(source),     # details (pages, volume, year)
-      source.doi 
+      source.doi,
+      Export::Coldp.modified(source[:updated_at]),                        # modified
+      Export::Coldp.modified_by(source[:updated_by_id], project_members)  # modifiedBy 
     ]
   end
 

diff --git a/lib/export/coldp/files/synonym.rb b/lib/export/coldp/files/synonym.rb
@@ -26,10 +26,10 @@ def self.reference_id_field(otu)
   end
 
   # This is currently factored to use *no* ActiveRecord instances
-  def self.generate(otus, reference_csv = nil)
+  def self.generate(otus, project_members, reference_csv = nil)
     CSV.generate(col_sep: "\t") do |csv|
 
-      csv << %w{taxonID nameID status remarks referenceID}
+      csv << %w{taxonID nameID status remarks referenceID modified modifiedBy}
 
       # Only valid otus with taxon names, see lib/export/coldp.rb#otus
       otus.select('otus.id id, taxon_names.cached cached, otus.taxon_name_id taxon_name_id')
@@ -60,7 +60,7 @@ def self.generate(otus, reference_csv = nil)
           #   .where(cached_valid_taxon_name_id: o[2]) # == .historical_taxon_names
           #   .where("( ((taxon_names.id != taxon_names.cached_valid_taxon_name_id) OR ((taxon_names.cached_original_combination != taxon_names.cached))) AND NOT (taxon_names.type = 'Combination' AND taxon_names.cached = ?))", o[1]) # see name.rb
 
-          c.pluck(:id, :cached, :cached_original_combination, :type, :rank_class, :cached_secondary_homonym)
+          c.pluck(:id, :cached, :cached_original_combination, :type, :rank_class, :cached_secondary_homonym, :updated_at, :updated_by_id)
             .each do |t|
               reified_id = ::Export::Coldp.reified_id(t[0], t[1], t[2])
 
@@ -88,17 +88,19 @@ def self.generate(otus, reference_csv = nil)
               end
 
               csv << [
-                o[0],           # taxonID attached to the current valid concept
-                reified_id,     # nameID
-                nil,            # Status TODO def status(taxon_name_id)
-                remarks_field,
-                nil,            # Unclear what this means in TW
+                o[0],                                             # taxonID attached to the current valid concept
+                reified_id,                                       # nameID
+                nil,                                              # status  TODO: def status(taxon_name_id)
+                remarks_field,                                    # remarks
+                nil,                                              # referenceID   Unclear what this means in TW
+                Export::Coldp.modified(t[6]),                     # modified
+                Export::Coldp.modified_by(t[7], project_members)  # modifiedBy
               ]
             end
         end
     end
   end
 
   # It is unclear what the relationship beyond "used" means. We likely need a sensu style model to record these assertions
-  # Export::Coldp::Files::Reference.add_reference_rows([], reference_csv) if reference_csv
+  # Export::Coldp::Files::Reference.add_reference_rows([], reference_csv, project_members) if reference_csv
 end