Skip to content

Commit

Permalink
Merge branch '3957_pr' into 3957_coldp_improvements
Browse files Browse the repository at this point in the history
  • Loading branch information
mjy committed Jan 31, 2025
2 parents 33b94c1 + 0d8e035 commit f8e8b32
Show file tree
Hide file tree
Showing 10 changed files with 421 additions and 135 deletions.
17 changes: 16 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,16 +14,27 @@ This project <em>does not yet</em> adhere to [Semantic Versioning](https://semv
- API endpoint for image matrix
- Added order of depictions coming from the image matrix
- Hub tasks: Add visual effect for fav icons and tooltip for categories [#4177]
- Distribution to COLDP exports [#3148]
- SpeciesInteraction to COLDP exports [#3158]
- Pull metadata from ChecklistBank in order to merge updated metadata into COLDP exports
- Add Taxon links to COLDP exports

### Fixed

- CSD: change of namespace not updating [#4147]
- TaxonWorks bibliography style for book chapter.
- Removed obsolete Description table from COLDP exports
- Remove [sic] from COLDP name fields [#3833]
- Autonym synonyms bug in COLDP exporter [#4175]

[#4177]: https://github.com/SpeciesFileGroup/taxonworks/issues/4177
[#3148]: https://github.com/SpeciesFileGroup/taxonworks/issues/3148
[#3158]: https://github.com/SpeciesFileGroup/taxonworks/issues/3158
[#3833]: https://github.com/SpeciesFileGroup/taxonworks/issues/3833
[#4135]: https://github.com/SpeciesFileGroup/taxonworks/issues/4135
[#4147]: https://github.com/SpeciesFileGroup/taxonworks/issues/4147
[#4153]: https://github.com/SpeciesFileGroup/taxonworks/issues/4153
[#4175]: https://github.com/SpeciesFileGroup/taxonworks/issues/4175
[#4177]: https://github.com/SpeciesFileGroup/taxonworks/issues/4177

## [0.46.1] - 2024-12-04

Expand Down Expand Up @@ -355,6 +366,9 @@ This project <em>does not yet</em> adheres to [Semantic Versioning](https://semv
[#3996]: https://github.com/SpeciesFileGroup/taxonworks/issues/3996
[#3997]: https://github.com/SpeciesFileGroup/taxonworks/issues/3997

### Fixed
- Handling of [sic] in Protonym#original_combination_infraspecific_element [#3867]

## [0.42.0] - 2024-06-28

### Added
Expand Down Expand Up @@ -717,6 +731,7 @@ _Special thanks to Tom Klein for his amazing open-source contributions on this r
[#3774]: https://github.com/SpeciesFileGroup/taxonworks/issues/3774
[#3794]: https://github.com/SpeciesFileGroup/taxonworks/issues/3794
[#3824]: https://github.com/SpeciesFileGroup/taxonworks/issues/3824
[#3833]: https://github.com/SpeciesFileGroup/taxonworks/issues/3833

## [0.38.2] - 2024-02-09

Expand Down
66 changes: 56 additions & 10 deletions lib/export/coldp.rb
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ module Export
# * Pending handling of both BibTeX and Verbatim
module Coldp

FILETYPES = %w{Description Name Synonym NameRelation TaxonConceptRelation TypeMaterial VernacularName Taxon References}.freeze
FILETYPES = %w{Distribution Name NameRelation SpeciesInteraction Synonym TaxonConceptRelation TypeMaterial VernacularName Taxon References}.freeze

# @return [Scope]
# A full set of valid only OTUs (= Taxa in CoLDP) that are to be sent.
Expand Down Expand Up @@ -42,7 +42,11 @@ def self.project_members(project_id)
end

# Formats a record's modification timestamp for a CoLDP `modified` column.
#
# @param updated_at [#iso8601, nil] the record's `updated_at` value
# @return [String] the ISO 8601 representation of `updated_at`, or an empty
#   string when no timestamp is available
def self.modified(updated_at)
  # Guard first: calling #iso8601 on nil would raise NoMethodError.
  return '' if updated_at.nil?

  updated_at.iso8601
end

def self.modified_by(updated_by_id, project_members)
Expand All @@ -65,8 +69,14 @@ def self.export(otu_id, prefer_unlabelled_otus: true)
ref_tsv = {}

otu = ::Otu.find(otu_id)

# check for a clb_dataset_id identifier
ns = Namespace.find_by(institution: 'ChecklistBank', name: 'clb_dataset_id')
clb_dataset_id = otu.identifiers.where(namespace_id: ns.id)&.first&.identifier unless ns.nil?

project = ::Project.find(otu.project_id)
project_members = project_members(project.id)
feedback_url = project[:data_curation_issue_tracker_url] unless project[:data_curation_issue_tracker_url].nil?

# TODO: This will likely have to change, it is renamed on serving the file.
zip_file_path = "/tmp/_#{SecureRandom.hex(8)}_coldp.zip"
Expand All @@ -76,26 +86,62 @@ def self.export(otu_id, prefer_unlabelled_otus: true)
if Settings.sandbox_mode?
version = Settings.sandbox_commit_sha
end
metadata = {
'title' => project.name,
'version' => version,
'issued' => DateTime.now.strftime('%Y-%m-%d'),

# We lose the ability to maintain title in TW but until we can model metadata in TW,
# it seems desirable because there's a lot of TW vs CLB title mismatches
if clb_dataset_id.nil?
metadata = {
'title' => project.name,
'issued' => DateTime.now.strftime('%Y-%m-%d'),
'version' => DateTime.now.strftime('%b %Y'),
'feedbackUrl' => feedback_url
}
else
metadata = Colrapi.dataset(dataset_id: clb_dataset_id) unless clb_dataset_id.nil?

# remove fields maintained by ChecklistBank or TW
exclude_fields = %w[created createdBy modified modifiedBy attempt imported lastImportAttempt lastImportState size label citation private platform]
metadata = metadata.except(*exclude_fields)

# put feedbackUrl before the contact email in the metadata file to encourage use of the issue tracker
reordered_metadata = {}
metadata.each do |key, value|
if key == 'contact'
reordered_metadata['feedbackUrl'] = feedback_url
end
reordered_metadata[key] = value
end
metadata = reordered_metadata
end

metadata['issued'] = DateTime.now.strftime('%Y-%m-%d')
metadata['version'] = DateTime.now.strftime('%b %Y')

platform = {
'name' => 'TaxonWorks',
'alias' => 'TW',
'version' => version
}
metadata['platform'] = platform

metadata_file = Tempfile.new(metadata_path)
metadata_file.write(metadata.to_yaml)
metadata_file.close

Zip::File.open(zip_file_path, Zip::File::CREATE) do |zipfile|

(FILETYPES - %w{Name Taxon References}).each do |ft|
(FILETYPES - %w{Name Taxon References Synonym}).each do |ft| # TODO: double check Synonym belongs there.
m = "Export::Coldp::Files::#{ft}".safe_constantize
zipfile.get_output_stream("#{ft}.tsv") { |f| f.write m.generate(otus, project_members, ref_tsv) }
end

zipfile.get_output_stream('Name.tsv') { |f| f.write Export::Coldp::Files::Name.generate(otu, project_members, ref_tsv) }

skip_name_ids = Export::Coldp::Files::Name.skipped_name_ids
zipfile.get_output_stream("Synonym.tsv") { |f| f.write Export::Coldp::Files::Synonym.generate(otus, project_members, ref_tsv, skip_name_ids) }

zipfile.get_output_stream('Taxon.tsv') do |f|
f.write Export::Coldp::Files::Taxon.generate(otus, project_members, otu_id, ref_tsv)
f.write Export::Coldp::Files::Taxon.generate(otus, project_members, otu_id, ref_tsv, prefer_unlabelled_otus, skip_name_ids)
end

# Sort the refs by full citation string
Expand Down Expand Up @@ -132,7 +178,7 @@ def self.download(otu, request = nil, prefer_unlabelled_otus: true)
filename: filename(otu),
source_file_path: file_path,
request:,
expires: 2.days.from_now
expires: 5.days.from_now
)
end

Expand All @@ -142,7 +188,7 @@ def self.download_async(otu, request = nil, prefer_unlabelled_otus: true)
description: 'A zip file containing CoLDP formatted data.',
filename: filename(otu),
request:,
expires: 2.days.from_now
expires: 5.days.from_now
)

ColdpCreateDownloadJob.perform_later(otu, download, prefer_unlabelled_otus:)
Expand Down
50 changes: 0 additions & 50 deletions lib/export/coldp/files/description.rb

This file was deleted.

96 changes: 96 additions & 0 deletions lib/export/coldp/files/distribution.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
# taxonID
# areaID
# area
# gazetteer
# status
# referenceID
# remarks
#
# Builds the tab-separated Distribution table of a CoLDP export.
#
# Two kinds of rows are written:
# * one row per asserted distribution of each OTU, with the area expressed
#   through the most specific identifier available on its geographic area
#   (ISO 3166 alpha-3, then ISO 3166 alpha-2, then TDWG, then the plain name);
# * one free-text row per content record attached to an OTU under the
#   topic named 'Distribution text'.
module Export::Coldp::Files::Distribution

  # @param content [#sources] an object whose `sources` responds to `pluck`
  # @return [String, nil] the comma-separated source ids, or nil when there are none
  def self.reference_id(content)
    ids = content.sources.pluck(:id)
    ids.any? ? ids.join(',') : nil
  end

  # Resolves how a geographic area is reported in the Distribution table.
  #
  # @param geographic_area [#iso_3166_a3, #iso_3166_a2, #tdwgID, #data_origin, #name]
  # @return [Array] a `[gazetteer, area_id, area]` triple
  def self.area_fields(geographic_area)
    if geographic_area.iso_3166_a3.present?
      code = geographic_area.iso_3166_a3
      ['iso', code, code]
    elsif geographic_area.iso_3166_a2.present?
      code = geographic_area.iso_3166_a2
      ['iso', code, code]
    elsif geographic_area.tdwgID.present?
      code = geographic_area.tdwgID
      if %w[tdwg_l3 tdwg_l4].include?(geographic_area.data_origin)
        # fixes mismatch in TW vs CoL TDWG level 3 & 4 identifiers:
        # the leading one or two digits are dropped
        code = code.gsub(/^[0-9]{1,2}(.+)$/, '\1')
      end
      ['tdwg', code, code]
    else
      # No coded identifier available: fall back to the area's name as free text.
      ['text', nil, geographic_area.name]
    end
  end
  private_class_method :area_fields

  # Generates the Distribution table as a TSV string.
  #
  # @param otus [#each, #joins] the OTUs to export; iterated directly and also
  #   queried through `joins` for their 'Distribution text' contents
  # @param project_members passed through to `Export::Coldp.modified_by` and
  #   `Export::Coldp::Files::Reference.add_reference_rows`
  # @param reference_csv when given, the sources of every asserted distribution
  #   are handed to `Export::Coldp::Files::Reference.add_reference_rows`
  # @return [String] the tab-separated table, header row first
  def self.generate(otus, project_members, reference_csv = nil)
    CSV.generate(col_sep: "\t") do |csv|

      csv << %w[
        taxonID
        areaID
        area
        gazetteer
        status
        referenceID
        modified
        modifiedBy
        remarks
      ]

      # Rows derived from asserted distributions.
      otus.each do |o|
        o.asserted_distributions.includes(:geographic_area).each do |ad|
          gazetteer, area_id, area = area_fields(ad.geographic_area)
          sources = ad.sources.load

          csv << [
            o.id,
            area_id,
            area,
            gazetteer,
            nil,                                                            # status
            sources.map(&:id).first,                                        # referenceID: only 1 distribution reference allowed
            Export::Coldp.modified(ad[:updated_at]),                        # modified
            Export::Coldp.modified_by(ad[:updated_by_id], project_members), # modifiedBy
            nil                                                             # remarks
          ]

          # All sources are still registered as references, even though only
          # the first one is cited in the row above.
          Export::Coldp::Files::Reference.add_reference_rows(sources, reference_csv, project_members) if reference_csv
        end
      end

      # Rows derived from free-text contents under the 'Distribution text' topic.
      otus
        .joins('INNER JOIN contents ON contents.otu_id = otus.id ' \
               'INNER JOIN controlled_vocabulary_terms ON controlled_vocabulary_terms.id = contents.topic_id')
        .select('otus.id, contents.text, contents.updated_at, contents.updated_by_id')
        .where("controlled_vocabulary_terms.name = 'Distribution text'")
        .distinct
        .each do |o|
          csv << [
            o.id,
            nil,
            o.text,
            'text',
            nil,
            nil,
            Export::Coldp.modified(o.updated_at),
            Export::Coldp.modified_by(o.updated_by_id, project_members),
            nil
          ]
        end
    end
  end
end
Loading

0 comments on commit f8e8b32

Please sign in to comment.