Skip to content

Commit

Permalink
added initial migration rake tasks for the new DMPTool
Browse files Browse the repository at this point in the history
  • Loading branch information
briri committed Sep 9, 2024
1 parent 68287e6 commit 344de43
Showing 1 changed file with 211 additions and 15 deletions.
226 changes: 211 additions & 15 deletions lib/tasks/v6_migration.rake
Original file line number Diff line number Diff line change
Expand Up @@ -11,50 +11,246 @@ namespace :v6_migration do
# Convenience task that runs each of the v6 migration generators in sequence.
desc 'Generate all of the data migration scripts'
task generate_all: :environment do
  # Templates, Sections, Questions
  #
  # The registry constant is `Rake::Task` (singular); `Rake::Tasks` does not
  # exist and raised a NameError before any generator could run.
  # `execute` runs a task's action blocks directly without invoking its
  # prerequisites; :environment has already been loaded by this task.
  Rake::Task['v6_migration:generate_mock_users'].execute
  Rake::Task['v6_migration:generate_templates'].execute
  Rake::Task['v6_migration:generate_sections'].execute
end

# Builds one mock ADMIN user INSERT statement per distinct org "domain" and
# writes them to a timestamped SQL file under tmp/.
#
# NOTE(review): two `task ... do` headers open below (generate_mock_users and
# generate_mock_admins) but only one closing `end` follows at the bottom of
# this block. This looks like leftover lines from an earlier revision --
# confirm which task name is intended before relying on either.
desc 'Generate mock/test users for each Org that has a published template'
task generate_mock_users: :environment do
# `p` prints the inspected form of each string (including the surrounding quotes).
p ""
p "MOCK ADMIN USERS"
p "================================================================="
task generate_mock_admins: :environment do
p "Generating SQL statements for Mock Template Admins ..."
# Published templates that are not customizations and whose org_id matches
# one of the records returned by RegistryOrg.known.
orgs = RegistryOrg.known
templates = Template.published.where(org_id: orgs.map(&:org_id), customization_of: nil)

# NOTE(review): the file name prefix is spelled "v6_userss_" -- confirm whether the double "s" is intended.
file_name = Rails.root.join('tmp', "v6_userss_#{Time.now.strftime('%Y-%m-%d_%H%M')}.sql")
file = File.open(file_name, 'w+')

# NOTE(review): `write` does not append a newline, so these header fragments
# and every statement written below end up on one physical line. If the file
# is fed to MySQL, a leading `#` presumably comments out that whole line --
# verify the generated file.
file.write "# "
file.write "# MOCK ADMIN USERS"
file.write "# ================================================================="

# Domains that already had a statement written to the file, so each is written only once.
already_done = []
templates.each do |tmplt|
# Registry record whose org_id matches the template's org; templates without one are skipped.
org = orgs.select { |ror| ror.org_id == tmplt.org_id }&.first
next if org.nil?

# Takes whatever follows the last " (" in the org name and removes ")" characters.
# Presumably names look like "Example University (example.edu)" -- confirm.
# When the name contains no " (", the whole name is used as the domain.
domain = org.name.split(' (')&.last&.gsub(')', '').strip
# Echoes the statement to stdout for every template, before the duplicate check below.
# NOTE(review): org.name is interpolated without escaping here and in the
# file.write below, so a name containing a single quote would break the statement.
p "INSERT INTO `users` (`email`, `affiliationId`, `password`, `givenName`, `surName`, `role`) VALUES ('admin@#{domain}', '#{org.ror_id}', '$2a$10$f3wCBdUVt/2aMcPOb.GX1OBO9WMGxDXx5HKeSBBnrMhat4.pis4Pe', '#{org.name.split(" (").first.strip} Admin', 'Test User', 'ADMIN');"
unless already_done.include?(domain)
# The password value is a hard-coded hash shared by every mock user.
file.write "INSERT INTO `users` (`email`, `affiliationId`, `password`, `givenName`, `surName`, `role`) VALUES ('admin@#{domain}', '#{org.ror_id}', '$2a$10$f3wCBdUVt/2aMcPOb.GX1OBO9WMGxDXx5HKeSBBnrMhat4.pis4Pe', '#{org.name.split(" (").first.strip} Admin', 'Test User', 'ADMIN'); "
already_done << domain
end
end
# NOTE(review): the handle is closed only on the success path; an exception
# raised inside the loop would skip this call.
file.close
p "DONE. SQL written to: #{file_name}"
end

# Writes the SQL needed to recreate every published, non-customized template
# (plus its versioned copy, sections, versioned sections and section tags) to
# a timestamped file under tmp/. Templates whose org has no matching
# RegistryOrg record are skipped.
desc 'Generate all of the template migrations'
task generate_templates: :environment do
  p ""
  p "TEMPLATES"
  p "================================================================="
  p "Generating SQL statements for published templates ..."
  orgs = RegistryOrg.known
  templates = Template.published.where(org_id: orgs.map(&:org_id), customization_of: nil)

  file_name = Rails.root.join('tmp', "v6_templates_#{Time.now.strftime('%Y-%m-%d_%H%M')}.sql")

  # Block form guarantees the handle is closed even if a record raises.
  File.open(file_name, 'w+') do |file|
    # `puts` (not `write`) so each comment/statement lands on its own line;
    # otherwise the leading `#` header would swallow the rest of the file.
    file.puts "# "
    file.puts "# TEMPLATES"
    file.puts "# ================================================================="

    templates.each do |tmplt|
      # Skip (for now) if the org is not in ROR
      org = orgs.find { |ror| ror.org_id == tmplt.org_id }
      next if org.nil?

      tmplt_title = safe_text(tmplt.title)
      tmplt_desc = safe_text(tmplt.description)

      # The template is inserted once per ADMIN user of the owning affiliation
      # (the SELECT ... FROM `users` supplies createdById/modifiedById).
      file.puts ''
      file.puts "INSERT INTO `templates` (`name`, `description`, `ownerId`, `visibility`, `currentVersion`, `isDirty`, `bestPractice`, `createdById`, `created`, `modifiedById`, `modified`) (SELECT '#{tmplt_title}', '#{tmplt_desc}', '#{org.ror_id}', '#{tmplt.visibility == 1 ? "PUBLIC" : "PRIVATE"}', 'v#{tmplt.version + 1}', 0, #{tmplt.is_default ? 1 : 0}, `users`.`id`, '#{tmplt.created_at.utc.strftime('%Y-%m-%d %H:%M:%S')}', `users`.`id`, '#{tmplt.updated_at.utc.strftime('%Y-%m-%d %H:%M:%S')}' FROM `users` WHERE `affiliationId` = '#{org.ror_id}' AND `role` = 'ADMIN'); "
      # NOTE(review): the e-mail literal below looks like an obfuscated
      # placeholder rather than a real address -- confirm the intended value.
      file.puts "INSERT INTO `versionedTemplates` (`templateId`, `active`, `version`, `versionType`, `versionedById`, `comment`, `name`, `description`, `ownerId`, `visibility`, `bestPractice`, `created`, `modified`, `createdById`, `modifiedById`) (SELECT `templates`.`id`, 1, `templates`.`currentVersion`, 'PUBLISHED', (SELECT id FROM users WHERE email = '[email protected]' LIMIT 1), 'Initial migration from the old DMPTool system.', `templates`.`name`, `templates`.`description`, `templates`.`ownerId`, `templates`.`visibility`, `templates`.`bestPractice`, `templates`.`created`, `templates`.`modified`, `templates`.`createdbyId`, `templates`.`modifiedById` FROM `templates` WHERE `templates`.`name` = '#{tmplt_title}' AND `templates`.`ownerId` = '#{org.ror_id}' LIMIT 1); "
      file.puts ''

      # Fetch the phase ids
      phase_ids = Phase.where(template_id: tmplt.id).pluck(:id)

      # Fetch any org guidance that we can hook onto the sections
      # NOTE(review): the condition is passed as an Array wrapping a Hash and
      # uses `org.id` (the RegistryOrg's own id) although templates are matched
      # on `org.org_id` above -- confirm this filters on the intended column.
      org_guidance = Guidance.includes(:guidance_group, :themes)
                             .joins(:guidance_group)
                             .where(guidance_group: [org_id: org.id])

      guidance_hash = org_guidance.map do |rec|
        { tags: themes_to_tags(rec.themes), text: safe_text(rec.text) }
      end

      # Fetch the sections
      Section.where(phase_id: phase_ids).each do |section|
        tags = tags_from_title(section.title)
        # Keep only guidance sharing at least one tag with the section
        # (an Array intersection is truthy even when empty, so test `any?`),
        # and join the escaped texts rather than the whole hashes.
        guidance = guidance_hash.select { |guide| (guide[:tags] & tags).any? }
                                .map { |guide| guide[:text] }
                                .join
        section_title = safe_text(section.title)
        section_desc = safe_text(section.description)
        puts "No tags found for Section title: #{section_title}" if tags.empty?

        file.puts "INSERT INTO `sections` (`templateId`, `name`, `introduction`, `requirements`, `guidance`, `displayOrder`, `isDirty`, `createdById`, `created`, `modifiedById`, `modified`) (SELECT `templates`.`id`, '#{section_title}', '#{section_desc}', '', '#{guidance}', #{section.number}, 0, `templates`.`createdById`, `templates`.`created`, `templates`.`modifiedById`, `templates`.`modified` FROM `templates` WHERE `ownerId` = '#{org.ror_id}' AND `name` = '#{tmplt_title}'); "
        file.puts "INSERT INTO `versionedSections` (`sectionId`, `versionedTemplateId`, `name`, `introduction`, `requirements`, `guidance`, `displayOrder`, `createdById`, `created`, `modifiedById`, `modified`) (SELECT `sections`.`id`, (SELECT `id` FROM `versionedTemplates` WHERE `ownerId` = '#{org.ror_id}' AND `name` = '#{tmplt_title}' AND `active` = 1), '#{section_title}', '#{section_desc}', '', '#{guidance}', #{section.number}, `sections`.`createdById`, `sections`.`created`, `sections`.`modifiedById`, `sections`.`modified` FROM `templates` INNER JOIN `sections` ON `templates`.`id` = `sections`.`templateId` WHERE `sections`.`name` = '#{section_title}' AND `templates`.`ownerId` = '#{org.ror_id}' AND `templates`.`name` = '#{tmplt_title}' LIMIT 1); "
        tags.each do |tag|
          file.puts "INSERT INTO `sectionTags` (`sectionId`, `tagId`, `createdById`, `modifiedById`) (SELECT `sections`.`id`, (SELECT `tags`.`id` FROM `tags` WHERE `tags`.`name` = '#{tag}'), `sections`.`createdById`, `sections`.`modifiedById` FROM `templates` INNER JOIN `sections` ON `templates`.`id` = `sections`.`templateId` WHERE `sections`.`name` = '#{section_title}' AND `templates`.`ownerId` = '#{org.ror_id}' AND `templates`.`name` = '#{tmplt_title}' LIMIT 1); "
        end
        file.puts ''
      end

      file.puts ''
    end
  end

  p "DONE. SQL written to: #{file_name}"
end

# Prepares free text for embedding inside a single-quoted SQL string literal.
#
# Leading/trailing whitespace is stripped, backslashes and single quotes are
# backslash-escaped, and each carriage return / line feed becomes one space.
#
# @param text [String, nil] the raw text
# @return [String, nil] the escaped text, or nil when +text+ is nil
def safe_text(text)
  return nil if text.nil?

  # Backslashes must be escaped BEFORE quotes: otherwise the backslash added
  # for a quote would itself be doubled, and a raw trailing backslash in the
  # input would escape the literal's closing quote.
  # Block form of gsub keeps the replacement free of backreference parsing.
  text.strip
      .gsub('\\') { '\\\\' }
      .gsub("'") { "\\'" }
      .gsub(/[\r\n]/, ' ')
end

desc 'Generate all of the section migrations'
task generate_sections: :environment do
# Derives the list of tag names for a section from keywords in its title.
#
# The title is lower-cased and stripped, then each tag is added when any of
# its keywords occurs as a substring. Tags are returned in the fixed order of
# the table below. Because the title is stripped first, keywords that end
# with a space (e.g. 'share ') can only match when more text follows them.
#
# Each tag is evaluated independently. Previously a dangling `||` after the
# 'contact ' keyword chained the 'Training & support' condition into the
# 'Data sharing' statement, so training-only titles got no tag and sharing
# titles were also tagged 'Training & support'.
#
# @param title [String, nil] the section title
# @return [Array<String>] matching tag names (empty when nothing matches)
def tags_from_title(title)
  prepped = title.to_s.downcase.strip

  keywords_by_tag = {
    'Data description' => [
      'description', 'produced', 'products', 'sources', 'data format', 'data type',
      'sample type', 'data volume', 'data input', 'data summary', 'produzidos',
      'materials', 'research outputs', 'attributes', 'computational environment'
    ],
    'Data organization & documentation' => [
      'documentation', 'metadata', 'metadados', 'organization', 'version control',
      'standard', 'documentação', 'methodology'
    ],
    'Security & privacy' => [
      'security', 'privacy', 'protection', ' legal', 'copyright', 'intellectual property',
      'proprietary', 'propriedade intelectual', 'restriction', 'secondary use', 'secure '
    ],
    'Ethical considerations' => [
      'ethic', 'policy', 'policies', 'legal', 'indigenous', 'obligation', 'disposal',
      'bias', 'legais'
    ],
    'Training & support' => [
      'training', 'support', 'documenting', 'contact '
    ],
    'Data sharing' => [
      'share ', 'sharing', 'reuse ', 're-use', 'audience', 'access', 'publication',
      'storing', 'reutilização', 'dissemination', 'compartilhamento'
    ],
    'Data storage & backup' => [
      'storage', 'backup', 'back-up', 'repositor', 'preserv', 'management', 'retention',
      'armazenados', 'archiving'
    ],
    'Data quality & integrity' => [
      'quality', 'integrity', 'adherence', 'managing', 'monitoring', 'validation'
    ],
    'Roles & responsibilities' => [
      ' role', 'responsibilit', 'collaborator', 'administration', 'author', 'papéis',
      'responsabilidade'
    ],
    'Budget' => [
      'budget', 'funding', ' cost'
    ],
    'Data collection' => [
      'collection', 'collecting', 'protocol', 'software', 'analysis', 'third party',
      'generation', 'assessment', 'gerenciamento', 'recolhidos', 'pre-cruise',
      'data synthesis', 'geração', 'coleta'
    ]
  }

  # Hash#select preserves insertion order, so the result order is stable.
  keywords_by_tag.select { |_tag, keywords| keywords.any? { |word| prepped.include?(word) } }.keys
end

# Translates legacy guidance themes into the new tag names.
#
# Each theme is lower-cased and stripped before lookup; themes that are not
# in the table contribute nothing. A theme may map to more than one tag. The
# result keeps first-seen order with duplicates removed.
#
# Entries may be plain strings or objects exposing `title` (presumably the
# Theme records handed over by a `themes` association -- confirm the
# attribute name against the model).
#
# @param themes [Enumerable, nil] theme names or theme-like objects
# @return [Array<String>] unique tag names
def themes_to_tags(themes)
  tags_by_theme = {
    'budget' => ['Budget'],
    'data collection' => ['Data collection'],
    'data description' => ['Data description'],
    'data format' => ['Data description'],
    'data volume' => ['Data description'],
    'data repository' => ['Data storage & backup'],
    'data sharing' => ['Data sharing'],
    'ethics & privacy' => ['Security & privacy', 'Ethical considerations'],
    'intellectual property rights' => ['Security & privacy'],
    'metadata & documentation' => ['Data organization & documentation'],
    # Correct spelling; the misspelled key below is what the code matched
    # before and is kept so any data relying on it still maps.
    'preservation' => ['Data storage & backup'],
    'preservaction' => ['Data storage & backup'],
    'storage & security' => ['Data storage & backup', 'Security & privacy'],
    'roles & responsibilities' => ['Roles & responsibilities'],
    'related policies' => ['Data sharing', 'Security & privacy']
  }

  Array(themes).flat_map do |theme|
    label = theme.respond_to?(:title) ? theme.title : theme
    tags_by_theme.fetch(label.to_s.downcase.strip, [])
  end.uniq
end
end

0 comments on commit 344de43

Please sign in to comment.