Skip to content

Commit

Permalink
Tune and unclutter unify().
Browse files Browse the repository at this point in the history
  • Loading branch information
mjy committed Oct 10, 2024
1 parent b6b20d4 commit 68d5735
Show file tree
Hide file tree
Showing 4 changed files with 83 additions and 39 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -3,24 +3,27 @@
<thead>
<tr>
<th class="w-2"></th>

<th class="w-2">Attempted</th>
<th class="w-2">Unified</th>
<th class="w-2">Not unified</th>
<th class="w-2">Deduplicated</th>
<th class="w-2">Not unified</th>
<th>Errors</th>
</tr>
</thead>
<tbody>
<template
v-for="(
{ merged, unmerged, deduplicated, errors }, key
{ merged, deduplicated, unmerged, errors, attempted }, key
) in response.details"
:key="key"
>
<tr>
<td>{{ key }}</td>
<td>{{ attempted }}</td>
<td>{{ merged }}</td>
<td>{{ unmerged }}</td>
<td>{{ deduplicated }}</td>
<td>{{ unmerged }}</td>
<td>
<div v-if="errors">
<ul class="no_bullets">
Expand Down
20 changes: 10 additions & 10 deletions app/models/common_name.rb
Original file line number Diff line number Diff line change
Expand Up @@ -22,19 +22,19 @@ class CommonName < ApplicationRecord
validates :start_year,
numericality: {only_integer: true,
greater_than: -2500,
less_than: (Time.now.year + 5),
message: 'start date year must be an integer greater than 1500, and no more than 5 ' \
'years in the future'},
length: {is: 4},
allow_nil: true
less_than: (Time.now.year + 5),
message: 'start date year must be an integer greater than 1500, and no more than 5 ' \
'years in the future'},
length: {is: 4},
allow_nil: true

validates :end_year,
numericality: {only_integer: true,
greater_than: -2500,
less_than: (Time.now.year + 5),
message: 'start date year must be an integer greater than 1500, and no more than 5 ' \
'years in the future'},
length: {is: 4},
allow_nil: true
less_than: (Time.now.year + 5),
message: 'start date year must be an integer greater than 1500, and no more than 5 ' \
'years in the future'},
length: {is: 4},
allow_nil: true

end
71 changes: 45 additions & 26 deletions app/models/concerns/shared/unify.rb
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,11 @@ module Shared::Unify
# Never auto-handle these, let the final destroy remove them.
# Housekeeping relations are not hit here, we don't merge users at the moment.
EXCLUDE_RELATIONS = [
:versions, # Not picked up, but adding in case
:dwc_occurrence, # Will be destroyed on related objects destruction
:pinboard_items, # Technically not needed here
:versions, # Not picked up, but adding in case
:dwc_occurrence, # Will be destroyed on related objects destruction
:pinboard_items, # Technically not needed here
:cached_map_register, # Destroyed on merge of things like Georeferences and AssertedDistributions
:cached_map_items,
]

# Per class, Iterating through all of these
Expand Down Expand Up @@ -133,22 +135,16 @@ def unify(remove_object, only: [], except: [], preview: false, cutoff: 250, targ
before_unify # does nothing yet maybe outside here

merge_relations(only:, except:).each do |r|
i = o.send(r.name)
next if i.nil? # has_one case

n = relation_label(r)

i = i.where(project_id: pid)

# Discern b/w has_one and has_many
if r.class.name.match('HasMany')
case relationship_type(r)

when :has_many
i = o.send(r.name).where(project_id: pid)
next unless i.any?

s[:details].merge!(
n => {
merged: 0,
unmerged: 0 }
)
stub_unify_result(s, n, i.size)

q = o.send(r.name).where(project_id: pid)

Expand All @@ -160,21 +156,21 @@ def unify(remove_object, only: [], except: [], preview: false, cutoff: 250, targ
log_unify_result(j, r, s)
end

else
s[:details].merge!(
n => {
merged: 0,
unmerged: 0 }
)
when :has_one
i = o.send(r.name)
if !i.nil?
stub_unify_result(s, n, 1)

i.update(r.options[:inverse_of] => self)
log_unify_result(i, r, s)
i.update(r.options[:inverse_of] => self)
log_unify_result(i, r, s)
end
end
end

if cutoff_hit = s[:result][:total_related] > cutoff
s[:result][:message] = "Related cutoff threshold (> #{cutoff}) hit, unify is not yet allowed on these objects."
else

begin
o.reload # reset all in-memory has_many caches that would prevent destroy

Expand Down Expand Up @@ -228,11 +224,12 @@ def unify_relations_metadata(target_project_id: nil)
merge_relations.each do |r|
name = relation_label(r)

if r.class.name.match('HasMany')
case relationship_type(r)
when :has_many
i = send(r.name).where(project_id: target_project_id)
next unless i.count > 0
s[r.name] = { total: i.count, name: }
else # TODO: HasOne check?!
when :has_one
if send(r.name).present?
s[r.name] = { total: 1, name: }
end
Expand Down Expand Up @@ -326,8 +323,8 @@ def log_unify_result(object, relation, result)
result[:details][n][:unmerged] += 1
result[:details][n][:errors] ||= []
result[:details][n][:errors].push( {id: object.id, message: object.errors.full_messages.join('; ')} )
else
result[:details][n][:merged] += 1
else # We unified and destroyed the duplicate
result[:details][n][:deduplicated] += 1
end

# THere are no errors we can fix, ensure we have a fresh copy
Expand All @@ -347,4 +344,26 @@ def log_unify_result(object, relation, result)
result
end

def relationship_type(relation)
if relation.class.name.match('HasMany')
return :has_many
elsif relation.class.name.match('HasOne')
return :has_one
end

# Should never hit here
raise
end

def stub_unify_result(result, relation_name, attempted)
result[:details].merge!(
relation_name => {
attempted:,
merged: 0,
unmerged: 0,
deduplicated: 0
}
)
end

end
22 changes: 22 additions & 0 deletions spec/models/concerns/shared/unify_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -445,6 +445,28 @@
expect(o2.destroyed?).to be_truthy
end

specify 'unify one degree of seperation - records deduplication result in preview' do
ad1 = FactoryBot.create(:valid_asserted_distribution, otu: o1)
ad2 = FactoryBot.create(:valid_asserted_distribution, otu: o2, geographic_area: ad1.geographic_area) # differ only by OTU

n = FactoryBot.create(:valid_note, note_object: ad1)

b = o1.unify(o2, preview: true)

expect( b[:details]['Asserted distributions'].dig(:deduplicated)).to eq(1)
end

specify 'unify one degree of seperation - records deduplication result' do
ad1 = FactoryBot.create(:valid_asserted_distribution, otu: o1)
ad2 = FactoryBot.create(:valid_asserted_distribution, otu: o2, geographic_area: ad1.geographic_area) # differ only by OTU

n = FactoryBot.create(:valid_note, note_object: ad1)

b = o1.unify(o2)

expect( b[:details]['Asserted distributions'].dig(:deduplicated)).to eq(1)
end

# Generalize to all annotations.
#
# If unify would create two identical citations anywhere
Expand Down

0 comments on commit 68d5735

Please sign in to comment.