Skip to content
This repository has been archived by the owner on Jul 24, 2021. It is now read-only.

Commit

Permalink
delete old device reports
Browse files Browse the repository at this point in the history
We will delete all device reports (and associated validation_state records,
and then orphaned validation_results) that are older than 6 months, except we
retain the most recent report of each state (error, fail, pass).

Due to the amount of time to run (6+ hours), it is implemented mainly as a
command that can be run in parallel with normal operations, rather than as a
database migration that is run in between system restarts.
  • Loading branch information
karenetheridge committed Nov 12, 2020
1 parent 163dd8c commit 569c623
Show file tree
Hide file tree
Showing 2 changed files with 66 additions and 79 deletions.
116 changes: 37 additions & 79 deletions lib/Conch/Command/thin_device_reports.pm
Original file line number Diff line number Diff line change
Expand Up @@ -104,101 +104,59 @@ sub _process_device ($self, $device) {
my $report_count = 0;
print 'device id ', $device->id, ': ';

# Consider the validation status of all reports, oldest first, in pages of 100 rows each.
# Valid reports with no validation results are considered to be a 'pass', i.e. eligible for
# deletion.
my $query = <<'SQL';
select id from (
select
device_report.id,
device_report.created,
coalesce(
case when count(distinct(validation_state.status)) > 1 then 'NOT_UNIQUE'
else min(validation_state.status)::text end,
'NONE') as status,
row_number() over (
partition by status order by device_report.created desc
) as seq
from device_report
left join validation_state on validation_state.device_report_id = device_report.id
where device_report.device_id = ?
group by device_report.id, device_report.created, status
) _tmp
where status != 'NOT_UNIQUE' and seq > 1 and created < (now() - interval '6 months')
SQL

# delete all device_report rows (cascading to validation_state, validation_state_member) which
# are older than six months, except for the most recent report of each validation status (error,
# fail, pass)
my $device_report_rs = $self->app->db_device_reports
->search({ 'device_report.device_id' => $device->id })
->columns('device_report.id')
->with_report_status
->order_by({ -asc => 'device_report.created' })
->rows(100)
->page(1)
->hri;

# we only delete reports when we are done, so we can safely iterate through reports
# without the pages changing strangely
my @delete_report_ids;

# we push data about reports to the end as we consider each one,
# and shift data off at the beginning when we're done
# $report_statuses[-1] current report
# $report_statuses[-2] previous report
# $report_statuses[-3] 2 reports ago
my @report_statuses;

foreach my $page (1 .. $device_report_rs->pager->last_page) {
$device_report_rs = $device_report_rs->page($page);
while (my $device_report = $device_report_rs->next) {
++$report_count;
print '.' if $report_count % 100 == 0;

# capture information about the latest report we just fetched.
push @report_statuses, $device_report;

# we maintain a sliding window of (at least?) 3 reports.
# We can consider what to do about the middle report now.

# prevprev previous current delete previous?
# dne dne FAIL 0 previous report does not exist
# dne dne PASS 0 previous report does not exist
# dne FAIL FAIL 0 keep first
# dne FAIL PASS 0 keep first
# dne PASS FAIL 0 keep first
# dne PASS PASS 0 keep first
# FAIL FAIL FAIL 0 keep reports that fail
# FAIL FAIL PASS 0 keep reports that fail
# FAIL PASS FAIL 0 keep first pass after a failure
# FAIL PASS PASS 0 keep first pass after a failure
# PASS FAIL FAIL 0 keep reports that fail
# PASS FAIL PASS 0 keep reports that fail
# PASS PASS FAIL 0 last pass before a failure
# PASS PASS PASS 1

# we only delete the previous report (index [-2]) iff:
# - the current report was a pass
# - the previous exists and was a pass
# - the previous-previous exists and was a pass

push @delete_report_ids, $report_statuses[-2]{id}
if $report_statuses[-1]{status} eq 'pass'
and $report_statuses[-2] and $report_statuses[-2]{status} eq 'pass'
and $report_statuses[-3] and $report_statuses[-3]{status} eq 'pass';

# forget about the oldest report if we are watching at least 3.
shift @report_statuses if $report_statuses[-3];
}
}
->search({ 'device_report.id' => { -in => \[ $query, $device->id ] } });

print "\n";
my $device_reports_deleted;
my $validation_results_deleted;

my ($device_reports_deleted, $validation_results_deleted) = (0,0);
print "\n";

if ($self->dry_run) {
say 'Would delete ', scalar(@delete_report_ids), ' reports for device id ', $device->id,
$device_reports_deleted = $device_report_rs->count;
say 'Would delete ', $device_reports_deleted, ' reports for device id ', $device->id,
' out of ', $report_count, ' examined.';
}
else {
# delete all reports that we identified for deletion
# this may also cause cascade deletes on validation_state, validation_state_member.
say 'deleting ', scalar(@delete_report_ids), ' reports for device id ', $device->id,
' out of ', $report_count, ' examined...';

# delete reports 100 records at a time
my $pager = Data::Page->new(scalar @delete_report_ids, 100);
for ($pager->first_page .. $pager->last_page) {
my @ids = $pager->splice(\@delete_report_ids);
last if not @ids;
$pager->current_page($pager->current_page + 1);
say 'deleting reports for device id ', $device->id, '...';

$device_reports_deleted += $device->delete_related('device_reports', { id => { -in => \@ids } });
}
$device_reports_deleted = $device_report_rs->delete;
say 'deleted ', $device_reports_deleted, ' reports for device id ', $device->id,
' out of ', $report_count, ' examined';

# delete all newly-orphaned validation_result rows for this device
# now delete all newly-orphaned validation_result rows for this device
$validation_results_deleted = $device->delete_related('validation_results',
{ 'validation_state_members.validation_state_id' => undef },
{ join => 'validation_state_members' },
);

say 'deleted ', $validation_results_deleted,
' validation_result rows for device id ', $device->id;
}

print "\n";
Expand Down
29 changes: 29 additions & 0 deletions sql/migrations/0171-delete-old-device_reports.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
SELECT run_migration(171, $$

-- create a placeholder validation_state record for every device_report without a
-- validation_state record pointing to it
-- (reports created before 2019-01-08 were not associated with validation_states, so we had to
-- match them up as best we could based on similar timestamps, and not everyone found his
-- match.)
insert into validation_state (validation_plan_id, created, status, device_report_id, device_id, hardware_product_id)
(
select
hardware_product.validation_plan_id,
device_report.created,
'error',
device_report.id,
device_report.device_id,
device.hardware_product_id
from device_report
left join validation_state on validation_state.device_report_id = device_report.id
left join device on device_report.device_id = device.id
left join hardware_product on hardware_product.id = device.hardware_product_id
where validation_state.id is null
);

-- now we can run bin/conch thin_device_reports, which will delete all device_report rows
-- (cascading to validation_state, validation_state_member) which are older than six months,
-- except for the most recent report of each validation status (error, fail, pass)..
-- and then delete newly-orphaned validation_result rows.

$$);

0 comments on commit 569c623

Please sign in to comment.