-
Notifications
You must be signed in to change notification settings - Fork 4.1k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* WIP: Job history purging * Created test cases that handle variations of job history purging configuration * Typo fix * Expanded test cases to control for job history on multiple connections at once. * Handle latest job with saved state correctly regardless of order of ids * Whitespace * Externalized sql. Cleaned up constants. * Cleaned up test case persistence code and structure * Whitespace and formatting per standard tooling.
- Loading branch information
1 parent
d7eafe5
commit 61d597d
Showing
5 changed files
with
321 additions
and
3 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
100 changes: 100 additions & 0 deletions
100
airbyte-scheduler/persistence/src/main/resources/job_history_purge.sql
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,100 @@ | ||
-- Purges rows from jobs that are old, or that belong to a scope with an
-- excessive number of jobs, while always keeping (per scope) the latest job
-- that saved state and the most recent few jobs.
--
-- Placeholders, listed in textual order. Bind markers are positional, so
-- this order must not change.
--   1. bind marker   - reference date as text in YYYY-MM-DD form
--   2. format marker - MINIMUM_AGE_IN_DAYS, an integer spliced into the
--                      INTERVAL literal (presumably by the caller before the
--                      statement is prepared - TODO confirm)
--   3. bind marker   - EXCESSIVE_NUMBER_OF_JOBS
--   4. bind marker   - MINIMUM_RECENCY
--
-- NOTE(review): keep comments in this file free of question marks and
-- percent signs, since the text appears to pass through a formatter and a
-- positional-bind parser - confirm against the caller.
DELETE
FROM
    jobs
WHERE
    jobs.id IN(
        SELECT
            candidate.id
        FROM
            jobs AS candidate
        LEFT JOIN(
                -- total number of jobs in each scope
                SELECT
                    counted.scope,
                    COUNT( counted.id ) AS job_count
                FROM
                    jobs AS counted
                GROUP BY
                    counted.scope
            ) counts ON
            candidate.scope = counts.scope
        WHERE
            -- job must be at least MINIMUM_AGE_IN_DAYS old or connection has more than EXCESSIVE_NUMBER_OF_JOBS
            (
                candidate.created_at <(
                    TO_TIMESTAMP(
                        ?,
                        'YYYY-MM-DD'
                    )- INTERVAL '%d' DAY
                )
                OR counts.job_count >?
            )
            AND candidate.id NOT IN(
                -- cannot be the most recent job with saved state
                SELECT
                    jobs_with_state.job_id
                FROM
                    (
                        SELECT
                            stateful.id AS job_id,
                            -- ranks only the jobs that passed the state filter
                            -- below, newest first within each scope
                            ROW_NUMBER() OVER(
                                PARTITION BY stateful.scope
                                ORDER BY
                                    stateful.created_at DESC,
                                    stateful.id DESC
                            ) AS state_recency
                        FROM
                            jobs AS stateful
                        WHERE
                            -- at least one attempt of this job produced state
                            EXISTS(
                                SELECT
                                    1
                                FROM
                                    attempts
                                WHERE
                                    attempts.job_id = stateful.id
                                    AND attempts."output" -> 'sync' -> 'state' -> 'state' IS NOT NULL
                            )
                    ) jobs_with_state
                WHERE
                    jobs_with_state.state_recency = 1
            )
            AND candidate.id NOT IN(
                -- cannot be one of the last MINIMUM_RECENCY jobs for that connection/scope
                SELECT
                    jobs_by_recency.id
                FROM
                    (
                        SELECT
                            ranked.id,
                            ROW_NUMBER() OVER(
                                PARTITION BY ranked.scope
                                ORDER BY
                                    ranked.created_at DESC,
                                    ranked.id DESC
                            ) AS recency
                        FROM
                            jobs AS ranked
                    ) jobs_by_recency
                WHERE
                    jobs_by_recency.recency <=?
            )
    )
Oops, something went wrong.