-
Notifications
You must be signed in to change notification settings - Fork 318
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add job tagging to API #2774
Add job tagging to API #2774
Changes from 10 commits
0d44966
d114c3f
fd76893
655bf3a
02e7484
58f40cf
1d3f819
057bf14
4d1550a
da04b41
3f429f9
911549b
2750d66
2b8ae9a
b73c6b3
ca332d6
322ed34
d127354
a64c2f6
4517062
f726170
4065cb5
4c18168
10703c1
bf6621e
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -61,14 +61,53 @@ SELECT EXISTS ( | |
@SqlQuery( | ||
""" | ||
WITH job_versions_facets AS ( | ||
SELECT job_version_uuid, JSON_AGG(facet) as facets | ||
FROM job_facets | ||
GROUP BY job_version_uuid | ||
SELECT | ||
job_version_uuid | ||
, JSON_AGG(facet) as facets | ||
FROM | ||
job_facets | ||
GROUP BY | ||
job_version_uuid | ||
), | ||
job_tags as ( | ||
SELECT | ||
j.uuid | ||
, ARRAY_AGG(t.name) as tags | ||
FROM | ||
jobs j | ||
INNER JOIN | ||
jobs_tag_mapping jtm | ||
ON | ||
jtm.job_uuid = j.uuid | ||
AND | ||
j.simple_name = :jobName | ||
AND | ||
j.namespace_name = :namespaceName | ||
INNER JOIN | ||
tags t | ||
ON | ||
jtm.tag_uuid = t.uuid | ||
GROUP BY | ||
j.uuid | ||
) | ||
SELECT j.*, facets | ||
FROM jobs_view j | ||
LEFT OUTER JOIN job_versions_facets f ON j.current_version_uuid = f.job_version_uuid | ||
WHERE j.namespace_name=:namespaceName AND (j.name=:jobName OR :jobName = ANY(j.aliases)) | ||
SELECT | ||
j.* | ||
, facets | ||
, jt.tags as tags | ||
FROM | ||
jobs_view j | ||
LEFT OUTER JOIN | ||
job_versions_facets f | ||
ON | ||
j.current_version_uuid = f.job_version_uuid | ||
LEFT OUTER JOIN | ||
job_tags jt | ||
ON | ||
j.uuid = jt.uuid | ||
WHERE | ||
j.namespace_name = :namespaceName | ||
AND | ||
(j.name = :jobName OR :jobName = ANY(j.aliases)) | ||
""") | ||
Optional<Job> findJobByName(String namespaceName, String jobName); | ||
|
||
|
@@ -169,16 +208,38 @@ facets_temp AS ( | |
lineage_event_time ASC | ||
) e | ||
GROUP BY e.run_uuid | ||
) | ||
), | ||
job_tags as ( | ||
SELECT | ||
j.uuid | ||
, ARRAY_AGG(t.name) as tags | ||
FROM | ||
jobs j | ||
INNER JOIN | ||
jobs_tag_mapping jtm | ||
ON | ||
jtm.job_uuid = j.uuid | ||
AND | ||
j.namespace_name = :namespaceName | ||
INNER JOIN | ||
tags t | ||
ON | ||
jtm.tag_uuid = t.uuid | ||
GROUP BY | ||
j.uuid | ||
) | ||
SELECT | ||
j.*, | ||
f.facets | ||
f.facets, | ||
COALESCE(jt.tags, ARRAY[]::VARCHAR[]) AS tags | ||
FROM | ||
jobs_view_page AS j | ||
LEFT OUTER JOIN job_versions_temp AS jv | ||
ON jv.uuid = j.current_version_uuid | ||
LEFT OUTER JOIN facets_temp AS f | ||
ON f.run_uuid = jv.latest_run_uuid | ||
LEFT OUTER JOIN job_tags jt | ||
ON j.uuid = jt.uuid | ||
ORDER BY | ||
j.name | ||
""") | ||
|
@@ -386,4 +447,64 @@ JobRow upsertJob( | |
String location, | ||
UUID symlinkTargetId, | ||
PGobject inputs); | ||
|
||
@SqlUpdate( | ||
""" | ||
WITH new_tag AS ( | ||
INSERT INTO tags (uuid, created_at, updated_at, name, description) | ||
SELECT | ||
gen_random_uuid(), | ||
NOW(), | ||
wslulciuc marked this conversation as resolved.
Show resolved
Hide resolved
|
||
NOW(), | ||
:tagName, | ||
'No Description' | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Is there a reason we would want to hardcode this in the SQL rather than in our application code, where it would be a bit more flexible? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Yeah, I can leave it as null - I was just being explicit more than anything, but it is inconsistent with dataset/dataset field tagging, so it makes sense to set it to null. |
||
WHERE | ||
NOT EXISTS (SELECT 1 FROM tags WHERE name = :tagName) | ||
RETURNING uuid | ||
), | ||
existing_tag AS ( | ||
SELECT uuid FROM tags WHERE name = :tagName | ||
), | ||
job AS ( | ||
SELECT | ||
uuid | ||
FROM | ||
jobs | ||
WHERE | ||
simple_name = :jobName | ||
and | ||
namespace_name = :namespaceName | ||
) | ||
INSERT INTO jobs_tag_mapping (job_uuid, tag_uuid, tagged_at) | ||
SELECT | ||
(SELECT uuid FROM job) | ||
, COALESCE((SELECT uuid FROM new_tag), (SELECT uuid FROM existing_tag)) | ||
, NOW() | ||
ON CONFLICT DO NOTHING | ||
davidsharp7 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
; | ||
""") | ||
void updateJobTags(String namespaceName, String jobName, String tagName); | ||
|
||
@SqlUpdate( | ||
""" | ||
DELETE FROM jobs_tag_mapping jtm | ||
WHERE EXISTS ( | ||
SELECT 1 | ||
FROM | ||
jobs j | ||
JOIN | ||
tags t | ||
ON | ||
j.uuid = jtm.job_uuid | ||
AND | ||
t.uuid = jtm.tag_uuid | ||
WHERE | ||
t.name = :tagName | ||
AND | ||
j.simple_name = :jobName | ||
AND | ||
j.namespace_name = :namespaceName | ||
); | ||
""") | ||
void deleteJobTags(String namespaceName, String jobName, String tagName); | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -7,6 +7,7 @@ | |
|
||
import com.google.common.collect.ImmutableList; | ||
import com.google.common.collect.ImmutableMap; | ||
import com.google.common.collect.ImmutableSet; | ||
import java.net.URL; | ||
import java.time.Instant; | ||
import java.util.Optional; | ||
|
@@ -23,6 +24,7 @@ | |
import marquez.common.models.JobName; | ||
import marquez.common.models.JobType; | ||
import marquez.common.models.NamespaceName; | ||
import marquez.common.models.TagName; | ||
|
||
@EqualsAndHashCode | ||
@ToString | ||
|
@@ -43,6 +45,7 @@ public final class Job { | |
@Getter private final ImmutableMap<String, Object> facets; | ||
@Nullable private UUID currentVersion; | ||
@Getter @Nullable private ImmutableList<String> labels; | ||
@Getter private final ImmutableSet<TagName> tags; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Should we mark this |
||
|
||
public Job( | ||
@NonNull final JobId id, | ||
|
@@ -59,7 +62,8 @@ public Job( | |
@Nullable final Run latestRun, | ||
@Nullable final ImmutableMap<String, Object> facets, | ||
@Nullable UUID currentVersion, | ||
@Nullable ImmutableList<String> labels) { | ||
@Nullable ImmutableList<String> labels, | ||
@Nullable final ImmutableSet<TagName> tags) { | ||
this.id = id; | ||
this.type = type; | ||
this.name = name; | ||
|
@@ -76,6 +80,7 @@ public Job( | |
this.facets = (facets == null) ? ImmutableMap.of() : facets; | ||
this.currentVersion = currentVersion; | ||
this.labels = (labels == null) ? ImmutableList.of() : labels; | ||
this.tags = (tags == null) ? ImmutableSet.of() : tags; | ||
} | ||
|
||
public Optional<URL> getLocation() { | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
/* SPDX-License-Identifier: Apache-2.0 */ | ||
CREATE TABLE jobs_tag_mapping ( | ||
job_uuid UUID REFERENCES jobs(uuid), | ||
tag_uuid UUID REFERENCES tags(uuid), | ||
tagged_at TIMESTAMP NOT NULL, | ||
wslulciuc marked this conversation as resolved.
Show resolved
Hide resolved
|
||
PRIMARY KEY (tag_uuid, job_uuid) | ||
); | ||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I guess this should be quite unlikely given
updateJobTags
would presumably throw if the job didn't exist? Is it a case of: we get an Optional back, so we should do something beyond an unqualified get()
on it? There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yeah, I understand what you mean - it will fall over before it gets to this point, so why bother? Mainly because that seems to be the de facto pattern for a lot of the code, i.e.
execute something -> retrieve the object (job, dataset, etc.), or else throw an error.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks for following our code style!