-
Notifications
You must be signed in to change notification settings - Fork 318
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Model and store column lineage in Marquez DB (#2096)
* Create database representation, model classes Signed-off-by: mzareba <[email protected]> * Implement ColumnLevelLineageDao Signed-off-by: mzareba <[email protected]> * Instantiate ColumnLevelLineageDao in updateBaseMarquezModel Signed-off-by: mzareba <[email protected]> * Upsert ColumnLevelLineageRow to db, model representation in LineageEvent Signed-off-by: mzareba <[email protected]> * Fix problems in OpenLineageDao, add a list of ColumnLevelLineageRow to DatasetRecord, write test for createLineageRow() invocation Signed-off-by: mzareba <[email protected]> * Change wildcard imports to single class imports Signed-off-by: mzareba <[email protected]> * Change wildcard imports to single class imports Signed-off-by: mzareba <[email protected]> * Change wildcard imports to single class imports Signed-off-by: mzareba <[email protected]> * Apply spotless Signed-off-by: mzareba <[email protected]> * Check for ds.getFacets not null Signed-off-by: mzareba <[email protected]> * Format fix Signed-off-by: mzareba <[email protected]> * Update testUpdateMarquezModelDatasetWithColumnLineageFacet Signed-off-by: mzareba <[email protected]> * Test for column_level_lineage upsert. Signed-off-by: mzareba <[email protected]> * Apply spotless Signed-off-by: mzareba <[email protected]> * switch to data field references Signed-off-by: Pawel Leszczynski <[email protected]> * fix broken tests Signed-off-by: Pawel Leszczynski <[email protected]> * test when dataset_field is missing Signed-off-by: Pawel Leszczynski <[email protected]> * add input_dataset_version_uuid field Signed-off-by: Pawel Leszczynski <[email protected]> * increase db file version Signed-off-by: Pawel Leszczynski <[email protected]> * increase db file version Signed-off-by: Pawel Leszczynski <[email protected]> * rename ColumnLevelLineage -> ColumnLineage Signed-off-by: Pawel Leszczynski <[email protected]> Signed-off-by: mzareba <[email protected]> Signed-off-by: Pawel Leszczynski <[email protected]> Co-authored-by: Mariusz Zaręba <[email protected]> Co-authored-by: Pawel Leszczynski <[email protected]>
- Loading branch information
1 parent
2909864
commit b6544ec
Showing
15 changed files
with
856 additions
and
5 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,91 @@ | ||
/* | ||
* Copyright 2018-2022 contributors to the Marquez project | ||
* SPDX-License-Identifier: Apache-2.0 | ||
*/ | ||
|
||
package marquez.db; | ||
|
||
import java.time.Instant; | ||
import java.util.Collections; | ||
import java.util.List; | ||
import java.util.UUID; | ||
import java.util.stream.Collectors; | ||
import marquez.db.mappers.ColumnLineageRowMapper; | ||
import marquez.db.models.ColumnLineageRow; | ||
import org.apache.commons.lang3.tuple.Pair; | ||
import org.jdbi.v3.sqlobject.config.RegisterRowMapper; | ||
import org.jdbi.v3.sqlobject.customizer.BindBeanList; | ||
import org.jdbi.v3.sqlobject.statement.SqlQuery; | ||
import org.jdbi.v3.sqlobject.statement.SqlUpdate; | ||
|
||
@RegisterRowMapper(ColumnLineageRowMapper.class) | ||
public interface ColumnLineageDao extends BaseDao { | ||
|
||
default List<ColumnLineageRow> upsertColumnLineageRow( | ||
UUID outputDatasetVersionUuid, | ||
UUID outputDatasetFieldUuid, | ||
List<Pair<UUID, UUID>> inputs, | ||
String transformationDescription, | ||
String transformationType, | ||
Instant now) { | ||
|
||
if (inputs.isEmpty()) { | ||
return Collections.emptyList(); | ||
} | ||
|
||
doUpsertColumnLineageRow( | ||
inputs.stream() | ||
.map( | ||
input -> | ||
new ColumnLineageRow( | ||
outputDatasetVersionUuid, | ||
outputDatasetFieldUuid, | ||
input.getLeft(), // input_dataset_version_uuid | ||
input.getRight(), // input_dataset_field_uuid | ||
transformationDescription, | ||
transformationType, | ||
now, | ||
now)) | ||
.collect(Collectors.toList())); | ||
return findColumnLineageByDatasetVersionColumnAndOutputDatasetField( | ||
outputDatasetVersionUuid, outputDatasetFieldUuid); | ||
} | ||
|
||
@SqlQuery( | ||
"SELECT * FROM column_lineage WHERE output_dataset_version_uuid = :datasetVersionUuid AND output_dataset_field_uuid = :outputDatasetFieldUuid") | ||
List<ColumnLineageRow> findColumnLineageByDatasetVersionColumnAndOutputDatasetField( | ||
UUID datasetVersionUuid, UUID outputDatasetFieldUuid); | ||
|
||
@SqlUpdate( | ||
""" | ||
INSERT INTO column_lineage ( | ||
output_dataset_version_uuid, | ||
output_dataset_field_uuid, | ||
input_dataset_version_uuid, | ||
input_dataset_field_uuid, | ||
transformation_description, | ||
transformation_type, | ||
created_at, | ||
updated_at | ||
) VALUES <values> | ||
ON CONFLICT (output_dataset_version_uuid, output_dataset_field_uuid, input_dataset_version_uuid, input_dataset_field_uuid) | ||
DO UPDATE SET | ||
transformation_description = EXCLUDED.transformation_description, | ||
transformation_type = EXCLUDED.transformation_type, | ||
updated_at = EXCLUDED.updated_at | ||
""") | ||
void doUpsertColumnLineageRow( | ||
@BindBeanList( | ||
propertyNames = { | ||
"outputDatasetVersionUuid", | ||
"outputDatasetFieldUuid", | ||
"inputDatasetVersionUuid", | ||
"inputDatasetFieldUuid", | ||
"transformationDescription", | ||
"transformationType", | ||
"createdAt", | ||
"updatedAt" | ||
}, | ||
value = "values") | ||
List<ColumnLineageRow> rows); | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
37 changes: 37 additions & 0 deletions
37
api/src/main/java/marquez/db/mappers/ColumnLineageRowMapper.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,37 @@ | ||
/* | ||
* Copyright 2018-2022 contributors to the Marquez project | ||
* SPDX-License-Identifier: Apache-2.0 | ||
*/ | ||
|
||
package marquez.db.mappers; | ||
|
||
import static marquez.db.Columns.TRANSFORMATION_DESCRIPTION; | ||
import static marquez.db.Columns.TRANSFORMATION_TYPE; | ||
import static marquez.db.Columns.stringOrThrow; | ||
import static marquez.db.Columns.timestampOrThrow; | ||
import static marquez.db.Columns.uuidOrThrow; | ||
|
||
import java.sql.ResultSet; | ||
import java.sql.SQLException; | ||
import lombok.NonNull; | ||
import marquez.db.Columns; | ||
import marquez.db.models.ColumnLineageRow; | ||
import org.jdbi.v3.core.mapper.RowMapper; | ||
import org.jdbi.v3.core.statement.StatementContext; | ||
|
||
public class ColumnLineageRowMapper implements RowMapper<ColumnLineageRow> { | ||
|
||
@Override | ||
public ColumnLineageRow map(@NonNull ResultSet results, @NonNull StatementContext context) | ||
throws SQLException { | ||
return new ColumnLineageRow( | ||
uuidOrThrow(results, Columns.OUTPUT_DATASET_VERSION_UUID), | ||
uuidOrThrow(results, Columns.OUTPUT_DATASET_FIELD_UUID), | ||
uuidOrThrow(results, Columns.INPUT_DATASET_VERSION_UUID), | ||
uuidOrThrow(results, Columns.INPUT_DATASET_FIELD_UUID), | ||
stringOrThrow(results, TRANSFORMATION_DESCRIPTION), | ||
stringOrThrow(results, TRANSFORMATION_TYPE), | ||
timestampOrThrow(results, Columns.CREATED_AT), | ||
timestampOrThrow(results, Columns.UPDATED_AT)); | ||
} | ||
} |
Oops, something went wrong.