Skip to content

Commit

Permalink
fix(ingest): bigquery: multiproject profiling fix (#5474)
Browse files Browse the repository at this point in the history
  • Loading branch information
treff7es authored Jul 23, 2022
1 parent f8697ba commit cb05159
Showing 1 changed file with 6 additions and 9 deletions.
15 changes: 6 additions & 9 deletions metadata-ingestion/src/datahub/ingestion/source/sql/bigquery.py
Original file line number Diff line number Diff line change
Expand Up @@ -493,7 +493,10 @@ def generate_profile_candidates(
)
_client: BigQueryClient = BigQueryClient(project=exec_project_id)

full_schema_name = f"{storage_project_id}.{schema}"
# If schema is a bare dataset name, prefix it with the storage project id; otherwise leave it unchanged.
full_schema_name = (
f"{storage_project_id}.{schema}" if len(schema.split(".")) == 1 else schema
)
# Reading all tables' metadata to report
all_tables = _client.query(self.get_all_schema_tables_query(full_schema_name))
report_tables: List[str] = [
Expand All @@ -503,7 +506,7 @@ def generate_profile_candidates(
report_tables.append(
f"{table_row.table_id}, {table_row.size_bytes}, {table_row.last_modified_time}, {table_row.row_count}"
)
report_key = f"{self._get_project_id(inspector)}.{full_schema_name}"
report_key = f"{full_schema_name}"
self.report.table_metadata[report_key] = report_tables

query = self.generate_profile_candidate_query(threshold_time, full_schema_name)
Expand All @@ -516,13 +519,7 @@ def generate_profile_candidates(
query_job = _client.query(query)
_profile_candidates = []
for row in query_job:
_profile_candidates.append(
self.get_identifier(
schema=full_schema_name,
entity=row.table_id,
inspector=inspector,
)
)
_profile_candidates.append(f"{full_schema_name}.{row.table_id}")
logger.debug(
f"Generated profiling candidates for {schema}: {_profile_candidates}"
)
Expand Down

0 comments on commit cb05159

Please sign in to comment.