[AIRFLOW-3987] Unify GCP's Connection IDs (apache#4818)

ryanyuan authored and Chad Henderson committed Apr 16, 2019
1 parent 53d5a3d commit a0d19e0
Showing 15 changed files with 30 additions and 46 deletions.
11 changes: 11 additions & 0 deletions UPDATING.md
@@ -24,6 +24,17 @@ assists users migrating to a new version.

## Airflow Master

+ #### Unify default conn_id for Google Cloud Platform
+
+ Previously, not all hooks and operators related to Google Cloud Platform used
+ ``google_cloud_default`` as their default conn_id; there is now a single
+ default. Values such as ``google_cloud_storage_default``, ``bigquery_default``,
+ and ``google_cloud_datastore_default`` have been deprecated, although the
+ configuration of existing connections in the database has been preserved. To
+ keep using a deprecated GCP conn_id, pass it explicitly to the operator or
+ hook (see the sketch below). Otherwise, ``google_cloud_default`` is used as
+ the GCP conn_id by default.
+
### The chain function is removed

Bit operations like `>>` or `<<` are recommended for setting dependencies, as they are easier to explain.
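For a concrete view of the migration path above, here is a minimal sketch using the 1.10-era ``airflow.contrib`` import path touched by this commit; the task IDs and the query are invented for illustration:

```python
from airflow.contrib.operators.bigquery_operator import BigQueryOperator

# Relies on the unified default connection, 'google_cloud_default'.
query_unified = BigQueryOperator(
    task_id='query_with_unified_default',
    sql='SELECT 1',
)

# Keeps a deprecated conn_id by passing it explicitly.
query_legacy = BigQueryOperator(
    task_id='query_with_legacy_conn',
    sql='SELECT 1',
    bigquery_conn_id='bigquery_default',
)
```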
2 changes: 1 addition & 1 deletion airflow/contrib/hooks/bigquery_hook.py
@@ -52,7 +52,7 @@ class BigQueryHook(GoogleCloudBaseHook, DbApiHook):
conn_name_attr = 'bigquery_conn_id'

def __init__(self,
- bigquery_conn_id='bigquery_default',
+ bigquery_conn_id='google_cloud_default',
delegate_to=None,
use_legacy_sql=True,
location=None):
2 changes: 1 addition & 1 deletion airflow/contrib/hooks/datastore_hook.py
@@ -33,7 +33,7 @@ class DatastoreHook(GoogleCloudBaseHook):
"""

def __init__(self,
- datastore_conn_id='google_cloud_datastore_default',
+ datastore_conn_id='google_cloud_default',
delegate_to=None):
super(DatastoreHook, self).__init__(datastore_conn_id, delegate_to)
self.connection = self.get_conn()
6 changes: 3 additions & 3 deletions airflow/contrib/operators/bigquery_check_operator.py
@@ -63,7 +63,7 @@ class BigQueryCheckOperator(CheckOperator):
@apply_defaults
def __init__(self,
sql,
- bigquery_conn_id='bigquery_default',
+ bigquery_conn_id='google_cloud_default',
use_legacy_sql=True,
*args, **kwargs):
super(BigQueryCheckOperator, self).__init__(sql=sql, *args, **kwargs)
@@ -91,7 +91,7 @@ class BigQueryValueCheckOperator(ValueCheckOperator):
def __init__(self, sql,
pass_value,
tolerance=None,
- bigquery_conn_id='bigquery_default',
+ bigquery_conn_id='google_cloud_default',
use_legacy_sql=True,
*args, **kwargs):
super(BigQueryValueCheckOperator, self).__init__(
@@ -131,7 +131,7 @@ class BigQueryIntervalCheckOperator(IntervalCheckOperator):

@apply_defaults
def __init__(self, table, metrics_thresholds, date_filter_column='ds',
- days_back=-7, bigquery_conn_id='bigquery_default',
+ days_back=-7, bigquery_conn_id='google_cloud_default',
use_legacy_sql=True, *args, **kwargs):
super(BigQueryIntervalCheckOperator, self).__init__(
table=table, metrics_thresholds=metrics_thresholds,
2 changes: 1 addition & 1 deletion airflow/contrib/operators/bigquery_get_data.py
@@ -76,7 +76,7 @@ def __init__(self,
table_id,
max_results='100',
selected_fields=None,
- bigquery_conn_id='bigquery_default',
+ bigquery_conn_id='google_cloud_default',
delegate_to=None,
*args,
**kwargs):
10 changes: 5 additions & 5 deletions airflow/contrib/operators/bigquery_operator.py
@@ -118,7 +118,7 @@ def __init__(self,
write_disposition='WRITE_EMPTY',
allow_large_results=False,
flatten_results=None,
- bigquery_conn_id='bigquery_default',
+ bigquery_conn_id='google_cloud_default',
delegate_to=None,
udf_config=None,
use_legacy_sql=True,
@@ -296,7 +296,7 @@ def __init__(self,
schema_fields=None,
gcs_schema_object=None,
time_partitioning=None,
- bigquery_conn_id='bigquery_default',
+ bigquery_conn_id='google_cloud_default',
google_cloud_storage_conn_id='google_cloud_default',
delegate_to=None,
labels=None,
@@ -436,7 +436,7 @@ def __init__(self,
quote_character=None,
allow_quoted_newlines=False,
allow_jagged_rows=False,
- bigquery_conn_id='bigquery_default',
+ bigquery_conn_id='google_cloud_default',
google_cloud_storage_conn_id='google_cloud_default',
delegate_to=None,
src_fmt_configs=None,
@@ -532,7 +532,7 @@ class BigQueryDeleteDatasetOperator(BaseOperator):
def __init__(self,
dataset_id,
project_id=None,
- bigquery_conn_id='bigquery_default',
+ bigquery_conn_id='google_cloud_default',
delegate_to=None,
*args, **kwargs):
self.dataset_id = dataset_id
@@ -594,7 +594,7 @@ def __init__(self,
dataset_id,
project_id=None,
dataset_reference=None,
- bigquery_conn_id='bigquery_default',
+ bigquery_conn_id='google_cloud_default',
delegate_to=None,
*args, **kwargs):
self.dataset_id = dataset_id
2 changes: 1 addition & 1 deletion airflow/contrib/operators/bigquery_table_delete_operator.py
@@ -46,7 +46,7 @@ class BigQueryTableDeleteOperator(BaseOperator):
@apply_defaults
def __init__(self,
deletion_dataset_table,
- bigquery_conn_id='bigquery_default',
+ bigquery_conn_id='google_cloud_default',
delegate_to=None,
ignore_if_missing=False,
*args,
2 changes: 1 addition & 1 deletion airflow/contrib/operators/bigquery_to_bigquery.py
@@ -64,7 +64,7 @@ def __init__(self,
destination_project_dataset_table,
write_disposition='WRITE_EMPTY',
create_disposition='CREATE_IF_NEEDED',
- bigquery_conn_id='bigquery_default',
+ bigquery_conn_id='google_cloud_default',
delegate_to=None,
labels=None,
*args,
2 changes: 1 addition & 1 deletion airflow/contrib/operators/bigquery_to_gcs.py
@@ -71,7 +71,7 @@ def __init__(self,
export_format='CSV',
field_delimiter=',',
print_header=True,
- bigquery_conn_id='bigquery_default',
+ bigquery_conn_id='google_cloud_default',
delegate_to=None,
labels=None,
*args,
2 changes: 1 addition & 1 deletion airflow/contrib/operators/gcs_to_bq.py
@@ -149,7 +149,7 @@ def __init__(self,
allow_quoted_newlines=False,
allow_jagged_rows=False,
max_id_key=None,
- bigquery_conn_id='bigquery_default',
+ bigquery_conn_id='google_cloud_default',
google_cloud_storage_conn_id='google_cloud_default',
delegate_to=None,
schema_update_options=(),
2 changes: 1 addition & 1 deletion airflow/contrib/operators/gcs_to_s3.py
@@ -67,7 +67,7 @@ def __init__(self,
bucket,
prefix=None,
delimiter=None,
- google_cloud_storage_conn_id='google_cloud_storage_default',
+ google_cloud_storage_conn_id='google_cloud_default',
delegate_to=None,
dest_aws_conn_id=None,
dest_s3_key=None,
2 changes: 1 addition & 1 deletion airflow/contrib/sensors/bigquery_sensor.py
@@ -50,7 +50,7 @@ def __init__(self,
project_id,
dataset_id,
table_id,
- bigquery_conn_id='bigquery_default_conn',
+ bigquery_conn_id='google_cloud_default',
delegate_to=None,
*args, **kwargs):

4 changes: 0 additions & 4 deletions airflow/utils/db.py
@@ -93,10 +93,6 @@ def initdb():
conn_id='airflow_db', conn_type='mysql',
host='mysql', login='root', password='',
schema='airflow'))
- merge_conn(
-     Connection(
-         conn_id='bigquery_default', conn_type='google_cloud_platform',
-         schema='default'))
merge_conn(
Connection(
conn_id='local_mysql', conn_type='mysql',
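Note that ``initdb`` no longer seeds ``bigquery_default``, so a freshly initialized metadata database will not contain that connection. If legacy DAGs still reference it, the entry can be recreated; here is a sketch that mirrors the removed ``merge_conn`` call, assuming a configured Airflow metadata database:

```python
from airflow import settings
from airflow.models import Connection

session = settings.Session()
# Recreate the legacy connection only if it is absent, using the
# same values initdb used to seed.
if not session.query(Connection).filter(
        Connection.conn_id == 'bigquery_default').first():
    session.add(Connection(conn_id='bigquery_default',
                           conn_type='google_cloud_platform',
                           schema='default'))
    session.commit()
session.close()
```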
25 changes: 1 addition & 24 deletions docs/howto/connection/gcp.rst
@@ -36,30 +36,7 @@ There are two ways to connect to GCP using Airflow.
Default Connection IDs
----------------------

- The following connection IDs are used by default.
-
- ``bigquery_default``
-     Used by the :class:`~airflow.contrib.hooks.bigquery_hook.BigQueryHook`
-     hook.
-
- ``google_cloud_datastore_default``
-     Used by the :class:`~airflow.contrib.hooks.datastore_hook.DatastoreHook`
-     hook.
-
- ``google_cloud_default``
-     Used by those hooks:
-
-     * :class:`~airflow.contrib.hooks.gcp_api_base_hook.GoogleCloudBaseHook`
-     * :class:`~airflow.contrib.hooks.gcp_dataflow_hook.DataFlowHook`
-     * :class:`~airflow.contrib.hooks.gcp_dataproc_hook.DataProcHook`
-     * :class:`~airflow.contrib.hooks.gcp_mlengine_hook.MLEngineHook`
-     * :class:`~airflow.contrib.hooks.gcs_hook.GoogleCloudStorageHook`
-     * :class:`~airflow.contrib.hooks.gcp_bigtable_hook.BigtableHook`
-     * :class:`~airflow.contrib.hooks.gcp_compute_hook.GceHook`
-     * :class:`~airflow.contrib.hooks.gcp_function_hook.GcfHook`
-     * :class:`~airflow.contrib.hooks.gcp_spanner_hook.CloudSpannerHook`
-     * :class:`~airflow.contrib.hooks.gcp_sql_hook.CloudSqlHook`
-
+ All hooks and operators related to Google Cloud Platform use ``google_cloud_default`` by default.

Configuring the Connection
--------------------------
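With the unified default documented above, GCP hooks can be constructed without per-service conn_ids. A short sketch using the contrib import paths as of this commit:

```python
from airflow.contrib.hooks.bigquery_hook import BigQueryHook
from airflow.contrib.hooks.gcs_hook import GoogleCloudStorageHook

# No conn_id arguments needed: both fall back to 'google_cloud_default'.
bq_hook = BigQueryHook()
gcs_hook = GoogleCloudStorageHook()
```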
2 changes: 1 addition & 1 deletion tests/contrib/operators/test_bigquery_operator.py
@@ -182,7 +182,7 @@ def test_execute(self, mock_hook):
write_disposition='WRITE_EMPTY',
allow_large_results=False,
flatten_results=None,
- bigquery_conn_id='bigquery_default',
+ bigquery_conn_id='google_cloud_default',
udf_config=None,
use_legacy_sql=True,
maximum_billing_tier=None,