diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index c6d656ecfd4e98..1a61adb5e6727b 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -293,6 +293,31 @@ repos: # changes quickly - especially when we want the early modifications from the first local group # to be applied before the non-local pre-commits are run hooks: + - id: validate-operators-init + name: Prevent templated field logic checks in operators' __init__ + language: python + entry: ./scripts/ci/pre_commit/pre_commit_validate_operators_init.py + pass_filenames: true + files: ^airflow/providers/.*/(operators|transfers|sensors)/.*\.py$ + additional_dependencies: [ 'rich>=12.4.4' ] + # TODO: Handle the provider-specific exclusions and remove them from the list, see: + # https://github.com/apache/airflow/issues/36484 + exclude: | + (?x)^( + ^.*__init__\.py$| + ^airflow\/providers\/google\/cloud\/operators\/bigquery\.py$| + ^airflow\/providers\/amazon\/aws\/transfers\/gcs_to_s3\.py$| + ^airflow\/providers\/databricks\/operators\/databricks\.py$| + ^airflow\/providers\/google\/cloud\/operators\/cloud_storage_transfer_service\.py$| + ^airflow\/providers\/google\/cloud\/transfers\/bigquery_to_mysql\.py$| + ^airflow\/providers\/google\/cloud\/operators\/vertex_ai\/auto_ml\.py$| + ^airflow\/providers\/amazon\/aws\/transfers\/redshift_to_s3\.py$| + ^airflow\/providers\/google\/cloud\/operators\/compute\.py$| + ^airflow\/providers\/google\/cloud\/operators\/vertex_ai\/custom_job\.py$| + ^airflow\/providers\/cncf\/kubernetes\/operators\/pod\.py$| + ^airflow\/providers\/amazon\/aws\/operators\/emr\.py$| + ^airflow\/providers\/amazon\/aws\/operators\/eks\.py$ + )$ - id: ruff name: Run 'ruff' for extremely fast Python linting description: "Run 'ruff' for extremely fast Python linting" diff --git a/STATIC_CODE_CHECKS.rst b/STATIC_CODE_CHECKS.rst index 51712f0a9402fa..8a1923f8823f09 100644 --- a/STATIC_CODE_CHECKS.rst +++ b/STATIC_CODE_CHECKS.rst @@ -414,6 +414,8 @@ require Breeze Docker image to be built locally. +-----------------------------------------------------------+--------------------------------------------------------------+---------+ | update-version | Update version to the latest version in the documentation | | +-----------------------------------------------------------+--------------------------------------------------------------+---------+ +| validate-operators-init | Prevent templated field logic checks in operators' __init__ | | ++-----------------------------------------------------------+--------------------------------------------------------------+---------+ | yamllint | Check YAML files with yamllint | | +-----------------------------------------------------------+--------------------------------------------------------------+---------+ diff --git a/dev/breeze/src/airflow_breeze/pre_commit_ids.py b/dev/breeze/src/airflow_breeze/pre_commit_ids.py index 3c79efeae2e6e4..810b00c6397e8b 100644 --- a/dev/breeze/src/airflow_breeze/pre_commit_ids.py +++ b/dev/breeze/src/airflow_breeze/pre_commit_ids.py @@ -133,5 +133,6 @@ "update-supported-versions", "update-vendored-in-k8s-json-schema", "update-version", + "validate-operators-init", "yamllint", ] diff --git a/docs/apache-airflow/howto/custom-operator.rst b/docs/apache-airflow/howto/custom-operator.rst index 012409ec74b845..ce32654a6b6902 100644 --- a/docs/apache-airflow/howto/custom-operator.rst +++ b/docs/apache-airflow/howto/custom-operator.rst @@ -269,6 +269,88 @@ Currently available lexers: If you use a non-existing lexer then the value of the template field will be rendered as a pretty-printed object. +Limitations +^^^^^^^^^^^ +To prevent misuse, the following limitations must be observed when defining and assigning templated fields in the +operator's constructor (when such exists, otherwise - see below): + +1. Templated fields' corresponding parameters passed into the constructor must be named exactly +as the fields. The following example is invalid, as the parameter passed into the constructor is not the same as the +templated field: + +.. code-block:: python + + class HelloOperator(BaseOperator): + template_fields = "field_a" + + def __init__(field_a_id) -> None: # <- should be def __init__(field_a)-> None + self.field_a = field_a_id # <- should be self.field_a = field_a + +2. Templated fields' instance members must be assigned with their corresponding parameter from the constructor, +either by a direct assignment or by calling the parent's constructor (in which these fields are +defined as ``template_fields``) with explicit an assignment of the parameter. +The following example is invalid, as the instance member ``self.field_a`` is not assigned at all, despite being a +templated field: + +.. code-block:: python + + class HelloOperator(BaseOperator): + template_fields = ("field_a", "field_b") + + def __init__(field_a, field_b) -> None: + self.field_b = field_b + + +The following example is also invalid, as the instance member ``self.field_a`` of ``MyHelloOperator`` is initialized +implicitly as part of the ``kwargs`` passed to its parent constructor: + +.. code-block:: python + + class HelloOperator(BaseOperator): + template_fields = "field_a" + + def __init__(field_a) -> None: + self.field_a = field_a + + + class MyHelloOperator(HelloOperator): + template_fields = ("field_a", "field_b") + + def __init__(field_b, **kwargs) -> None: # <- should be def __init__(field_a, field_b, **kwargs) + super().__init__(**kwargs) # <- should be super().__init__(field_a=field_a, **kwargs) + self.field_b = field_b + +3. Applying actions on the parameter during the assignment in the constructor is not allowed. +Any action on the value should be applied in the ``execute()`` method. +Therefore, the following example is invalid: + +.. code-block:: python + + class HelloOperator(BaseOperator): + template_fields = "field_a" + + def __init__(field_a) -> None: + self.field_a = field_a.lower() # <- assignment should be only self.field_a = field_a + +When an operator inherits from a base operator and does not have a constructor defined on its own, the limitations above +do not apply. However, the templated fields must be set properly in the parent according to those limitations. + +Thus, the following example is valid: + +.. code-block:: python + + class HelloOperator(BaseOperator): + template_fields = "field_a" + + def __init__(field_a) -> None: + self.field_a = field_a + + + class MyHelloOperator(HelloOperator): + template_fields = "field_a" + +The limitations above are enforced by a pre-commit named 'validate-operators-init'. + Add template fields with subclassing ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ A common use case for creating a custom operator is for simply augmenting existing ``template_fields``. diff --git a/images/breeze/output_static-checks.svg b/images/breeze/output_static-checks.svg index 3979036fae0fd9..421c4dbb71ec95 100644 --- a/images/breeze/output_static-checks.svg +++ b/images/breeze/output_static-checks.svg @@ -1,4 +1,4 @@ - +