diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 262fa871a69f20..ff05e1d1857690 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -201,6 +201,13 @@ repos:
         entry: "./scripts/ci/pre_commit_lint_dockerfile.sh"
         files: ^Dockerfile.*$
         pass_filenames: true
+      - id: setup-order
+        name: Checks the order of dependencies in setup.py
+        language: python
+        files: ^setup.py$
+        pass_filenames: false
+        require_serial: true
+        entry: tests/test_order_setup.py
       - id: update-breeze-file
         name: Update output of breeze command in BREEZE.rst
         entry: "./scripts/ci/pre_commit_breeze_cmd_line.sh"
diff --git a/BREEZE.rst b/BREEZE.rst
index be05e59741b62e..0b09f8b64356dc 100644
--- a/BREEZE.rst
+++ b/BREEZE.rst
@@ -872,7 +872,7 @@ This is the current syntax for `./breeze <./breeze>`_:
 
   -S, --static-check
           Run selected static checks for currently changed files. You should specify static check that
           you would like to run or 'all' to run all checks. One of
-                 [ all check-hooks-apply check-merge-conflict check-executables-have-shebangs check-xml debug-statements detect-private-key doctoc end-of-file-fixer flake8 forbid-tabs insert-license check-apache-license lint-dockerfile mixed-line-ending mypy shellcheck].
+                 [ all check-apache-license check-executables-have-shebangs check-hooks-apply check-merge-conflict check-xml debug-statements doctoc detect-private-key end-of-file-fixer flake8 forbid-tabs insert-license lint-dockerfile mixed-line-ending mypy setup-order shellcheck].
           You can pass extra arguments including options to to the pre-commit framework as
           passed after --. For example:
@@ -886,7 +886,7 @@ This is the current syntax for `./breeze <./breeze>`_:
 
   -F, --static-check-all-files
           Run selected static checks for all applicable files. You should specify static check that
           you would like to run or 'all' to run all checks. One of
-                 [ all check-hooks-apply check-merge-conflict check-executables-have-shebangs check-xml debug-statements detect-private-key doctoc end-of-file-fixer flake8 forbid-tabs insert-license check-apache-license lint-dockerfile mixed-line-ending mypy shellcheck].
+                 [ all check-apache-license check-executables-have-shebangs check-hooks-apply check-merge-conflict check-xml debug-statements doctoc detect-private-key end-of-file-fixer flake8 forbid-tabs insert-license lint-dockerfile mixed-line-ending mypy setup-order shellcheck].
           You can pass extra arguments including options to the pre-commit framework as
           passed after --. For example:
diff --git a/CONTRIBUTING.rst b/CONTRIBUTING.rst
index 918175b30a2f8d..79dcd51faa92fd 100644
--- a/CONTRIBUTING.rst
+++ b/CONTRIBUTING.rst
@@ -393,6 +393,8 @@ image built locally):
 ----------------------------------- ---------------------------------------------------------------- ------------
 ``rst-backticks``                   Checks if RST files use double backticks for code.
 ----------------------------------- ---------------------------------------------------------------- ------------
+``setup-order``                     Checks the order of dependencies in setup.py
+----------------------------------- ---------------------------------------------------------------- ------------
 ``shellcheck``                      Checks shell files with shellcheck.
 ----------------------------------- ---------------------------------------------------------------- ------------
 ``update-breeze-file``              Update output of breeze command in BREEZE.rst.
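Because the hook's entry point is the test module itself (with `language: python`), the same check can also be exercised without going through the framework. The snippet below is an illustrative sketch only, not part of this patch; it assumes it is run from the repository root with the project's Python on the path.

```python
# Run the setup.py ordering check directly with unittest (illustrative only,
# not part of this change); assumes the current directory is the repo root.
import unittest

suite = unittest.defaultTestLoader.discover("tests", pattern="test_order_setup.py")
unittest.TextTestRunner(verbosity=2).run(suite)
```

Through the framework itself, `pre-commit run setup-order --all-files` triggers the same hook.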
diff --git a/breeze-complete b/breeze-complete
index 8239ea7a0dd492..26e4e588d4b347 100644
--- a/breeze-complete
+++ b/breeze-complete
@@ -22,7 +22,7 @@
 _BREEZE_ALLOWED_ENVS=" docker kubernetes "
 _BREEZE_ALLOWED_BACKENDS=" sqlite mysql postgres "
 _BREEZE_ALLOWED_KUBERNETES_VERSIONS=" v1.13.0 "
 _BREEZE_ALLOWED_KUBERNETES_MODES=" persistent_mode git_mode "
-_BREEZE_ALLOWED_STATIC_CHECKS=" all check-hooks-apply check-merge-conflict check-executables-have-shebangs check-xml debug-statements detect-private-key doctoc end-of-file-fixer flake8 forbid-tabs insert-license check-apache-license lint-dockerfile mixed-line-ending mypy shellcheck"
+_BREEZE_ALLOWED_STATIC_CHECKS=" all check-apache-license check-executables-have-shebangs check-hooks-apply check-merge-conflict check-xml debug-statements doctoc detect-private-key end-of-file-fixer flake8 forbid-tabs insert-license lint-dockerfile mixed-line-ending mypy setup-order shellcheck"
 _BREEZE_DEFAULT_DOCKERHUB_USER="apache"
 _BREEZE_DEFAULT_DOCKERHUB_REPO="airflow"
diff --git a/setup.py b/setup.py
index abf04469c23b75..2d87421e3cc51c 100644
--- a/setup.py
+++ b/setup.py
@@ -139,128 +139,223 @@ def write_version(filename=os.path.join(*["airflow", "git_version"])):
         file.write(text)
 
 
+# 'Start dependencies group' and 'End dependencies group' are markers for ./tests/test_order_setup.py
+# If you change these markers you should also change the test_main_dependent_group function in ./tests/test_order_setup.py
+# Start dependencies group
 async_packages = [
-    'greenlet>=0.4.9', 'eventlet>= 0.9.7',
-    'gevent>=0.13'
+    'eventlet>= 0.9.7',
+    'gevent>=0.13',
+    'greenlet>=0.4.9',
+]
+atlas = [
+    'atlasclient>=0.1.2',
+]
+azure_blob_storage = [
+    'azure-storage>=0.34.0'
+]
+azure_container_instances = [
+    'azure-mgmt-containerinstance>=1.5.0'
+]
+azure_cosmos = [
+    'azure-cosmos>=3.0.1'
 ]
-atlas = ['atlasclient>=0.1.2']
-azure_blob_storage = ['azure-storage>=0.34.0']
 azure_data_lake = [
-    'azure-mgmt-resource>=2.2.0',
-    'azure-mgmt-datalake-store>=0.5.0', 'azure-datalake-store>=0.0.45'
+    'azure-datalake-store>=0.0.45',
+    'azure-mgmt-datalake-store>=0.5.0',
+    'azure-mgmt-resource>=2.2.0',
+]
+cassandra = [
+    'cassandra-driver>=3.13.0',
 ]
-azure_cosmos = ['azure-cosmos>=3.0.1']
-azure_container_instances = ['azure-mgmt-containerinstance>=1.5.0']
-cassandra = ['cassandra-driver>=3.13.0']
 celery = [
     'celery~=4.3',
     'flower>=0.7.3, <1.0',
+    'kombu==4.6.3',
     'tornado>=4.2.0, <6.0',  # Dep of flower. Pin to a version that works on Py3.5.2
-    'kombu==4.6.3'
 ]
 cgroups = [
     'cgroupspy>=0.1.4',
 ]
-# major update coming soon, clamp to 0.x
-cloudant = ['cloudant>=0.5.9,<2.0']
+cloudant = [
+    'cloudant>=0.5.9,<2.0',
+]
 crypto = ['cryptography>=0.9.3']
 dask = [
-    'distributed>=1.17.1, <2'
+    'distributed>=1.17.1, <2',
+]
+databricks = [
+    'requests>=2.20.0, <3',
+]
+datadog = [
+    'datadog>=0.14.0',
 ]
-databricks = ['requests>=2.20.0, <3']
-datadog = ['datadog>=0.14.0']
 doc = [
+    'sphinx>=2.1.2;python_version>="3.0"',
+    'sphinx==1.8.5;python_version<"3.0"',
     'sphinx-argparse>=0.1.13',
     'sphinx-autoapi==1.0.0',
     'sphinx-rtd-theme>=0.1.6',
-    'sphinx>=2.1.2;python_version>="3.0"',
-    'sphinx==1.8.5;python_version<"3.0"',
     'sphinxcontrib-httpdomain>=1.7.0',
 ]
-docker = ['docker~=3.0']
-druid = ['pydruid>=0.4.1']
+docker = [
+    'docker~=3.0',
+]
+druid = [
+    'pydruid>=0.4.1',
+]
 elasticsearch = [
     'elasticsearch>=5.0.0,<6.0.0',
-    'elasticsearch-dsl>=5.0.0,<6.0.0'
+    'elasticsearch-dsl>=5.0.0,<6.0.0',
 ]
 emr = ['boto3>=1.0.0, <1.8.0']
+flask_oauth = [
+    'Flask-OAuthlib>=0.9.1',
+    'oauthlib!=2.0.3,!=2.0.4,!=2.0.5,<3.0.0,>=1.1.2',
+    'requests-oauthlib==1.1.0',
+]
 gcp = [
+    'PyOpenSSL',
     'google-api-python-client>=1.6.0, <2.0.0dev',
-    'google-auth-httplib2>=0.0.1',
     'google-auth>=1.0.0, <2.0.0dev',
+    'google-auth-httplib2>=0.0.1',
     'google-cloud-bigtable==0.33.0',
     'google-cloud-container>=0.1.1',
     'google-cloud-dlp>=0.11.0',
     'google-cloud-language>=1.1.1',
     'google-cloud-spanner>=1.7.1, <1.10.0',
+    'google-cloud-speech>=0.36.3',
     'google-cloud-storage~=1.16',
+    'google-cloud-texttospeech>=0.4.0',
     'google-cloud-translate>=1.3.3',
     'google-cloud-videointelligence>=1.7.0',
     'google-cloud-vision>=0.35.2',
-    'google-cloud-texttospeech>=0.4.0',
-    'google-cloud-speech>=0.36.3',
     'grpcio-gcp>=0.2.2',
     'httplib2~=0.9',
     'pandas-gbq',
-    'PyOpenSSL',
 ]
-grpc = ['grpcio>=1.15.0']
-flask_oauth = [
-    'Flask-OAuthlib>=0.9.1',
-    'oauthlib!=2.0.3,!=2.0.4,!=2.0.5,<3.0.0,>=1.1.2',
-    'requests-oauthlib==1.1.0'
+grpc = [
+    'grpcio>=1.15.0',
+]
+hdfs = [
+    'snakebite>=2.7.8',
 ]
-hdfs = ['snakebite>=2.7.8']
 hive = [
     'hmsclient>=0.1.0',
     'pyhive>=0.6.0',
 ]
-jdbc = ['jaydebeapi>=1.1.1']
-jenkins = ['python-jenkins>=1.0.0']
-jira = ['JIRA>1.0.7']
-kerberos = ['pykerberos>=1.1.13',
-            'requests_kerberos>=0.10.0',
-            'thrift_sasl>=0.2.0',
-            'snakebite[kerberos]>=2.7.8']
-kubernetes = ['kubernetes>=3.0.0',
-              'cryptography>=2.0.0']
-ldap = ['ldap3>=2.5.1']
-mssql = ['pymssql~=2.1.1']
-mysql = ['mysqlclient>=1.3.6,<1.4']
-oracle = ['cx_Oracle>=5.1.2']
-papermill = ['papermill[all]>=1.0.0',
-             'nteract-scrapbook[all]>=0.2.1']
+jdbc = [
+    'jaydebeapi>=1.1.1',
+]
+jenkins = [
+    'python-jenkins>=1.0.0',
+]
+jira = [
+    'JIRA>1.0.7',
+]
+kerberos = [
+    'pykerberos>=1.1.13',
+    'requests_kerberos>=0.10.0',
+    'snakebite[kerberos]>=2.7.8',
+    'thrift_sasl>=0.2.0',
+]
+kubernetes = [
+    'cryptography>=2.0.0',
+    'kubernetes>=3.0.0',
+]
+ldap = [
+    'ldap3>=2.5.1',
+]
+mongo = [
+    'dnspython>=1.13.0,<2.0.0',
+    'pymongo>=3.6.0',
+]
+mssql = [
+    'pymssql~=2.1.1',
+]
+mysql = [
+    'mysqlclient>=1.3.6,<1.4',
+]
+oracle = [
+    'cx_Oracle>=5.1.2',
+]
+pagerduty = [
+    'pypd>=1.1.0',
+]
+papermill = [
+    'papermill[all]>=1.0.0',
+    'nteract-scrapbook[all]>=0.2.1',
+]
 password = [
     'bcrypt>=2.0.0',
     'flask-bcrypt>=0.7.1',
 ]
-pinot = ['pinotdb==0.1.1']
-postgres = ['psycopg2-binary>=2.7.4']
-qds = ['qds-sdk>=1.10.4']
-rabbitmq = ['librabbitmq>=1.6.1']
-redis = ['redis~=3.2']
-s3 = ['boto3>=1.7.0, <1.8.0']
-salesforce = ['simple-salesforce>=0.72']
-samba = ['pysmbclient>=0.1.3']
-segment = ['analytics-python>=1.2.9']
-sendgrid = ['sendgrid>=5.2.0,<6']
-sentry = ['sentry-sdk>=0.8.0', "blinker>=1.1"]
-slack = ['slackclient>=1.0.0,<2.0.0']
-mongo = ['pymongo>=3.6.0', 'dnspython>=1.13.0,<2.0.0']
-snowflake = ['snowflake-connector-python>=1.5.2',
-             'snowflake-sqlalchemy>=1.1.0']
-ssh = ['paramiko>=2.1.1', 'pysftp>=0.2.9', 'sshtunnel>=0.1.4,<0.2']
-statsd = ['statsd>=3.3.0, <4.0']
-vertica = ['vertica-python>=0.5.1']
-virtualenv = ['virtualenv']
-webhdfs = ['hdfs[dataframe,avro,kerberos]>=2.0.4']
-winrm = ['pywinrm==0.2.2']
-zendesk = ['zdesk']
-
-all_dbs = postgres + mysql + hive + mssql + hdfs + vertica + cloudant + druid + pinot \
-    + cassandra + mongo
+pinot = [
+    'pinotdb==0.1.1',
+]
+postgres = [
+    'psycopg2-binary>=2.7.4',
+]
+qds = [
+    'qds-sdk>=1.10.4',
+]
+rabbitmq = [
+    'librabbitmq>=1.6.1',
+]
+redis = [
+    'redis~=3.2',
+]
+s3 = [
+    'boto3>=1.7.0, <1.8.0'
+]
+salesforce = [
+    'simple-salesforce>=0.72',
+]
+samba = [
+    'pysmbclient>=0.1.3',
+]
+segment = [
+    'analytics-python>=1.2.9',
+]
+sendgrid = [
+    'sendgrid>=5.2.0,<6',
+]
+sentry = [
+    'blinker>=1.1',
+    'sentry-sdk>=0.8.0',
+]
+slack = [
+    'slackclient>=1.0.0,<2.0.0',
+]
+snowflake = [
+    'snowflake-connector-python>=1.5.2',
+    'snowflake-sqlalchemy>=1.1.0',
+]
+ssh = [
+    'paramiko>=2.1.1',
+    'pysftp>=0.2.9',
+    'sshtunnel>=0.1.4,<0.2',
+]
+statsd = [
+    'statsd>=3.3.0, <4.0',
+]
+vertica = [
+    'vertica-python>=0.5.1',
+]
+virtualenv = [
+    'virtualenv',
+]
+webhdfs = [
+    'hdfs[avro,dataframe,kerberos]>=2.0.4',
+]
+winrm = [
+    'pywinrm==0.2.2',
+]
+zendesk = [
+    'zdesk',
+]
+# End dependencies group
+
+all_dbs = cassandra + cloudant + druid + hdfs + hive + mongo + mssql + mysql + pinot + postgres + vertica
 
 ############################################################################################################
 # IMPORTANT NOTE!!!!!!!!!!!!!!!
@@ -305,14 +400,16 @@ def write_version(filename=os.path.join(*["airflow", "git_version"])):
 else:
     devel += ['unittest2']
 
-devel_minreq = devel + kubernetes + mysql + doc + password + s3 + cgroups
-devel_hadoop = devel_minreq + hive + hdfs + webhdfs + kerberos
-devel_azure = devel_minreq + azure_data_lake + azure_cosmos
-devel_all = (sendgrid + devel + all_dbs + doc + samba + s3 + slack + crypto + oracle +
-             docker + ssh + kubernetes + celery + azure_blob_storage + redis + gcp + grpc +
-             datadog + zendesk + jdbc + ldap + kerberos + password + webhdfs + jenkins +
-             druid + pinot + segment + snowflake + elasticsearch + sentry + azure_data_lake + azure_cosmos +
-             atlas + azure_container_instances + cgroups + papermill + virtualenv)
+devel_minreq = cgroups + devel + doc + kubernetes + mysql + password + s3
+devel_hadoop = devel_minreq + hdfs + hive + kerberos + webhdfs
+devel_azure = azure_cosmos + azure_data_lake + devel_minreq
+devel_all = (all_dbs + atlas +
+             azure_blob_storage + azure_container_instances + azure_cosmos + azure_data_lake +
+             celery + cgroups + crypto + datadog + devel + doc + docker + druid +
+             elasticsearch + gcp + grpc + jdbc + jenkins + kerberos + kubernetes + ldap + oracle +
+             papermill + password + pinot +
+             redis + s3 + samba + segment + sendgrid + sentry + slack + snowflake + ssh +
+             virtualenv + webhdfs + zendesk)
 
 # Snakebite & Google Cloud Dataflow are not Python 3 compatible :'(
 if PY3:
@@ -357,8 +454,8 @@ def do_setup():
             'dill>=0.2.2, <0.4',
             'enum34~=1.1.6;python_version<"3.4"',
             'flask>=1.1.0, <2.0',
-            'flask-appbuilder>=1.12.5, <2.0.0',
             'flask-admin==1.5.3',
+            'flask-appbuilder>=1.12.5, <2.0.0',
             'flask-caching>=1.3.3, <1.4.0',
             'flask-login>=0.3, <0.5',
             'flask-swagger==0.2.13',
@@ -368,9 +465,9 @@ def do_setup():
             'graphviz>=0.12',
             'gunicorn>=19.5.0, <20.0',
             'iso8601>=0.1.12',
-            'jsonschema~=3.0',
-            'json-merge-patch==0.2',
             'jinja2>=2.10.1, <2.11.0',
+            'json-merge-patch==0.2',
+            'jsonschema~=3.0',
             'lazy_object_proxy~=1.3',
             'markdown>=2.5.2, <3.0',
             'marshmallow-sqlalchemy>=0.16.1, <0.19.0',
@@ -387,9 +484,9 @@ def do_setup():
             'tenacity==4.12.0',
             'termcolor==1.1.0',
             'text-unidecode==1.2',
+            'thrift>=0.9.2',
             'typing;python_version<"3.5"',
             'typing-extensions>=3.7.4;python_version<"3.8"',
-            'thrift>=0.9.2',
             'tzlocal>=1.4,<2.0.0',
             'unicodecsv>=0.14.1',
             'zope.deprecation>=4.0, <5.0',
@@ -405,14 +502,13 @@ def do_setup():
         ],
         extras_require={
             'all': devel_all,
-            'devel_ci': devel_ci,
             'all_dbs': all_dbs,
-            'atlas': atlas,
             'async': async_packages,
+            'atlas': atlas,
             'azure_blob_storage': azure_blob_storage,
-            'azure_data_lake': azure_data_lake,
-            'azure_cosmos': azure_cosmos,
             'azure_container_instances': azure_container_instances,
+            'azure_cosmos': azure_cosmos,
+            'azure_data_lake': azure_data_lake,
             'cassandra': cassandra,
             'celery': celery,
             'cgroups': cgroups,
@@ -422,15 +518,16 @@ def do_setup():
             'databricks': databricks,
             'datadog': datadog,
             'devel': devel_minreq,
-            'devel_hadoop': devel_hadoop,
             'devel_azure': devel_azure,
+            'devel_ci': devel_ci,
+            'devel_hadoop': devel_hadoop,
             'doc': doc,
             'docker': docker,
             'druid': druid,
             'elasticsearch': elasticsearch,
             'emr': emr,
             'gcp': gcp,
-            'gcp_api': gcp,  # TODO: remove this in Airflow 2.1
+            'gcp_api': gcp,
             'github_enterprise': flask_oauth,
             'google_auth': flask_oauth,
             'grpc': grpc,
@@ -455,9 +552,9 @@ def do_setup():
             's3': s3,
             'salesforce': salesforce,
             'samba': samba,
+            'segment': segment,
             'sendgrid': sendgrid,
             'sentry': sentry,
-            'segment': segment,
             'slack': slack,
             'snowflake': snowflake,
             'ssh': ssh,
diff --git a/tests/test_order_setup.py b/tests/test_order_setup.py
new file mode 100755
index 00000000000000..dd2413da8c54dc
--- /dev/null
+++ b/tests/test_order_setup.py
@@ -0,0 +1,134 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+"""
+Test the order of dependencies in setup.py
+"""
+
+import os
+import re
+import unittest
+
+
+class TestOrderSetup(unittest.TestCase):
+
+    def setUp(self):
+        current_dir = os.path.dirname(os.path.abspath(__file__))
+        parent_dir = os.path.dirname(current_dir)
+        self.setup_file = open('{parent_dir}/setup.py'.format(parent_dir=parent_dir))
+        self.setup_context = self.setup_file.read()
+
+    def tearDown(self):
+        self.setup_file.close()
+
+    def test_main_dependent_group(self):
+        """
+        Test the order of dependency groups between the markers
+        '# Start dependencies group' and '# End dependencies group' in setup.py
+        """
+        pattern_main_dependent_group = re.compile(
+            '# Start dependencies group\n(.*)# End dependencies group', re.DOTALL)
+        main_dependent_group = pattern_main_dependent_group.findall(self.setup_context)[0]
+
+        pattern_sub_dependent = re.compile(' = \\[.*?\\]\n', re.DOTALL)
+        main_dependent = pattern_sub_dependent.sub(',', main_dependent_group)
+
+        src = main_dependent.strip(',').split(',')
+        alphabetical = sorted(src)
+        self.assertListEqual(alphabetical, src)
+
+    def test_sub_dependent_group(self):
+        """
+        Test the order of dependencies within each group declared like
+        `^dependent_group_name = [.*?]\n` in setup.py
+        """
+        pattern_dependent_group_name = re.compile('^(\\w+) = \\[', re.MULTILINE)
+        dependent_group_names = pattern_dependent_group_name.findall(self.setup_context)
+
+        pattern_dependent_version = re.compile('[~|>|<|=|;].*')
+        for group_name in dependent_group_names:
+            pattern_sub_dependent = re.compile(
+                '{group_name} = \\[(.*?)\\]'.format(group_name=group_name), re.DOTALL)
+            sub_dependent = pattern_sub_dependent.findall(self.setup_context)[0]
+            pattern_dependent = re.compile('\'(.*?)\'')
+            dependent = pattern_dependent.findall(sub_dependent)
+
+            src = [pattern_dependent_version.sub('', p) for p in dependent]
+            alphabetical = sorted(src)
+            self.assertListEqual(alphabetical, src)
+
+    def test_alias_dependent_group(self):
+        """
+        Test the order of groups in each alias declared like
+        `alias_dependent_group = dependent_group_1 + ... + dependent_group_n` in setup.py
+        """
+        pattern = re.compile('^\\w+ = (\\w+ \\+.*)', re.MULTILINE)
+        dependents = pattern.findall(self.setup_context)
+        for dependent in dependents:
+            src = dependent.split(' + ')
+            alphabetical = sorted(src)
+            self.assertListEqual(alphabetical, src)
+
+    def test_devel_all(self):
+        """
+        Test the order of dependency groups in
+        devel_all = (dependent_group_1 + ... + dependent_group_n) in setup.py
+        """
+        pattern = re.compile('devel_all = \\((.*?)\\)', re.DOTALL)
+        dependent = pattern.findall(self.setup_context)[0]
+        pattern_new_line = re.compile('\\n *')
+
+        src = pattern_new_line.sub(' ', dependent).split(' + ')
+        alphabetical = sorted(src)
+        self.assertListEqual(alphabetical, src)
+
+    def test_install_and_setup_requires(self):
+        """
+        Test the order of dependencies in the install_requires and
+        setup_requires sections of do_setup in setup.py
+        """
+        pattern_install_and_setup_requires = re.compile(
+            '(install_requires|setup_requires)=\\[(.*?)\\]', re.DOTALL)
+        install_and_setup_requires = pattern_install_and_setup_requires.findall(self.setup_context)
+
+        for dependent_requires in install_and_setup_requires:
+            pattern_dependent = re.compile('\'(.*?)\'')
+            dependent = pattern_dependent.findall(dependent_requires[1])
+            pattern_dependent_version = re.compile('[~|>|<|=|;].*')
+
+            src = [pattern_dependent_version.sub('', p) for p in dependent]
+            alphabetical = sorted(src)
+            self.assertListEqual(alphabetical, src)
+
+    def test_extras_require(self):
+        """
+        Test the order of dependencies in the extras_require
+        section of do_setup in setup.py
+        """
+        pattern_extras_requires = re.compile('extras_require=\\{(.*?)\\}', re.DOTALL)
+        extras_requires = pattern_extras_requires.findall(self.setup_context)[0]
+
+        pattern_dependent = re.compile('\'(.*?)\'')
+        src = pattern_dependent.findall(extras_requires)
+        alphabetical = sorted(src)
+        self.assertListEqual(alphabetical, src)
+
+
+if __name__ == '__main__':
+    unittest.main(verbosity=2)
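For reference, the ordering rule these tests enforce boils down to stripping version specifiers and comparing the remaining names against their sorted order. The sketch below is illustrative only (it is not part of the patch); it borrows the version-stripping idea from test_sub_dependent_group, and the package names are taken from the dependency groups in setup.py above.

```python
# Illustrative sketch of the ordering rule (not part of the patch):
# drop the version specifier from each requirement, then check that the
# remaining names are already in alphabetical order.
import re

VERSION_SPEC = re.compile(r'[~><=;].*')


def is_alphabetical(requirements):
    names = [VERSION_SPEC.sub('', requirement) for requirement in requirements]
    return names == sorted(names)


print(is_alphabetical(['azure-datalake-store>=0.0.45',
                       'azure-mgmt-datalake-store>=0.5.0',
                       'azure-mgmt-resource>=2.2.0']))       # True: already ordered
print(is_alphabetical(['greenlet>=0.4.9', 'gevent>=0.13']))  # False: 'gevent' sorts first
```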