Bump version: 0.5.3 → 0.5.4 (#214)

dbt 0.5.4 release
dbt-labs · Nov 29, 2016 · c60d6e5 · c60d6e5
1 parent 40948fb
commit c60d6e5
Show file tree

Hide file tree

Showing 89 changed files with 4,285 additions and 37 deletions.
diff --git a/.bumpversion.cfg b/.bumpversion.cfg
@@ -1,5 +1,5 @@
 [bumpversion]
-current_version = 0.5.3
+current_version = 0.5.4
 commit = True
 tag = True
 

diff --git a/.coveragerc b/.coveragerc
@@ -0,0 +1,3 @@
+[report]
+include =
+  dbt/*
diff --git a/Dockerfile b/Dockerfile
@@ -0,0 +1,18 @@
+FROM python
+
+RUN apt-get update
+
+RUN apt-get install -y python-pip netcat
+RUN apt-get install -y python-dev python3-dev
+
+RUN pip install pip --upgrade
+RUN pip install virtualenv
+RUN pip install virtualenvwrapper
+
+COPY . /usr/src/app
+
+WORKDIR /usr/src/app
+RUN cd /usr/src/app
+RUN ./test/setup.sh
+
+
diff --git a/Makefile b/Makefile
@@ -0,0 +1,13 @@
+.PHONY: test
+
+changed_tests := `git status --porcelain | grep '^\( M\|A\)' | awk '{ print $$2 }' | grep '\/test_[a-zA-Z_\-\.]\+.py'`
+
+test:
+	@echo "Test run starting..."
+	@docker-compose run test /usr/src/app/test/runner.sh
+
+test-new:
+	@echo "Test run starting..."
+	@echo "Changed test files:"
+	@echo "${changed_tests}"
+	@docker-compose run test /usr/src/app/test/runner.sh ${changed_tests}
diff --git a/README.md b/README.md
@@ -1,5 +1,11 @@
 # dbt
 Tests: [![CircleCI](https://circleci.com/gh/analyst-collective/dbt/tree/master.svg?style=svg)](https://circleci.com/gh/analyst-collective/dbt/tree/master)
+•
+[![AppVeyor](https://ci.appveyor.com/api/projects/status/v01rwd3q91jnwp9m/branch/development?svg=true)](https://ci.appveyor.com/project/DrewBanin/dbt/branch/development)
+•
+[Coverage](https://circleci.com/api/v1/project/analyst-collective/dbt/latest/artifacts/0/$CIRCLE_ARTIFACTS/htmlcov/index.html?branch=development)
+•
+[Docs](http://dbt.readthedocs.io/en/master/about/overview/)
 
 dbt (data build tool) helps analysts write reliable, modular code using a workflow that closely mirrors software development.
 

diff --git a/appveyor.yml b/appveyor.yml
@@ -0,0 +1,66 @@
+version: 1.0.{build}-{branch}
+
+environment:
+  # SDK v7.0 MSVC Express 2008's SetEnv.cmd script will fail if the
+  # /E:ON and /V:ON options are not enabled in the batch script interpreter
+  # See: http://stackoverflow.com/a/13751649/163740
+  CMD_IN_ENV: "cmd /E:ON /V:ON /C .\\appveyor\\run_with_env.cmd"
+  TOX_ENV: "pywin"
+
+  matrix:
+    - PYTHON: "C:\\Python27"
+      PYTHON_VERSION: "2.7.8"
+      PYTHON_ARCH: "32"
+
+    #- PYTHON: "C:\\Python35"
+    #  PYTHON_VERSION: "3.5.2"
+    #  PYTHON_ARCH: "32"
+
+  PGUSER: postgres
+  PGPASSWORD: Password12!
+
+services:
+  - postgresql94
+
+hosts:
+  database: 127.0.0.1
+
+init:
+  - PATH=C:\Program Files\PostgreSQL\9.4\bin\;%PATH%
+  - ps: Set-Content "c:\program files\postgresql\9.4\data\pg_hba.conf" "host   all   all   ::1/128        trust"
+  - ps: Add-Content "c:\program files\postgresql\9.4\data\pg_hba.conf" "host   all   all   127.0.0.1/32   trust"
+
+install:
+  # Download setup scripts and unzip
+  - ps: "wget https://github.com/cloudify-cosmo/appveyor-utils/archive/master.zip -OutFile ./master.zip"
+  - "7z e master.zip */appveyor/* -oappveyor"
+
+  # Install Python (from the official .msi of http://python.org) and pip when
+  # not already installed.
+  - "powershell ./appveyor/install.ps1"
+
+  # Prepend newly installed Python to the PATH of this build (this cannot be
+  # done from inside the powershell script as it would require to restart
+  # the parent CMD process).
+  - "SET PATH=%PYTHON%;%PYTHON%\\Scripts;%PATH%"
+
+  # Check that we have the expected version and architecture for Python
+  - "python --version"
+  - "python -c \"import struct; print(struct.calcsize('P') * 8)\""
+
+build: false  # Not a C# project, build stuff at the test step instead.
+
+before_test:
+  - "%CMD_IN_ENV% pip install tox"
+
+test_script:
+  # set up psql db
+  - createdb dbt
+  - psql -c "CREATE ROLE root WITH UNENCRYPTED PASSWORD 'password';" -U postgres
+  - psql -c "ALTER ROLE root WITH LOGIN;" -U postgres
+  - psql -c "GRANT CREATE, CONNECT ON DATABASE dbt TO root;" -U postgres
+
+  # TODO: disabling SSL verification system-wide is generally a bad idea; remove this workaround
+  - git config --system http.sslverify false
+
+  - "%CMD_IN_ENV% tox -e %TOX_ENV%"
diff --git a/circle.yml b/circle.yml
@@ -1,5 +1,28 @@
+machine:
+  post:
+    - pyenv global 2.7.9 3.5.0
+  hosts:
+    database: 127.0.0.1
+
+database:
+  override:
+    - createdb dbt
+    - echo "CREATE ROLE root WITH UNENCRYPTED PASSWORD 'password';" | psql -U postgres
+    - echo "ALTER ROLE root WITH LOGIN;" | psql -U postgres
+    - echo "GRANT SELECT, UPDATE, INSERT ON ALL TABLES IN SCHEMA dbt.* TO root;" | psql -U postgres
+    - echo "GRANT CREATE, CONNECT ON DATABASE dbt TO root;" | psql -U postgres
+
+
 dependencies:
   pre:
-    - sudo add-apt-repository -y ppa:fkrull/deadsnakes
-    - sudo apt-get update
-    - sudo apt-get install python3.5 python3.5-dev
+    - pip install --upgrade pip setuptools || true
+    - pip install --upgrade tox tox-pyenv
+  override:
+    - pyenv local 2.7.9 3.5.0
+
+test:
+  override:
+    - sudo chown -R ubuntu:ubuntu /root/
+    - /bin/bash -c 'cd /home/ubuntu/dbt && tox'
+  post:
+    - mv htmlcov $CIRCLE_ARTIFACTS/
diff --git a/dbt/compilation.py b/dbt/compilation.py
@@ -12,7 +12,7 @@
 import time
 import sqlparse
 
-CompilableEntities = ["models", "tests", "archives", "analyses"]
+CompilableEntities = ["models", "data tests", "schema tests", "archives", "analyses"]
 
 class Compiler(object):
     def __init__(self, project, create_template_class):
@@ -60,6 +60,10 @@ def project_schemas(self):
         source_paths = self.project.get('source-paths', [])
         return Source(self.project).get_schemas(source_paths)
 
+    def project_tests(self):
+        source_paths = self.project.get('test-paths', [])
+        return Source(self.project).get_tests(source_paths)
+
     def analysis_sources(self, project):
         paths = project.get('analysis-paths', [])
         return Source(project).get_analyses(paths)
@@ -109,7 +113,7 @@ def model_can_reference(self, src_model, other_model):
         return other_model.own_project['name'] == src_model.own_project['name'] \
                 or src_model.own_project['name'] == src_model.project['name']
 
-    def __ref(self, linker, ctx, model, all_models):
+    def __ref(self, linker, ctx, model, all_models, add_dependency=True):
         schema = ctx['env']['schema']
 
         source_model = tuple(model.fqn)
@@ -138,7 +142,7 @@ def do_ref(*args):
 
             # this creates a trivial cycle -- should this be a compiler error?
             # we can still interpolate the name w/o making a self-cycle
-            if source_model == other_model_fqn:
+            if source_model == other_model_fqn or not add_dependency:
                 pass
             else:
                 linker.dependency(source_model, other_model_fqn)
@@ -163,14 +167,15 @@ def wrapped_do_ref(*args):
 
         return wrapped_do_ref
 
-    def get_context(self, linker, model,  models):
+    def get_context(self, linker, model,  models, add_dependency=False):
         context = self.project.context()
 
         # built-ins
-        context['ref'] = self.__ref(linker, context, model, models)
+        context['ref'] = self.__ref(linker, context, model, models, add_dependency)
         context['config'] = self.__model_config(model, linker)
         context['this'] = This(context['env']['schema'], model.immediate_name, model.name)
         context['var'] = Var(model, context=context)
+        context['target'] = self.project.get('run-target')
 
         # these get re-interpolated at runtime!
         context['run_started_at'] = '{{ run_started_at }}'
@@ -185,15 +190,15 @@ def get_context(self, linker, model,  models):
 
         return context
 
-    def compile_model(self, linker, model, models):
+    def compile_model(self, linker, model, models, add_dependency=True):
         try:
             fs_loader = jinja2.FileSystemLoader(searchpath=model.root_dir)
             jinja = jinja2.Environment(loader=fs_loader)
 
             # this is a dumb jinja2 bug -- on windows, forward slashes are EXPECTED
             posix_filepath = '/'.join(split_path(model.rel_filepath))
             template = jinja.get_template(posix_filepath)
-            context = self.get_context(linker, model, models)
+            context = self.get_context(linker, model, models, add_dependency=add_dependency)
 
             rendered = template.render(context)
         except jinja2.exceptions.TemplateSyntaxError as e:
@@ -329,6 +334,23 @@ def compile_schema_tests(self, linker):
 
         return written_tests
 
+    def compile_data_tests(self, linker):
+        tests = self.project_tests()
+
+        all_models = self.get_models()
+        enabled_models = [model for model in all_models if model.is_enabled]
+
+        written_tests = []
+        for data_test in tests:
+            serialized = data_test.serialize()
+            linker.update_node_data(tuple(data_test.fqn), serialized)
+            query = self.compile_model(linker, data_test, enabled_models, add_dependency=False)
+            wrapped = data_test.render(query)
+            self.__write(data_test.build_path(), wrapped)
+            written_tests.append(data_test)
+
+        return written_tests
+
     def generate_macros(self, all_macros):
         def do_gen(ctx):
             macros = []
@@ -351,14 +373,20 @@ def compile_archives(self):
         self.write_graph_file(linker, 'archive')
         return all_archives
 
+    def get_models(self):
+        all_models = self.model_sources(this_project=self.project)
+        for project in dependency_projects(self.project):
+            all_models.extend(self.model_sources(this_project=self.project, own_project=project))
+
+        return all_models
+
     def compile(self, dry=False):
         linker = Linker()
 
-        all_models = self.model_sources(this_project=self.project)
+        all_models = self.get_models()
         all_macros = self.get_macros(this_project=self.project)
 
         for project in dependency_projects(self.project):
-            all_models.extend(self.model_sources(this_project=self.project, own_project=project))
             all_macros.extend(self.get_macros(this_project=self.project, own_project=project))
 
         self.macro_generator = self.generate_macros(all_macros)
@@ -369,6 +397,7 @@ def compile(self, dry=False):
 
         # TODO : only compile schema tests for enabled models
         written_schema_tests = self.compile_schema_tests(linker)
+        written_data_tests = self.compile_data_tests(linker)
 
         self.validate_models_unique(compiled_models)
         self.validate_models_unique(written_schema_tests)
@@ -384,7 +413,8 @@ def compile(self, dry=False):
 
         return {
             "models": len(written_models),
-            "tests" : len(written_schema_tests),
+            "schema tests" : len(written_schema_tests),
+            "data tests" : len(written_data_tests),
             "archives": len(compiled_archives),
             "analyses" : len(written_analyses)
         }
diff --git a/dbt/compiled_model.py b/dbt/compiled_model.py
@@ -39,6 +39,9 @@ def should_skip(self):
     def is_type(self, run_type):
         return self.data['dbt_run_type'] == run_type
 
+    def is_test_type(self, test_type):
+        return self.data.get('dbt_test_type') == test_type
+
     @property
     def contents(self):
         if self._contents is None:

diff --git a/dbt/main.py b/dbt/main.py
@@ -87,8 +87,8 @@ def handle(args):
     sub.set_defaults(cls=seed_task.SeedTask, which='seed')
 
     sub = subs.add_parser('test', parents=[base_subparser])
-    sub.add_argument('--skip-test-creates', action='store_true', help="Don't create temporary views to validate model SQL")
-    sub.add_argument('--validate', action='store_true', help='Run constraint validations from schema.yml files')
+    sub.add_argument('--data', action='store_true', help='Run data tests defined in "tests" directory')
+    sub.add_argument('--schema', action='store_true', help='Run constraint validations from schema.yml files')
     sub.add_argument('--threads', type=int, required=False, help="Specify number of threads to use while executing tests. Overrides settings in profiles.yml")
     sub.set_defaults(cls=test_task.TestTask, which='test')
 

diff --git a/dbt/model.py b/dbt/model.py
@@ -415,6 +415,7 @@ def __repr__(self):
 class SchemaTest(DBTSource):
     test_type = "base"
     dbt_run_type = 'test'
+    dbt_test_type = 'schema'
 
     def __init__(self, project, target_dir, rel_filepath, model_name, options):
         self.schema = project.context()['env']['schema']
@@ -430,6 +431,12 @@ def fqn(self):
         name, _ = os.path.splitext(parts[-1])
         return [self.project['name']] + parts[1:-1] + ['schema',  self.get_filename()]
 
+    def serialize(self):
+        serialized = DBTSource.serialize(self).copy()
+        serialized['dbt_test_type'] = self.dbt_test_type
+
+        return serialized
+
     def get_params(self, options):
         return {
             "schema": self.schema,
@@ -644,3 +651,33 @@ def build_path(self):
 
     def __repr__(self):
         return "<ArchiveModel {} --> {} unique:{} updated_at:{}>".format(self.source_table, self.target_table, self.unique_key, self.updated_at)
+
+class DataTest(DBTSource):
+    dbt_run_type = 'test'
+    dbt_test_type = 'data'
+
+    def __init__(self, project, target_dir, rel_filepath, own_project):
+        super(DataTest, self).__init__(project, target_dir, rel_filepath, own_project)
+
+    def build_path(self):
+        build_dir = "test"
+        filename = "{}.sql".format(self.name)
+        fqn_parts = self.fqn[0:1] + ['data'] + self.fqn[1:-1]
+        path_parts = [build_dir] + fqn_parts + [filename]
+        return os.path.join(*path_parts)
+
+    def serialize(self):
+        serialized = DBTSource.serialize(self).copy()
+        serialized['dbt_test_type'] = self.dbt_test_type
+
+        return serialized
+
+    def render(self, query):
+        return "select count(*) from (\n{}\n) sbq".format(query)
+
+    @property
+    def immediate_name(self):
+        return self.name
+
+    def __repr__(self):
+        return "<DataTest {}.{}: {}>".format(self.project['name'], self.name, self.filepath)