Skip to content

Commit

Permalink
Bump version: 0.5.3 → 0.5.4 (#214)
Browse files Browse the repository at this point in the history
dbt 0.5.4 release
  • Loading branch information
drewbanin authored Nov 29, 2016
1 parent 40948fb commit c60d6e5
Show file tree
Hide file tree
Showing 89 changed files with 4,285 additions and 37 deletions.
2 changes: 1 addition & 1 deletion .bumpversion.cfg
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
[bumpversion]
current_version = 0.5.3
current_version = 0.5.4
commit = True
tag = True

Expand Down
3 changes: 3 additions & 0 deletions .coveragerc
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Only include files matching dbt/* in coverage reports.
[report]
include =
dbt/*
18 changes: 18 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Image that installs Python build tooling, copies the repository into
# /usr/src/app and runs the project's test/setup.sh script.
FROM python

# Refresh the package index and install in a single layer. With a separate
# `RUN apt-get update`, Docker can reuse a stale cached index when the
# install list below changes, which makes the install fail or pull old
# packages.
RUN apt-get update && apt-get install -y \
    netcat \
    python-dev \
    python-pip \
    python3-dev

RUN pip install pip --upgrade
RUN pip install virtualenv
RUN pip install virtualenvwrapper

COPY . /usr/src/app

# WORKDIR applies to every following instruction. A `RUN cd ...` only changes
# directory inside its own throwaway shell, so none is needed here.
WORKDIR /usr/src/app
RUN ./test/setup.sh


13 changes: 13 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Both targets are commands rather than files. Declaring them phony keeps make
# from skipping a recipe when a file or directory with the target's name exists.
.PHONY: test test-new

# Paths from `git status --porcelain` whose status starts with " M" or "A" and
# whose file name looks like test_*.py. The backticks are stored verbatim and
# expanded by the shell each time a recipe uses the variable.
changed_tests := `git status --porcelain | grep '^\( M\|A\)' | awk '{ print $$2 }' | grep '\/test_[a-zA-Z_\-\.]\+.py'`

# Run the test runner script in the docker-compose "test" service.
test:
	@echo "Test run starting..."
	@docker-compose run test /usr/src/app/test/runner.sh

# Run the test runner script with only the changed test files as arguments.
test-new:
	@echo "Test run starting..."
	@echo "Changed test files:"
	@echo "${changed_tests}"
	@docker-compose run test /usr/src/app/test/runner.sh ${changed_tests}
6 changes: 6 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,11 @@
# dbt
Tests: [![CircleCI](https://circleci.com/gh/analyst-collective/dbt/tree/master.svg?style=svg)](https://circleci.com/gh/analyst-collective/dbt/tree/master)
[![AppVeyor](https://ci.appveyor.com/api/projects/status/v01rwd3q91jnwp9m/branch/development?svg=true)](https://ci.appveyor.com/project/DrewBanin/dbt/branch/development)
[Coverage](https://circleci.com/api/v1/project/analyst-collective/dbt/latest/artifacts/0/$CIRCLE_ARTIFACTS/htmlcov/index.html?branch=development)
[Docs](http://dbt.readthedocs.io/en/master/about/overview/)

dbt (data build tool) helps analysts write reliable, modular code using a workflow that closely mirrors software development.

Expand Down
66 changes: 66 additions & 0 deletions appveyor.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
version: 1.0.{build}-{branch}

environment:
# SDK v7.0 MSVC Express 2008's SetEnv.cmd script will fail if the
# /E:ON and /V:ON options are not enabled in the batch script interpreter
# See: http://stackoverflow.com/a/13751649/163740
CMD_IN_ENV: "cmd /E:ON /V:ON /C .\\appveyor\\run_with_env.cmd"
TOX_ENV: "pywin"

# Only the 32-bit Python 2.7 job is active.
matrix:
- PYTHON: "C:\\Python27"
PYTHON_VERSION: "2.7.8"
PYTHON_ARCH: "32"

# Python 3.5 job, currently disabled.
#- PYTHON: "C:\\Python35"
# PYTHON_VERSION: "3.5.2"
# PYTHON_ARCH: "32"

# Default user/password for the PostgreSQL client tools (createdb, psql)
# invoked in test_script.
PGUSER: postgres
PGPASSWORD: Password12!

services:
- postgresql94

hosts:
database: 127.0.0.1

# Put the PostgreSQL 9.4 binaries on PATH, then rewrite pg_hba.conf so that
# connections from the IPv6 (::1) and IPv4 (127.0.0.1) loopback addresses use
# the "trust" method. Set-Content replaces the file; Add-Content appends.
init:
- PATH=C:\Program Files\PostgreSQL\9.4\bin\;%PATH%
- ps: Set-Content "c:\program files\postgresql\9.4\data\pg_hba.conf" "host all all ::1/128 trust"
- ps: Add-Content "c:\program files\postgresql\9.4\data\pg_hba.conf" "host all all 127.0.0.1/32 trust"

install:
# Download setup scripts and unzip
- ps: "wget https://github.com/cloudify-cosmo/appveyor-utils/archive/master.zip -OutFile ./master.zip"
- "7z e master.zip */appveyor/* -oappveyor"

# Install Python (from the official .msi of http://python.org) and pip when
# not already installed.
- "powershell ./appveyor/install.ps1"

# Prepend newly installed Python to the PATH of this build (this cannot be
# done from inside the powershell script as it would require to restart
# the parent CMD process).
- "SET PATH=%PYTHON%;%PYTHON%\\Scripts;%PATH%"

# Check that we have the expected version and architecture for Python
- "python --version"
- "python -c \"import struct; print(struct.calcsize('P') * 8)\""

build: false # Not a C# project, build stuff at the test step instead.

before_test:
- "%CMD_IN_ENV% pip install tox"

test_script:
# Set up the PostgreSQL database: create the "dbt" database and a login role
# "root" that may connect to it and create objects in it.
- createdb dbt
- psql -c "CREATE ROLE root WITH UNENCRYPTED PASSWORD 'password';" -U postgres
- psql -c "ALTER ROLE root WITH LOGIN;" -U postgres
- psql -c "GRANT CREATE, CONNECT ON DATABASE dbt TO root;" -U postgres

# TODO: this disables SSL certificate verification for git system-wide, which
# is generally a bad idea; remove it once the underlying problem is understood.
- git config --system http.sslverify false

# Run the tox environment named by TOX_ENV through the CMD_IN_ENV wrapper.
- "%CMD_IN_ENV% tox -e %TOX_ENV%"
29 changes: 26 additions & 3 deletions circle.yml
Original file line number Diff line number Diff line change
@@ -1,5 +1,28 @@
machine:
post:
- pyenv global 2.7.9 3.5.0
hosts:
database: 127.0.0.1

database:
override:
- createdb dbt
- echo "CREATE ROLE root WITH UNENCRYPTED PASSWORD 'password';" | psql -U postgres
- echo "ALTER ROLE root WITH LOGIN;" | psql -U postgres
- echo "GRANT SELECT, UPDATE, INSERT ON ALL TABLES IN SCHEMA dbt.* TO root;" | psql -U postgres
- echo "GRANT CREATE, CONNECT ON DATABASE dbt TO root;" | psql -U postgres


dependencies:
pre:
- sudo add-apt-repository -y ppa:fkrull/deadsnakes
- sudo apt-get update
- sudo apt-get install python3.5 python3.5-dev
- pip install --upgrade pip setuptools || true
- pip install --upgrade tox tox-pyenv
override:
- pyenv local 2.7.9 3.5.0

test:
override:
- sudo chown -R ubuntu:ubuntu /root/
- /bin/bash -c 'cd /home/ubuntu/dbt && tox'
post:
- mv htmlcov $CIRCLE_ARTIFACTS/
50 changes: 40 additions & 10 deletions dbt/compilation.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
import time
import sqlparse

CompilableEntities = ["models", "tests", "archives", "analyses"]
CompilableEntities = ["models", "data tests", "schema tests", "archives", "analyses"]

class Compiler(object):
def __init__(self, project, create_template_class):
Expand Down Expand Up @@ -60,6 +60,10 @@ def project_schemas(self):
source_paths = self.project.get('source-paths', [])
return Source(self.project).get_schemas(source_paths)

def project_tests(self):
    """Return the tests found under the project's ``test-paths`` setting.

    The setting defaults to an empty list when the project does not define
    it; the lookup itself is delegated to ``Source.get_tests``.
    """
    test_paths = self.project.get('test-paths', [])
    source = Source(self.project)
    return source.get_tests(test_paths)

def analysis_sources(self, project):
paths = project.get('analysis-paths', [])
return Source(project).get_analyses(paths)
Expand Down Expand Up @@ -109,7 +113,7 @@ def model_can_reference(self, src_model, other_model):
return other_model.own_project['name'] == src_model.own_project['name'] \
or src_model.own_project['name'] == src_model.project['name']

def __ref(self, linker, ctx, model, all_models):
def __ref(self, linker, ctx, model, all_models, add_dependency=True):
schema = ctx['env']['schema']

source_model = tuple(model.fqn)
Expand Down Expand Up @@ -138,7 +142,7 @@ def do_ref(*args):

# this creates a trivial cycle -- should this be a compiler error?
# we can still interpolate the name w/o making a self-cycle
if source_model == other_model_fqn:
if source_model == other_model_fqn or not add_dependency:
pass
else:
linker.dependency(source_model, other_model_fqn)
Expand All @@ -163,14 +167,15 @@ def wrapped_do_ref(*args):

return wrapped_do_ref

def get_context(self, linker, model, models):
def get_context(self, linker, model, models, add_dependency=False):
context = self.project.context()

# built-ins
context['ref'] = self.__ref(linker, context, model, models)
context['ref'] = self.__ref(linker, context, model, models, add_dependency)
context['config'] = self.__model_config(model, linker)
context['this'] = This(context['env']['schema'], model.immediate_name, model.name)
context['var'] = Var(model, context=context)
context['target'] = self.project.get('run-target')

# these get re-interpolated at runtime!
context['run_started_at'] = '{{ run_started_at }}'
Expand All @@ -185,15 +190,15 @@ def get_context(self, linker, model, models):

return context

def compile_model(self, linker, model, models):
def compile_model(self, linker, model, models, add_dependency=True):
try:
fs_loader = jinja2.FileSystemLoader(searchpath=model.root_dir)
jinja = jinja2.Environment(loader=fs_loader)

# this is a dumb jinja2 bug -- on windows, forward slashes are EXPECTED
posix_filepath = '/'.join(split_path(model.rel_filepath))
template = jinja.get_template(posix_filepath)
context = self.get_context(linker, model, models)
context = self.get_context(linker, model, models, add_dependency=add_dependency)

rendered = template.render(context)
except jinja2.exceptions.TemplateSyntaxError as e:
Expand Down Expand Up @@ -329,6 +334,23 @@ def compile_schema_tests(self, linker):

return written_tests

def compile_data_tests(self, linker):
    """Compile every project data test and write the wrapped SQL to disk.

    Each test is registered with ``linker`` under its fqn tuple, compiled
    against the enabled models without recording ref() dependencies, wrapped
    by the test's own ``render`` and written to its ``build_path``.

    Returns the list of tests that were written, in iteration order.
    """
    data_tests = self.project_tests()

    # Only enabled models are offered to the compiler.
    enabled_models = []
    for candidate in self.get_models():
        if candidate.is_enabled:
            enabled_models.append(candidate)

    compiled_tests = []
    for test in data_tests:
        node_data = test.serialize()
        linker.update_node_data(tuple(test.fqn), node_data)

        # add_dependency=False: refs are resolved but add no graph edges.
        sql = self.compile_model(linker, test, enabled_models, add_dependency=False)
        wrapped_sql = test.render(sql)
        self.__write(test.build_path(), wrapped_sql)

        compiled_tests.append(test)

    return compiled_tests

def generate_macros(self, all_macros):
def do_gen(ctx):
macros = []
Expand All @@ -351,14 +373,20 @@ def compile_archives(self):
self.write_graph_file(linker, 'archive')
return all_archives

def get_models(self):
    """Return the models of this project followed by those of its dependencies.

    The root project's models come first; the models of each project yielded
    by ``dependency_projects`` are appended in the order they are yielded.
    """
    collected = self.model_sources(this_project=self.project)

    for dep_project in dependency_projects(self.project):
        dep_models = self.model_sources(this_project=self.project, own_project=dep_project)
        collected.extend(dep_models)

    return collected

def compile(self, dry=False):
linker = Linker()

all_models = self.model_sources(this_project=self.project)
all_models = self.get_models()
all_macros = self.get_macros(this_project=self.project)

for project in dependency_projects(self.project):
all_models.extend(self.model_sources(this_project=self.project, own_project=project))
all_macros.extend(self.get_macros(this_project=self.project, own_project=project))

self.macro_generator = self.generate_macros(all_macros)
Expand All @@ -369,6 +397,7 @@ def compile(self, dry=False):

# TODO : only compile schema tests for enabled models
written_schema_tests = self.compile_schema_tests(linker)
written_data_tests = self.compile_data_tests(linker)

self.validate_models_unique(compiled_models)
self.validate_models_unique(written_schema_tests)
Expand All @@ -384,7 +413,8 @@ def compile(self, dry=False):

return {
"models": len(written_models),
"tests" : len(written_schema_tests),
"schema tests" : len(written_schema_tests),
"data tests" : len(written_data_tests),
"archives": len(compiled_archives),
"analyses" : len(written_analyses)
}
3 changes: 3 additions & 0 deletions dbt/compiled_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,9 @@ def should_skip(self):
def is_type(self, run_type):
return self.data['dbt_run_type'] == run_type

def is_test_type(self, test_type):
    """Tell whether this node's ``dbt_test_type`` entry equals ``test_type``.

    A node without a ``dbt_test_type`` entry is compared as ``None``.
    """
    recorded_type = self.data.get('dbt_test_type')
    return recorded_type == test_type

@property
def contents(self):
if self._contents is None:
Expand Down
4 changes: 2 additions & 2 deletions dbt/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,8 +87,8 @@ def handle(args):
sub.set_defaults(cls=seed_task.SeedTask, which='seed')

sub = subs.add_parser('test', parents=[base_subparser])
sub.add_argument('--skip-test-creates', action='store_true', help="Don't create temporary views to validate model SQL")
sub.add_argument('--validate', action='store_true', help='Run constraint validations from schema.yml files')
sub.add_argument('--data', action='store_true', help='Run data tests defined in "tests" directory')
sub.add_argument('--schema', action='store_true', help='Run constraint validations from schema.yml files')
sub.add_argument('--threads', type=int, required=False, help="Specify number of threads to use while executing tests. Overrides settings in profiles.yml")
sub.set_defaults(cls=test_task.TestTask, which='test')

Expand Down
37 changes: 37 additions & 0 deletions dbt/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -415,6 +415,7 @@ def __repr__(self):
class SchemaTest(DBTSource):
test_type = "base"
dbt_run_type = 'test'
dbt_test_type = 'schema'

def __init__(self, project, target_dir, rel_filepath, model_name, options):
self.schema = project.context()['env']['schema']
Expand All @@ -430,6 +431,12 @@ def fqn(self):
name, _ = os.path.splitext(parts[-1])
return [self.project['name']] + parts[1:-1] + ['schema', self.get_filename()]

def serialize(self):
    """Return a copy of the base serialization tagged with ``dbt_test_type``.

    The dict produced by ``DBTSource.serialize`` is copied before the extra
    key is set, so the base result is not mutated.
    """
    payload = DBTSource.serialize(self).copy()
    payload.update(dbt_test_type=self.dbt_test_type)
    return payload

def get_params(self, options):
return {
"schema": self.schema,
Expand Down Expand Up @@ -644,3 +651,33 @@ def build_path(self):

def __repr__(self):
return "<ArchiveModel {} --> {} unique:{} updated_at:{}>".format(self.source_table, self.target_table, self.unique_key, self.updated_at)

class DataTest(DBTSource):
    """Source object for a data test (``dbt_test_type`` is ``'data'``).

    The compiled query is wrapped in a ``select count(*)`` over a subquery
    and written beneath the ``test`` build directory.
    """

    dbt_run_type = 'test'
    dbt_test_type = 'data'

    def __init__(self, project, target_dir, rel_filepath, own_project):
        # No state of its own; construction is delegated to the base class.
        super(DataTest, self).__init__(project, target_dir, rel_filepath, own_project)

    def build_path(self):
        """Return the relative output path for the compiled test.

        Layout: ``test/<first fqn part>/data/<middle fqn parts>/<name>.sql``.
        """
        sql_file = "{}.sql".format(self.name)
        leading, middle = self.fqn[0:1], self.fqn[1:-1]
        return os.path.join("test", *(leading + ['data'] + middle + [sql_file]))

    def serialize(self):
        """Return a copy of the base serialization tagged with the test type."""
        payload = DBTSource.serialize(self).copy()
        payload.update(dbt_test_type=self.dbt_test_type)
        return payload

    def render(self, query):
        """Wrap ``query`` as a subquery of a ``select count(*)`` statement."""
        template = "select count(*) from (\n{}\n) sbq"
        return template.format(query)

    @property
    def immediate_name(self):
        """The test's own name, unchanged."""
        return self.name

    def __repr__(self):
        project_name = self.project['name']
        return "<DataTest {}.{}: {}>".format(project_name, self.name, self.filepath)
Loading

0 comments on commit c60d6e5

Please sign in to comment.