[WIP] Assemble the apps using Docker Compose (closes #23)
oguzhanunlu committed Jun 26, 2018
1 parent: 80b1390 · commit: c83e6c2
Showing 34 changed files with 525 additions and 705 deletions.
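
The diffs below consistently replace localhost addresses and init-script names with Docker Compose service names (postgres, nsqlookupd, elasticsearch). As a rough sketch of how the assembled stack might be brought up and inspected, assuming this commit adds a docker-compose.yml alongside the provisioning resources (the file itself is not among the loaded diffs, so the path and service names here are assumptions):

# Sketch only; compose file location and service names are assumptions.
cd provisioning/resources              # hypothetical location of docker-compose.yml
docker-compose up -d                   # start every container in the background
docker-compose ps                      # confirm all services are up
docker-compose logs -f stream-enrich   # tail one service; name taken from control-plane-api.toml below
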
Vagrantfile: 4 changes (2 additions, 2 deletions)

@@ -20,8 +20,8 @@ Vagrant.configure("2") do |config|
 vb.name = Dir.pwd().split("/")[-1] + "-" + Time.now.to_f.to_i.to_s
 vb.customize ["modifyvm", :id, "--natdnshostresolver1", "on"]
 vb.customize [ "guestproperty", "set", :id, "--timesync-threshold", 10000 ]
-vb.memory = 4096
-vb.cpus = 1
+vb.memory = 8192
+vb.cpus = 2
 end
 
 config.vm.provision :shell do |sh|
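
The VM now gets twice the memory and CPUs, presumably to leave headroom for running every app as a container. A quick way to confirm the guest picked up the new allocation (a sketch, assuming an already-provisioned VM):

vagrant reload                      # re-apply the provider settings from the Vagrantfile
vagrant ssh -c "nproc && free -m"   # expect 2 CPUs and roughly 8 GB of total memory
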
integration/integration_test.sh: 9 changes (0 additions, 9 deletions)

@@ -1,14 +1,5 @@
 #!/bin/bash
 
-sudo service elasticsearch start
-sudo service iglu_server_0.3.0 start
-sudo service snowplow_stream_collector start
-sudo service snowplow_stream_enrich start
-sudo service snowplow_elasticsearch_loader_good start
-sudo service snowplow_elasticsearch_loader_bad start
-sudo service kibana4_init start
-sleep 15
-
 # Send good and bad events
 COUNTER=0
 while [ $COUNTER -lt 10 ]; do
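
With the init-script preamble removed, the test assumes the services are already running. Under Compose, a preamble along these lines could take its place; this is a sketch, not part of the commit, and the health-check URL and port are assumptions:

docker-compose up -d
# Poll the collector instead of sleeping a fixed 15 seconds.
until curl -sf "http://localhost:8080/health" > /dev/null; do
  sleep 2
done
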
provisioning/resources/configs/Caddyfile: 2 changes (0 additions, 2 deletions)

@@ -5,7 +5,6 @@
 /kibana
 /elasticsearch
 /control-plane
-/_plugin
 }
 redir /home /home/
 redir /kibana /kibana/
@@ -29,7 +28,6 @@
 proxy /elasticsearch localhost:9200 {
 without /elasticsearch
 }
-proxy /_plugin localhost:9200
 
 proxy /control-plane localhost:10000 {
 without /control-plane
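
The /_plugin route, apparently used for an Elasticsearch plugin UI, is dropped. The remaining proxies can be smoke-tested from the host once the stack is up (paths taken from the Caddyfile above; this assumes Caddy serves on port 80 and that each backing service is running):

curl -sI http://localhost/kibana/         # Kibana UI
curl -s  http://localhost/elasticsearch/  # Elasticsearch root response
curl -sI http://localhost/control-plane   # control-plane API
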
provisioning/resources/configs/control-plane-api.toml: 12 changes (6 additions, 6 deletions)

@@ -14,15 +14,15 @@ caddy = "Caddyfile"
 iglu_resolver = "iglu-resolver.json"
 
 [init_scripts]
-stream_collector = "snowplow_stream_collector"
-stream_enrich = "snowplow_stream_enrich"
-es_loader_good = "snowplow_elasticsearch_loader_good"
-es_loader_bad = "snowplow_elasticsearch_loader_bad"
-iglu = "iglu_server_0.3.0"
+stream_collector = "scala-stream-collector"
+stream_enrich = "stream-enrich"
+es_loader_good = "elasticsearch-loader-good"
+es_loader_bad = "elasticsearch-loader-bad"
+iglu = "iglu-server"
 caddy = "caddy_init"
 
 [PSQL]
 user = "snowplow"
 password = "snowplow"
 database = "iglu"
-adddress = "127.0.0.1:5432"
+address = "127.0.0.1:5433"
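
Besides fixing the adddress typo, the [PSQL] block now points at port 5433, presumably a host port published by the Postgres container. A hedged connectivity check from the host, using the credentials above:

# Assumes Postgres publishes 5433 on the host, as the [PSQL] block suggests.
PGPASSWORD=snowplow psql -h 127.0.0.1 -p 5433 -U snowplow -d iglu -c '\dt'
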
provisioning/resources/configs/iglu-server.conf: 2 changes (1 addition, 1 deletion)

@@ -27,7 +27,7 @@ repo-server {
 # 'postgres' contains configuration options for the postgre instance the server
 # is using
 postgres {
-host = "localhost"
+host = "postgres"
 port = 5432
 dbname = "iglu"
 username = "snowplow"
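
host = "postgres" relies on Compose service discovery inside the shared network. As a rough readiness check, assuming the service is named postgres and runs the official image:

# Service name and image are assumptions, not confirmed by the loaded diffs.
docker-compose exec postgres pg_isready -U snowplow -d iglu
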
provisioning/resources/configs/snowplow-es-loader-bad.hocon: 73 changes (2 additions, 71 deletions)

@@ -14,93 +14,37 @@
 # This file (config.hocon.sample) contains a template with
 # configuration options for the Elasticsearch Loader.
 
-# Sources currently supported are:
-# "kinesis" for reading records from a Kinesis stream
-# "stdin" for reading unencoded tab-separated events from stdin
-# If set to "stdin", JSON documents will not be sent to Elasticsearch
-# but will be written to stdout.
-# "nsq" for reading unencoded tab-separated events from NSQ
 source = nsq
 
-# Where to write good and bad records
 sink {
-# Sinks currently supported are:
-# "elasticsearch" for writing good records to Elasticsearch
-# "stdout" for writing good records to stdout
 good = elasticsearch
-
-# Sinks currently supported are:
-# "kinesis" for writing bad records to Kinesis
-# "stderr" for writing bad records to stderr
-# "nsq" for writing bad records to NSQ
-# "none" for ignoring bad records
 bad = none
 }
 
-# "good" for a stream of successfully enriched events
-# "bad" for a stream of bad events
-# "plain-json" for writing plain json
 enabled = bad
 
-# The following are used to authenticate for the Amazon Kinesis sink.
-#
-# If both are set to "default", the default provider chain is used
-# (see http://docs.aws.amazon.com/AWSJavaSDK/latest/javadoc/com/amazonaws/auth/DefaultAWSCredentialsProviderChain.html)
-#
-# If both are set to "iam", use AWS IAM Roles to provision credentials.
-#
-# If both are set to "env", use environment variables AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY
 aws {
 accessKey: ""
 secretKey: ""
 }
 
-# config for NSQ
 nsq {
-# Channel name for NSQ source
 channelName = ESLoaderChannelBad
-
-# Host name for NSQ tools
-host = "127.0.0.1"
-
-# TCP port for nsqd
+host = nsqlookupd
 port = 4150
-
-# HTTP port for nsqlookupd
 lookupPort = 4161
 }
 
 kinesis {
-# "LATEST": most recent data.
-# "TRIM_HORIZON": oldest available data.
-# "AT_TIMESTAMP": Start from the record at or after the specified timestamp
-# Note: This only affects the first run of this application on a stream.
 initialPosition= TRIM_HORIZON
-
-# Maximum number of records to get from Kinesis per call to GetRecords
 maxRecords = 1000
-
-# Region where the Kinesis stream is located
 region = ""
-
-# "appName" is used for a DynamoDB table to maintain stream state.
-# You can set it automatically using: "SnowplowElasticsearchSink-${sink.kinesis.in.stream-name}"
 appName = ""
 }
 
-# Common configuration section for all stream sources
 streams {
 inStreamName = BadEnrichedEvents
-
-# Stream for enriched events which are rejected by Elasticsearch
 outStreamName = BadElasticsearchEvents
-
-# Events are accumulated in a buffer before being sent to Elasticsearch.
-# Note: Buffering is not supported by NSQ; will be ignored
-# The buffer is emptied whenever:
-# - the combined size of the stored records exceeds byteLimit or
-# - the number of stored records exceeds recordLimit or
-# - the time in milliseconds since it was last emptied exceeds timeLimit
 buffer {
 byteLimit = 5242880
 recordLimit = 1
@@ -110,31 +54,18 @@ streams {

 elasticsearch {
 
-# Events are indexed using an Elasticsearch Client
-# - endpoint: the cluster endpoint
-# - port: the port the cluster can be accessed on
-# - for http this is usually 9200
-# - for transport this is usually 9300
-# - max-timeout: the maximum attempt time before a client restart
-# - ssl: if using the http client, whether to use ssl or not
 client {
-endpoint = "localhost"
+endpoint = elasticsearch
 port = 9200
 maxTimeout = 10000
 ssl = false
 }
 
-# When using the AWS ES service
-# - signing: if using the http client and the AWS ES service you can sign your requests
-# http://docs.aws.amazon.com/general/latest/gr/signing_aws_api_requests.html
-# - region where the AWS ES service is located
 aws {
 signing = false
 region = ""
 }
 
-# index: the Elasticsearch index name
-# type: the Elasticsearch index type
 cluster {
 name = elasticsearch
 index = bad
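
With endpoint = elasticsearch the bad loader reaches the Elasticsearch container by service name while still writing to the bad index. Assuming port 9200 remains published to the host (not confirmed by the loaded diffs), the index can be inspected with:

# Host port mapping is an assumption; adjust if 9200 is not published.
curl -s 'http://localhost:9200/bad/_count?pretty'
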
provisioning/resources/configs/snowplow-es-loader-good.hocon: 73 changes (2 additions, 71 deletions)

@@ -14,93 +14,37 @@
 # This file (config.hocon.sample) contains a template with
 # configuration options for the Elasticsearch Loader.
 
-# Sources currently supported are:
-# "kinesis" for reading records from a Kinesis stream
-# "stdin" for reading unencoded tab-separated events from stdin
-# If set to "stdin", JSON documents will not be sent to Elasticsearch
-# but will be written to stdout.
-# "nsq" for reading unencoded tab-separated events from NSQ
 source = nsq
 
-# Where to write good and bad records
 sink {
-# Sinks currently supported are:
-# "elasticsearch" for writing good records to Elasticsearch
-# "stdout" for writing good records to stdout
 good = elasticsearch
-
-# Sinks currently supported are:
-# "kinesis" for writing bad records to Kinesis
-# "stderr" for writing bad records to stderr
-# "nsq" for writing bad records to NSQ
-# "none" for ignoring bad records
 bad = nsq
 }
 
-# "good" for a stream of successfully enriched events
-# "bad" for a stream of bad events
-# "plain-json" for writing plain json
 enabled = good
 
-# The following are used to authenticate for the Amazon Kinesis sink.
-#
-# If both are set to "default", the default provider chain is used
-# (see http://docs.aws.amazon.com/AWSJavaSDK/latest/javadoc/com/amazonaws/auth/DefaultAWSCredentialsProviderChain.html)
-#
-# If both are set to "iam", use AWS IAM Roles to provision credentials.
-#
-# If both are set to "env", use environment variables AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY
 aws {
 accessKey = ""
 secretKey = ""
 }
 
-# config for NSQ
 nsq {
-# Channel name for NSQ source
 channelName = ESLoaderChannelGood
-
-# Host name for NSQ tools
-host = "127.0.0.1"
-
-# TCP port for nsqd
+host = nsqlookupd
 port = 4150
-
-# HTTP port for nsqlookupd
 lookupPort = 4161
 }
 
 kinesis {
-# "LATEST": most recent data.
-# "TRIM_HORIZON": oldest available data.
-# "AT_TIMESTAMP": Start from the record at or after the specified timestamp
-# Note: This only affects the first run of this application on a stream.
 initialPosition = TRIM_HORIZON
-
-# Maximum number of records to get from Kinesis per call to GetRecords
 maxRecords = 1000
-
-# Region where the Kinesis stream is located
 region = ""
-
-# "appName" is used for a DynamoDB table to maintain stream state.
-# You can set it automatically using: "SnowplowElasticsearchSink-${sink.kinesis.in.stream-name}"
 appName = ""
 }
 
-# Common configuration section for all stream sources
 streams {
 inStreamName = EnrichedEvents
-
-# Stream for enriched events which are rejected by Elasticsearch
 outStreamName = BadElasticsearchEvents
-
-# Events are accumulated in a buffer before being sent to Elasticsearch.
-# Note: Buffering is not supported by NSQ; will be ignored
-# The buffer is emptied whenever:
-# - the combined size of the stored records exceeds byteLimit or
-# - the number of stored records exceeds recordLimit or
-# - the time in milliseconds since it was last emptied exceeds timeLimit
 buffer {
 byteLimit: 5242880
 recordLimit: 1
@@ -110,31 +54,18 @@ streams {

 elasticsearch {
 
-# Events are indexed using an Elasticsearch Client
-# - endpoint: the cluster endpoint
-# - port: the port the cluster can be accessed on
-# - for http this is usually 9200
-# - for transport this is usually 9300
-# - max-timeout: the maximum attempt time before a client restart
-# - ssl: if using the http client, whether to use ssl or not
 client {
-endpoint = "localhost"
+endpoint = elasticsearch
 port = 9200
 maxTimeout = 10000
 ssl = false
 }
 
-# When using the AWS ES service
-# - signing: if using the http client and the AWS ES service you can sign your requests
-# http://docs.aws.amazon.com/general/latest/gr/signing_aws_api_requests.html
-# - region where the AWS ES service is located
 aws {
 signing = false
 region = ""
 }
 
-# index: the Elasticsearch index name
-# type: the Elasticsearch index type
 cluster {
 name = "elasticsearch"
 index = "good"
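
The good loader makes the same switch, reading via nsqlookupd and writing to the good index. Since lookupPort = 4161 is nsqlookupd's HTTP port, the topics it advertises can be listed, assuming that port is published to the host:

# nsqlookupd HTTP API; host port mapping is an assumption.
curl -s 'http://localhost:4161/topics'
curl -s 'http://localhost:4161/lookup?topic=EnrichedEvents'
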