[WIP] Assemble the apps using Docker Compose (closes #23)
oguzhanunlu committed Jun 26, 2018
1 parent: 80b1390 · commit: c83e6c2
Showing 34 changed files with 525 additions and 705 deletions.
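
The diffs below consistently replace localhost addresses and init-script names with Docker Compose service names (postgres, nsqlookupd, elasticsearch). As a rough sketch of how the assembled stack might be brought up and inspected, assuming this commit adds a docker-compose.yml alongside the provisioning resources (the file itself is not among the loaded diffs, so the path and service names here are assumptions):

# Sketch only; compose file location and service names are assumptions.
cd provisioning/resources              # hypothetical location of docker-compose.yml
docker-compose up -d                   # start every container in the background
docker-compose ps                      # confirm all services are up
docker-compose logs -f stream-enrich   # tail one service; name taken from control-plane-api.toml below
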
Vagrantfile: 4 changes (2 additions, 2 deletions)

@@ -20,8 +20,8 @@ Vagrant.configure("2") do |config|
 vb.name = Dir.pwd().split("/")[-1] + "-" + Time.now.to_f.to_i.to_s
 vb.customize ["modifyvm", :id, "--natdnshostresolver1", "on"]
 vb.customize [ "guestproperty", "set", :id, "--timesync-threshold", 10000 ]
-vb.memory = 4096
-vb.cpus = 1
+vb.memory = 8192
+vb.cpus = 2
 end
 
 config.vm.provision :shell do |sh|
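
The VM now gets twice the memory and CPUs, presumably to leave headroom for running every app as a container. A quick way to confirm the guest picked up the new allocation (a sketch, assuming an already-provisioned VM):

vagrant reload                      # re-apply the provider settings from the Vagrantfile
vagrant ssh -c "nproc && free -m"   # expect 2 CPUs and roughly 8 GB of total memory
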
integration/integration_test.sh: 9 changes (0 additions, 9 deletions)

@@ -1,14 +1,5 @@
 #!/bin/bash
 
-sudo service elasticsearch start
-sudo service iglu_server_0.3.0 start
-sudo service snowplow_stream_collector start
-sudo service snowplow_stream_enrich start
-sudo service snowplow_elasticsearch_loader_good start
-sudo service snowplow_elasticsearch_loader_bad start
-sudo service kibana4_init start
-sleep 15
-
 # Send good and bad events
 COUNTER=0
 while [ $COUNTER -lt 10 ]; do
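
With the init-script preamble removed, the test assumes the services are already running. Under Compose, a preamble along these lines could take its place; this is a sketch, not part of the commit, and the health-check URL and port are assumptions:

docker-compose up -d
# Poll the collector instead of sleeping a fixed 15 seconds.
until curl -sf "http://localhost:8080/health" > /dev/null; do
  sleep 2
done
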
provisioning/resources/configs/Caddyfile: 2 changes (0 additions, 2 deletions)

@@ -5,7 +5,6 @@
 /kibana
 /elasticsearch
 /control-plane
-/_plugin
 }
 redir /home /home/
 redir /kibana /kibana/
@@ -29,7 +28,6 @@
 proxy /elasticsearch localhost:9200 {
 without /elasticsearch
 }
-proxy /_plugin localhost:9200
 
 proxy /control-plane localhost:10000 {
 without /control-plane
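
The /_plugin route, apparently used for an Elasticsearch plugin UI, is dropped. The remaining proxies can be smoke-tested from the host once the stack is up (paths taken from the Caddyfile above; this assumes Caddy serves on port 80 and that each backing service is running):

curl -sI http://localhost/kibana/         # Kibana UI
curl -s  http://localhost/elasticsearch/  # Elasticsearch root response
curl -sI http://localhost/control-plane   # control-plane API
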
provisioning/resources/configs/control-plane-api.toml: 12 changes (6 additions, 6 deletions)

@@ -14,15 +14,15 @@ caddy = "Caddyfile"
 iglu_resolver = "iglu-resolver.json"
 
 [init_scripts]
-stream_collector = "snowplow_stream_collector"
-stream_enrich = "snowplow_stream_enrich"
-es_loader_good = "snowplow_elasticsearch_loader_good"
-es_loader_bad = "snowplow_elasticsearch_loader_bad"
-iglu = "iglu_server_0.3.0"
+stream_collector = "scala-stream-collector"
+stream_enrich = "stream-enrich"
+es_loader_good = "elasticsearch-loader-good"
+es_loader_bad = "elasticsearch-loader-bad"
+iglu = "iglu-server"
 caddy = "caddy_init"
 
 [PSQL]
 user = "snowplow"
 password = "snowplow"
 database = "iglu"
-adddress = "127.0.0.1:5432"
+address = "127.0.0.1:5433"
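
Besides fixing the adddress typo, the [PSQL] block now points at port 5433, presumably a host port published by the Postgres container. A hedged connectivity check from the host, using the credentials above:

# Assumes Postgres publishes 5433 on the host, as the [PSQL] block suggests.
PGPASSWORD=snowplow psql -h 127.0.0.1 -p 5433 -U snowplow -d iglu -c '\dt'
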
provisioning/resources/configs/iglu-server.conf: 2 changes (1 addition, 1 deletion)

@@ -27,7 +27,7 @@ repo-server {
 # 'postgres' contains configuration options for the postgre instance the server
 # is using
 postgres {
-host = "localhost"
+host = "postgres"
 port = 5432
 dbname = "iglu"
 username = "snowplow"
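
host = "postgres" relies on Compose service discovery inside the shared network. As a rough readiness check, assuming the service is named postgres and runs the official image:

# Service name and image are assumptions, not confirmed by the loaded diffs.
docker-compose exec postgres pg_isready -U snowplow -d iglu
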
provisioning/resources/configs/snowplow-es-loader-bad.hocon: 73 changes (2 additions, 71 deletions)

@@ -14,93 +14,37 @@
 # This file (config.hocon.sample) contains a template with
 # configuration options for the Elasticsearch Loader.
 
-# Sources currently supported are:
-# "kinesis" for reading records from a Kinesis stream
-# "stdin" for reading unencoded tab-separated events from stdin
-# If set to "stdin", JSON documents will not be sent to Elasticsearch
-# but will be written to stdout.
-# "nsq" for reading unencoded tab-separated events from NSQ
 source = nsq
 
-# Where to write good and bad records
 sink {
-# Sinks currently supported are:
-# "elasticsearch" for writing good records to Elasticsearch
-# "stdout" for writing good records to stdout
 good = elasticsearch
-
-# Sinks currently supported are:
-# "kinesis" for writing bad records to Kinesis
-# "stderr" for writing bad records to stderr
-# "nsq" for writing bad records to NSQ
-# "none" for ignoring bad records
 bad = none
 }
 
-# "good" for a stream of successfully enriched events
-# "bad" for a stream of bad events
-# "plain-json" for writing plain json
 enabled = bad
 
-# The following are used to authenticate for the Amazon Kinesis sink.
-#
-# If both are set to "default", the default provider chain is used
-# (see http://docs.aws.amazon.com/AWSJavaSDK/latest/javadoc/com/amazonaws/auth/DefaultAWSCredentialsProviderChain.html)
-#
-# If both are set to "iam", use AWS IAM Roles to provision credentials.
-#
-# If both are set to "env", use environment variables AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY
 aws {
 accessKey: ""
 secretKey: ""
 }
 
-# config for NSQ
 nsq {
-# Channel name for NSQ source
 channelName = ESLoaderChannelBad
-
-# Host name for NSQ tools
-host = "127.0.0.1"
-
-# TCP port for nsqd
+host = nsqlookupd
 port = 4150
-
-# HTTP port for nsqlookupd
 lookupPort = 4161
 }
 
 kinesis {
-# "LATEST": most recent data.
-# "TRIM_HORIZON": oldest available data.
-# "AT_TIMESTAMP": Start from the record at or after the specified timestamp
-# Note: This only affects the first run of this application on a stream.
 initialPosition= TRIM_HORIZON
-
-# Maximum number of records to get from Kinesis per call to GetRecords
 maxRecords = 1000
-
-# Region where the Kinesis stream is located
 region = ""
-
-# "appName" is used for a DynamoDB table to maintain stream state.
-# You can set it automatically using: "SnowplowElasticsearchSink-${sink.kinesis.in.stream-name}"
 appName = ""
 }
 
-# Common configuration section for all stream sources
 streams {
 inStreamName = BadEnrichedEvents
-
-# Stream for enriched events which are rejected by Elasticsearch
 outStreamName = BadElasticsearchEvents
-
-# Events are accumulated in a buffer before being sent to Elasticsearch.
-# Note: Buffering is not supported by NSQ; will be ignored
-# The buffer is emptied whenever:
-# - the combined size of the stored records exceeds byteLimit or
-# - the number of stored records exceeds recordLimit or
-# - the time in milliseconds since it was last emptied exceeds timeLimit
 buffer {
 byteLimit = 5242880
 recordLimit = 1
@@ -110,31 +54,18 @@ streams {

 elasticsearch {
 
-# Events are indexed using an Elasticsearch Client
-# - endpoint: the cluster endpoint
-# - port: the port the cluster can be accessed on
-# - for http this is usually 9200
-# - for transport this is usually 9300
-# - max-timeout: the maximum attempt time before a client restart
-# - ssl: if using the http client, whether to use ssl or not
 client {
-endpoint = "localhost"
+endpoint = elasticsearch
 port = 9200
 maxTimeout = 10000
 ssl = false
 }
 
-# When using the AWS ES service
-# - signing: if using the http client and the AWS ES service you can sign your requests
-# http://docs.aws.amazon.com/general/latest/gr/signing_aws_api_requests.html
-# - region where the AWS ES service is located
 aws {
 signing = false
 region = ""
 }
 
-# index: the Elasticsearch index name
-# type: the Elasticsearch index type
 cluster {
 name = elasticsearch
 index = bad
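
With endpoint = elasticsearch the bad loader reaches the Elasticsearch container by service name while still writing to the bad index. Assuming port 9200 remains published to the host (not confirmed by the loaded diffs), the index can be inspected with:

# Host port mapping is an assumption; adjust if 9200 is not published.
curl -s 'http://localhost:9200/bad/_count?pretty'
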
provisioning/resources/configs/snowplow-es-loader-good.hocon: 73 changes (2 additions, 71 deletions)

@@ -14,93 +14,37 @@
 # This file (config.hocon.sample) contains a template with
 # configuration options for the Elasticsearch Loader.
 
-# Sources currently supported are:
-# "kinesis" for reading records from a Kinesis stream
-# "stdin" for reading unencoded tab-separated events from stdin
-# If set to "stdin", JSON documents will not be sent to Elasticsearch
-# but will be written to stdout.
-# "nsq" for reading unencoded tab-separated events from NSQ
 source = nsq
 
-# Where to write good and bad records
 sink {
-# Sinks currently supported are:
-# "elasticsearch" for writing good records to Elasticsearch
-# "stdout" for writing good records to stdout
 good = elasticsearch
-
-# Sinks currently supported are:
-# "kinesis" for writing bad records to Kinesis
-# "stderr" for writing bad records to stderr
-# "nsq" for writing bad records to NSQ
-# "none" for ignoring bad records
 bad = nsq
 }
 
-# "good" for a stream of successfully enriched events
-# "bad" for a stream of bad events
-# "plain-json" for writing plain json
 enabled = good
 
-# The following are used to authenticate for the Amazon Kinesis sink.
-#
-# If both are set to "default", the default provider chain is used
-# (see http://docs.aws.amazon.com/AWSJavaSDK/latest/javadoc/com/amazonaws/auth/DefaultAWSCredentialsProviderChain.html)
-#
-# If both are set to "iam", use AWS IAM Roles to provision credentials.
-#
-# If both are set to "env", use environment variables AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY
 aws {
 accessKey = ""
 secretKey = ""
 }
 
-# config for NSQ
 nsq {
-# Channel name for NSQ source
 channelName = ESLoaderChannelGood
-
-# Host name for NSQ tools
-host = "127.0.0.1"
-
-# TCP port for nsqd
+host = nsqlookupd
 port = 4150
-
-# HTTP port for nsqlookupd
 lookupPort = 4161
 }
 
 kinesis {
-# "LATEST": most recent data.
-# "TRIM_HORIZON": oldest available data.
-# "AT_TIMESTAMP": Start from the record at or after the specified timestamp
-# Note: This only affects the first run of this application on a stream.
 initialPosition = TRIM_HORIZON
-
-# Maximum number of records to get from Kinesis per call to GetRecords
 maxRecords = 1000
-
-# Region where the Kinesis stream is located
 region = ""
-
-# "appName" is used for a DynamoDB table to maintain stream state.
-# You can set it automatically using: "SnowplowElasticsearchSink-${sink.kinesis.in.stream-name}"
 appName = ""
 }
 
-# Common configuration section for all stream sources
 streams {
 inStreamName = EnrichedEvents
-
-# Stream for enriched events which are rejected by Elasticsearch
 outStreamName = BadElasticsearchEvents
-
-# Events are accumulated in a buffer before being sent to Elasticsearch.
-# Note: Buffering is not supported by NSQ; will be ignored
-# The buffer is emptied whenever:
-# - the combined size of the stored records exceeds byteLimit or
-# - the number of stored records exceeds recordLimit or
-# - the time in milliseconds since it was last emptied exceeds timeLimit
 buffer {
 byteLimit: 5242880
 recordLimit: 1
@@ -110,31 +54,18 @@ streams {

 elasticsearch {
 
-# Events are indexed using an Elasticsearch Client
-# - endpoint: the cluster endpoint
-# - port: the port the cluster can be accessed on
-# - for http this is usually 9200
-# - for transport this is usually 9300
-# - max-timeout: the maximum attempt time before a client restart
-# - ssl: if using the http client, whether to use ssl or not
 client {
-endpoint = "localhost"
+endpoint = elasticsearch
 port = 9200
 maxTimeout = 10000
 ssl = false
 }
 
-# When using the AWS ES service
-# - signing: if using the http client and the AWS ES service you can sign your requests
-# http://docs.aws.amazon.com/general/latest/gr/signing_aws_api_requests.html
-# - region where the AWS ES service is located
 aws {
 signing = false
 region = ""
 }
 
-# index: the Elasticsearch index name
-# type: the Elasticsearch index type
 cluster {
 name = "elasticsearch"
 index = "good"
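
The good loader makes the same switch, reading via nsqlookupd and writing to the good index. Since lookupPort = 4161 is nsqlookupd's HTTP port, the topics it advertises can be listed, assuming that port is published to the host:

# nsqlookupd HTTP API; host port mapping is an assumption.
curl -s 'http://localhost:4161/topics'
curl -s 'http://localhost:4161/lookup?topic=EnrichedEvents'
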