diff --git a/integration/integration_test.sh b/integration/integration_test.sh index d228035d..305996c4 100755 --- a/integration/integration_test.sh +++ b/integration/integration_test.sh @@ -2,10 +2,10 @@ sudo service elasticsearch start sudo service iglu_server_0.2.0 start -sudo service snowplow_stream_collector_0.9.0 start -sudo service snowplow_stream_enrich_0.10.0 start -sudo service snowplow_elasticsearch_sink_good_0.8.0 start -sudo service snowplow_elasticsearch_sink_bad_0.8.0 start +sudo service snowplow_stream_collector start +sudo service snowplow_stream_enrich start +sudo service snowplow_elasticsearch_loader_good start +sudo service snowplow_elasticsearch_loader_bad start sudo service kibana4_init start sudo service nginx start sleep 15 @@ -17,7 +17,7 @@ while [ $COUNTER -lt 10 ]; do curl http://localhost:8080/i let COUNTER=COUNTER+1 done -sleep 5 +sleep 60 # Assertions good_count="$(curl --silent -XGET 'http://localhost:9200/good/good/_count' | python -c 'import json,sys;obj=json.load(sys.stdin);print obj["count"]')" @@ -27,7 +27,7 @@ echo "Event Counts:" echo " - Good: ${good_count}" echo " - Bad: ${bad_count}" -if [[ "${good_count}" -eq "10" ]] && [[ "${bad_count}" -eq "11" ]]; then +if [[ "${good_count}" -eq "10" ]] && [[ "${bad_count}" -eq "10" ]]; then exit 0 else exit 1 diff --git a/provisioning/resources/configs/snowplow-elasticsearch-sink-bad.hocon b/provisioning/resources/configs/snowplow-elasticsearch-sink-bad.hocon deleted file mode 100644 index b2754bc6..00000000 --- a/provisioning/resources/configs/snowplow-elasticsearch-sink-bad.hocon +++ /dev/null @@ -1,107 +0,0 @@ -# Default configuration for kinesis-elasticsearch-sink - -sink { - - # Sources currently supported are: - # 'kinesis' for reading records from a Kinesis stream - # 'stdin' for reading unencoded tab-separated events from stdin - # If set to "stdin", JSON documents will not be sent to Elasticsearch - # but will be written to stdout. - source = "stdin" - - # Where to write good and bad records - sink { - # Sinks currently supported are: - # 'elasticsearch' for writing good records to Elasticsearch - # 'stdout' for writing good records to stdout - "good": "elasticsearch" - - # Sinks currently supported are: - # 'kinesis' for writing bad records to Kinesis - # 'stderr' for writing bad records to stderr - # 'none' for ignoring bad records - "bad": "none" - } - - # "good" for a stream of successfully enriched events - # "bad" for a stream of bad events - stream-type: "bad" - - # The following are used to authenticate for the Amazon Kinesis sink. - # - # If both are set to 'default', the default provider chain is used - # (see http://docs.aws.amazon.com/AWSJavaSDK/latest/javadoc/com/amazonaws/auth/DefaultAWSCredentialsProviderChain.html) - # - # If both are set to 'iam', use AWS IAM Roles to provision credentials. - # - # If both are set to 'env', use environment variables AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY - aws { - access-key: "" - secret-key: "" - } - - kinesis { - - in { - stream-name: "" # Kinesis stream name - - # LATEST: most recent data. - # TRIM_HORIZON: oldest available data. - # Note: This only affects the first run of this application - # on a stream. - initial-position: "TRIM_HORIZON" - } - - out { - # Stream for enriched events which are rejected by Elasticsearch - stream-name: "" - shards: 1 - } - - region: "" - - # "app-name" is used for a DynamoDB table to maintain stream state. 
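The integration test above waits a fixed 60 seconds before asserting the good/bad counts, and parses the `_count` response with a Python 2 `print` statement. A minimal polling variant is sketched below; it assumes the same localhost:9200 `_count` endpoints and expected totals as the test, and that `python3` is available on the box.

```bash
#!/bin/bash
# Sketch: poll the good/bad counts instead of relying on a single fixed sleep.
expected_good=10
expected_bad=10

count() {
  curl --silent -XGET "http://localhost:9200/$1/$1/_count" \
    | python3 -c 'import json,sys; print(json.load(sys.stdin)["count"])'
}

for attempt in $(seq 1 30); do
  good_count="$(count good)"
  bad_count="$(count bad)"
  echo "Attempt ${attempt}: good=${good_count} bad=${bad_count}"
  if [ "${good_count}" = "${expected_good}" ] && [ "${bad_count}" = "${expected_bad}" ]; then
    exit 0
  fi
  sleep 5
done
exit 1
```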
- # You can set it automatically using: "SnowplowElasticsearchSink-$\\{connector.kinesis.in.stream-name\\}" - app-name: "" - } - - elasticsearch { - - # Events are indexed using an Elasticsearch Client - # - type: http or transport (will default to transport) - # - endpoint: the cluster endpoint - # - port: the port the cluster can be accessed on - # - for http this is usually 9200 - # - for transport this is usually 9300 - # - max-timeout: the maximum attempt time before a client restart - client { - type: "http" - endpoint: "localhost" - port: 9200 - max-timeout: "10000" - - # Section for configuring the HTTP client - http { - conn-timeout: "10000" - read-timeout: "10000" - } - } - - cluster { - name: "elasticsearch" - index: "bad" - type: "bad" - } - } - - # Events are accumulated in a buffer before being sent to Elasticsearch. - # The buffer is emptied whenever: - # - the combined size of the stored records exceeds byte-limit or - # - the number of stored records exceeds record-limit or - # - the time in milliseconds since it was last emptied exceeds time-limit - buffer { - byte-limit: 5242880 - record-limit: 10000 - time-limit: 60000 - } -} diff --git a/provisioning/resources/configs/snowplow-elasticsearch-sink-good.hocon b/provisioning/resources/configs/snowplow-elasticsearch-sink-good.hocon deleted file mode 100644 index f53cbb10..00000000 --- a/provisioning/resources/configs/snowplow-elasticsearch-sink-good.hocon +++ /dev/null @@ -1,107 +0,0 @@ -# Default configuration for kinesis-elasticsearch-sink - -sink { - - # Sources currently supported are: - # 'kinesis' for reading records from a Kinesis stream - # 'stdin' for reading unencoded tab-separated events from stdin - # If set to "stdin", JSON documents will not be sent to Elasticsearch - # but will be written to stdout. - source = "stdin" - - # Where to write good and bad records - sink { - # Sinks currently supported are: - # 'elasticsearch' for writing good records to Elasticsearch - # 'stdout' for writing good records to stdout - "good": "elasticsearch" - - # Sinks currently supported are: - # 'kinesis' for writing bad records to Kinesis - # 'stderr' for writing bad records to stderr - # 'none' for ignoring bad records - "bad": "stderr" - } - - # "good" for a stream of successfully enriched events - # "bad" for a stream of bad events - stream-type: "good" - - # The following are used to authenticate for the Amazon Kinesis sink. - # - # If both are set to 'default', the default provider chain is used - # (see http://docs.aws.amazon.com/AWSJavaSDK/latest/javadoc/com/amazonaws/auth/DefaultAWSCredentialsProviderChain.html) - # - # If both are set to 'iam', use AWS IAM Roles to provision credentials. - # - # If both are set to 'env', use environment variables AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY - aws { - access-key: "" - secret-key: "" - } - - kinesis { - - in { - stream-name: "" # Kinesis stream name - - # LATEST: most recent data. - # TRIM_HORIZON: oldest available data. - # Note: This only affects the first run of this application - # on a stream. - initial-position: "TRIM_HORIZON" - } - - out { - # Stream for enriched events which are rejected by Elasticsearch - stream-name: "" - shards: 1 - } - - region: "" - - # "app-name" is used for a DynamoDB table to maintain stream state. 
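Both the retired sink configs here and the new loader configs later in the diff write to a local Elasticsearch over HTTP on port 9200, into `good` and `bad` indices. A quick sanity check that the cluster is reachable and the indices exist (a sketch using standard Elasticsearch endpoints):

```bash
# Cluster health plus a listing of the good/bad indices and their document counts.
curl --silent 'http://localhost:9200/_cluster/health?pretty'
curl --silent 'http://localhost:9200/_cat/indices?v' | grep -E 'good|bad'
```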
- # You can set it automatically using: "SnowplowElasticsearchSink-$\\{connector.kinesis.in.stream-name\\}" - app-name: "" - } - - elasticsearch { - - # Events are indexed using an Elasticsearch Client - # - type: http or transport (will default to transport) - # - endpoint: the cluster endpoint - # - port: the port the cluster can be accessed on - # - for http this is usually 9200 - # - for transport this is usually 9300 - # - max-timeout: the maximum attempt time before a client restart - client { - type: "http" - endpoint: "localhost" - port: 9200 - max-timeout: "10000" - - # Section for configuring the HTTP client - http { - conn-timeout: "10000" - read-timeout: "10000" - } - } - - cluster { - name: "elasticsearch" - index: "good" - type: "good" - } - } - - # Events are accumulated in a buffer before being sent to Elasticsearch. - # The buffer is emptied whenever: - # - the combined size of the stored records exceeds byte-limit or - # - the number of stored records exceeds record-limit or - # - the time in milliseconds since it was last emptied exceeds time-limit - buffer { - byte-limit: 5242880 - record-limit: 10000 - time-limit: 60000 - } -} diff --git a/provisioning/resources/configs/snowplow-es-loader-bad.hocon b/provisioning/resources/configs/snowplow-es-loader-bad.hocon new file mode 100644 index 00000000..31372ae0 --- /dev/null +++ b/provisioning/resources/configs/snowplow-es-loader-bad.hocon @@ -0,0 +1,143 @@ +# Copyright (c) 2014-2017 Snowplow Analytics Ltd. All rights reserved. +# +# This program is licensed to you under the Apache License Version 2.0, and +# you may not use this file except in compliance with the Apache License +# Version 2.0. You may obtain a copy of the Apache License Version 2.0 at +# http://www.apache.org/licenses/LICENSE-2.0. +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the Apache License Version 2.0 is distributed on an "AS +# IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. See the Apache License Version 2.0 for the specific language +# governing permissions and limitations there under. + +# This file (config.hocon.sample) contains a template with +# configuration options for the Elasticsearch Loader. + +# Sources currently supported are: +# "kinesis" for reading records from a Kinesis stream +# "stdin" for reading unencoded tab-separated events from stdin +# If set to "stdin", JSON documents will not be sent to Elasticsearch +# but will be written to stdout. +# "nsq" for reading unencoded tab-separated events from NSQ +source = nsq + +# Where to write good and bad records +sink { + # Sinks currently supported are: + # "elasticsearch" for writing good records to Elasticsearch + # "stdout" for writing good records to stdout + good = elasticsearch + + # Sinks currently supported are: + # "kinesis" for writing bad records to Kinesis + # "stderr" for writing bad records to stderr + # "nsq" for writing bad records to NSQ + # "none" for ignoring bad records + bad = none +} + +# "good" for a stream of successfully enriched events +# "bad" for a stream of bad events +# "plain-json" for writing plain json +enabled = bad + +# The following are used to authenticate for the Amazon Kinesis sink. +# +# If both are set to "default", the default provider chain is used +# (see http://docs.aws.amazon.com/AWSJavaSDK/latest/javadoc/com/amazonaws/auth/DefaultAWSCredentialsProviderChain.html) +# +# If both are set to "iam", use AWS IAM Roles to provision credentials. 
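Unlike the stdin-fed sinks being removed, this loader consumes from NSQ, so nsqd and nsqlookupd must be running before it can read anything. A quick health check is sketched below, assuming nsqd's HTTP API on 4151 and nsqlookupd's on 4161 (the TCP port 4150 and lookupPort 4161 configured just below).

```bash
# Both NSQ daemons expose /ping on their HTTP ports and reply "OK" when healthy.
curl --silent http://127.0.0.1:4151/ping && echo " <- nsqd"
curl --silent http://127.0.0.1:4161/ping && echo " <- nsqlookupd"

# Topics currently registered with nsqlookupd.
curl --silent http://127.0.0.1:4161/topics
```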
+# +# If both are set to "env", use environment variables AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY +aws { + accessKey: "" + secretKey: "" +} + +# config for NSQ +nsq { + # Channel name for NSQ source + channelName = ESLoaderChannelBad + + # Host name for NSQ tools + host = "127.0.0.1" + + # TCP port for nsqd + port = 4150 + + # HTTP port for nsqlookupd + lookupPort = 4161 +} + +kinesis { + # "LATEST": most recent data. + # "TRIM_HORIZON": oldest available data. + # "AT_TIMESTAMP": Start from the record at or after the specified timestamp + # Note: This only affects the first run of this application on a stream. + initialPosition= TRIM_HORIZON + + # Maximum number of records to get from Kinesis per call to GetRecords + maxRecords = 1000 + + # Region where the Kinesis stream is located + region = "" + + # "appName" is used for a DynamoDB table to maintain stream state. + # You can set it automatically using: "SnowplowElasticsearchSink-${sink.kinesis.in.stream-name}" + appName = "" +} + +# Common configuration section for all stream sources +streams { + inStreamName = BadEnrichedEvents + + # Stream for enriched events which are rejected by Elasticsearch + outStreamName = BadElasticsearchEvents + + # Events are accumulated in a buffer before being sent to Elasticsearch. + # Note: Buffering is not supported by NSQ; will be ignored + # The buffer is emptied whenever: + # - the combined size of the stored records exceeds byteLimit or + # - the number of stored records exceeds recordLimit or + # - the time in milliseconds since it was last emptied exceeds timeLimit + buffer { + byteLimit = 5242880 + recordLimit = 1 + timeLimit = 60000 + } +} + +elasticsearch { + + # Events are indexed using an Elasticsearch Client + # - endpoint: the cluster endpoint + # - port: the port the cluster can be accessed on + # - for http this is usually 9200 + # - for transport this is usually 9300 + # - max-timeout: the maximum attempt time before a client restart + # - ssl: if using the http client, whether to use ssl or not + client { + endpoint = "localhost" + port = 9200 + maxTimeout = 10000 + ssl = false + } + + # When using the AWS ES service + # - signing: if using the http client and the AWS ES service you can sign your requests + # http://docs.aws.amazon.com/general/latest/gr/signing_aws_api_requests.html + # - region where the AWS ES service is located + aws { + signing = false + region = "" + } + + # index: the Elasticsearch index name + # type: the Elasticsearch index type + cluster { + name = elasticsearch + index = bad + clusterType = bad + } +} diff --git a/provisioning/resources/configs/snowplow-es-loader-good.hocon b/provisioning/resources/configs/snowplow-es-loader-good.hocon new file mode 100644 index 00000000..4b4726bc --- /dev/null +++ b/provisioning/resources/configs/snowplow-es-loader-good.hocon @@ -0,0 +1,143 @@ +# Copyright (c) 2014-2017 Snowplow Analytics Ltd. All rights reserved. +# +# This program is licensed to you under the Apache License Version 2.0, and +# you may not use this file except in compliance with the Apache License +# Version 2.0. You may obtain a copy of the Apache License Version 2.0 at +# http://www.apache.org/licenses/LICENSE-2.0. +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the Apache License Version 2.0 is distributed on an "AS +# IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. 
See the Apache License Version 2.0 for the specific language +# governing permissions and limitations there under. + +# This file (config.hocon.sample) contains a template with +# configuration options for the Elasticsearch Loader. + +# Sources currently supported are: +# "kinesis" for reading records from a Kinesis stream +# "stdin" for reading unencoded tab-separated events from stdin +# If set to "stdin", JSON documents will not be sent to Elasticsearch +# but will be written to stdout. +# "nsq" for reading unencoded tab-separated events from NSQ +source = nsq + +# Where to write good and bad records +sink { + # Sinks currently supported are: + # "elasticsearch" for writing good records to Elasticsearch + # "stdout" for writing good records to stdout + good = elasticsearch + + # Sinks currently supported are: + # "kinesis" for writing bad records to Kinesis + # "stderr" for writing bad records to stderr + # "nsq" for writing bad records to NSQ + # "none" for ignoring bad records + bad = nsq +} + +# "good" for a stream of successfully enriched events +# "bad" for a stream of bad events +# "plain-json" for writing plain json +enabled = good + +# The following are used to authenticate for the Amazon Kinesis sink. +# +# If both are set to "default", the default provider chain is used +# (see http://docs.aws.amazon.com/AWSJavaSDK/latest/javadoc/com/amazonaws/auth/DefaultAWSCredentialsProviderChain.html) +# +# If both are set to "iam", use AWS IAM Roles to provision credentials. +# +# If both are set to "env", use environment variables AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY +aws { + accessKey = "" + secretKey = "" +} + +# config for NSQ +nsq { + # Channel name for NSQ source + channelName = ESLoaderChannelGood + + # Host name for NSQ tools + host = "127.0.0.1" + + # TCP port for nsqd + port = 4150 + + # HTTP port for nsqlookupd + lookupPort = 4161 +} + +kinesis { + # "LATEST": most recent data. + # "TRIM_HORIZON": oldest available data. + # "AT_TIMESTAMP": Start from the record at or after the specified timestamp + # Note: This only affects the first run of this application on a stream. + initialPosition = TRIM_HORIZON + + # Maximum number of records to get from Kinesis per call to GetRecords + maxRecords = 1000 + + # Region where the Kinesis stream is located + region = "" + + # "appName" is used for a DynamoDB table to maintain stream state. + # You can set it automatically using: "SnowplowElasticsearchSink-${sink.kinesis.in.stream-name}" + appName = "" +} + +# Common configuration section for all stream sources +streams { + inStreamName = EnrichedEvents + + # Stream for enriched events which are rejected by Elasticsearch + outStreamName = BadElasticsearchEvents + + # Events are accumulated in a buffer before being sent to Elasticsearch. 
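The good loader reads enriched events from the `EnrichedEvents` topic and diverts records rejected by Elasticsearch to `BadElasticsearchEvents`. For watching either topic while debugging, the NSQ tarball downloaded by the provisioning tasks also ships `nsq_tail`; a sketch, assuming that binary has been copied alongside nsqd and nsqlookupd (the roles below only copy those two):

```bash
# Tail enriched events as they flow through NSQ (Ctrl-C to stop).
/home/ubuntu/snowplow/bin/nsq_tail --lookupd-http-address=127.0.0.1:4161 --topic=EnrichedEvents
```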
+ # Note: Buffering is not supported by NSQ; will be ignored + # The buffer is emptied whenever: + # - the combined size of the stored records exceeds byteLimit or + # - the number of stored records exceeds recordLimit or + # - the time in milliseconds since it was last emptied exceeds timeLimit + buffer { + byteLimit: 5242880 + recordLimit: 1 + timeLimit: 60000 + } +} + +elasticsearch { + + # Events are indexed using an Elasticsearch Client + # - endpoint: the cluster endpoint + # - port: the port the cluster can be accessed on + # - for http this is usually 9200 + # - for transport this is usually 9300 + # - max-timeout: the maximum attempt time before a client restart + # - ssl: if using the http client, whether to use ssl or not + client { + endpoint = "localhost" + port = 9200 + maxTimeout = 10000 + ssl = false + } + + # When using the AWS ES service + # - signing: if using the http client and the AWS ES service you can sign your requests + # http://docs.aws.amazon.com/general/latest/gr/signing_aws_api_requests.html + # - region where the AWS ES service is located + aws { + signing = false + region = "" + } + + # index: the Elasticsearch index name + # type: the Elasticsearch index type + cluster { + name = "elasticsearch" + index = "good" + clusterType = "good" + } +} diff --git a/provisioning/resources/configs/snowplow-stream-collector.hocon b/provisioning/resources/configs/snowplow-stream-collector.hocon index 008bd46c..fa5e3f0a 100644 --- a/provisioning/resources/configs/snowplow-stream-collector.hocon +++ b/provisioning/resources/configs/snowplow-stream-collector.hocon @@ -1,4 +1,4 @@ -# Copyright (c) 2013-2014 Snowplow Analytics Ltd. All rights reserved. +# Copyright (c) 2013-2017 Snowplow Analytics Ltd. All rights reserved. # # This program is licensed to you under the Apache License Version 2.0, and # you may not use this file except in compliance with the Apache License @@ -23,22 +23,17 @@ collector { interface = "0.0.0.0" port = 8080 - # Production mode disables additional services helpful for configuring and - # initializing the collector, such as a path '/dump' to view all - # records stored in the current stream. - production = true - # Configure the P3P policy header. p3p { - policyref = "/w3c/p3p.xml" + policyRef = "/w3c/p3p.xml" CP = "NOI DSP COR NID PSA OUR IND COM NAV STA" } # The collector returns a cookie to clients for user identification # with the following domain and expiration. cookie { - enabled = true - expiration = "365 days" + enabled = true + expiration = "365 days" # e.g. "365 days" # Network cookie name name = sp # The domain is optional and will make the cookie accessible to other @@ -47,94 +42,73 @@ collector { domain = "" } - # The collector has a configurable sink for storing data in - # different formats for the enrichment process. - sink { - # Sinks currently supported are: - # 'kinesis' for writing Thrift-serialized records to a Kinesis stream - # 'stdout' for writing Base64-encoded Thrift-serialized records to stdout - # Recommended settings for 'stdout' so each line printed to stdout - # is a serialized record are: - # 1. Setting 'akka.loglevel = OFF' and 'akka.loggers = []' - # to disable all logging. - # 2. Using 'sbt assembly' and 'java -jar ...' to disable - # sbt logging. - enabled = "stdout" - - kinesis { - thread-pool-size: 10 # Thread pool size for Kinesis API requests - - # The following are used to authenticate for the Amazon Kinesis sink. - # - # If both are set to 'cpf', a properties file on the classpath is used. 
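With the collector listening on 0.0.0.0:8080 and the `sp` cookie enabled, a quick smoke test from the box itself (a sketch; `/i` is the same pixel endpoint the integration test uses):

```bash
# Expect an HTTP 200 and a Set-Cookie header carrying the "sp" network cookie.
curl --silent -o /dev/null -D - 'http://localhost:8080/i' | grep -Ei 'HTTP/|Set-Cookie'
```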
- # http://docs.aws.amazon.com/AWSJavaSDK/latest/javadoc/com/amazonaws/auth/ClasspathPropertiesFileCredentialsProvider.html - # - # If both are set to 'iam', use AWS IAM Roles to provision credentials. - # - # If both are set to 'env', use environment variables AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY - aws { - access-key: "" - secret-key: "" - } - - # Data will be stored in the following stream. - stream { - region: "" - good: "" - bad: "" - } - - # Minimum and maximum backoff periods - backoffPolicy: { - minBackoff: 3000 # 3 seconds - maxBackoff: 600000 # 5 minutes - } - } - - kafka { - brokers: "" - - # Data will be stored in the following topics - topic { - good: "" - bad: "" - } + # When enabled and the cookie specified above is missing, performs a redirect to itself to check + # if third-party cookies are blocked using the specified name. If they are indeed blocked, + # fallbackNetworkId is used instead of generating a new random one. + cookieBounce { + enabled = false + # The name of the request parameter which will be used on redirects checking that third-party + # cookies work. + name = "n3pc" + # Network user id to fallback to when third-party cookies are blocked. + fallbackNetworkUserId = "00000000-0000-4000-A000-000000000000" + } + + streams { + # Events which have successfully been collected will be stored in the good stream/topic + good = RawEvents + + # Events that are too big (w.r.t Kinesis 1MB limit) will be stored in the bad stream/topic + bad = BadRawEvents + + # Whether to use the incoming event's ip as the partition key for the good stream/topic + useIpAddressAsPartitionKey = false + + # config for NSQ sink + sink { + enabled = nsq + + # Host name for NSQ tools + host = "127.0.0.1" + + # TCP port for nsqd + port = 4150 } - # Incoming events are stored in a buffer before being sent to Kinesis. + # Incoming events are stored in a buffer before being sent to Kinesis/Kafka. + # Note: Buffering is not supported by NSQ. # The buffer is emptied whenever: # - the number of stored records reaches record-limit or # - the combined size of the stored records reaches byte-limit or # - the time in milliseconds since the buffer was last emptied reaches time-limit buffer { - byte-limit: 4000000 - record-limit: 500 - time-limit: 5000 + byteLimit = 4000000 + recordLimit = 500 # Not supported by Kafka; will be ignored + timeLimit = 5000 } } } # Akka has a variety of possible configuration options defined at -# http://doc.akka.io/docs/akka/2.2.3/general/configuration.html. +# http://doc.akka.io/docs/akka/current/scala/general/configuration.html akka { - loglevel = OFF # 'OFF' for no logging, 'DEBUG' for all logging. + loglevel = DEBUG # 'OFF' for no logging, 'DEBUG' for all logging. loggers = ["akka.event.slf4j.Slf4jLogger"] -} -# spray-can is the server the Stream collector uses and has configurable -# options defined at -# https://github.com/spray/spray/blob/master/spray-can/src/main/resources/reference.conf -spray.can.server { - # To obtain the hostname in the collector, the 'remote-address' header - # should be set. By default, this is disabled, and enabling it - # adds the 'Remote-Address' header to every request automatically. 
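Once the collector sinks to NSQ, the `RawEvents` topic counters on nsqd are the quickest way to confirm events are actually being written (a sketch; nsqd's `/stats` endpoint lives on its HTTP port 4151):

```bash
# Send one event through the collector, then inspect the RawEvents topic on nsqd.
curl --silent -o /dev/null 'http://localhost:8080/i'
curl --silent 'http://127.0.0.1:4151/stats' | grep -A 3 'RawEvents'
```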
- remote-address-header = on - - uri-parsing-mode = relaxed - raw-request-uri-header = on - - # Define the maximum request length (the default is 2048) - parsing { - max-uri-length = 32768 + # akka-http is the server the Stream collector uses and has configurable options defined at + # http://doc.akka.io/docs/akka-http/current/scala/http/configuration.html + http.server { + # To obtain the hostname in the collector, the 'remote-address' header + # should be set. By default, this is disabled, and enabling it + # adds the 'Remote-Address' header to every request automatically. + remote-address-header = on + + raw-request-uri-header = on + + # Define the maximum request length (the default is 2048) + parsing { + max-uri-length = 32768 + uri-parsing-mode = relaxed + } } } diff --git a/provisioning/resources/configs/snowplow-stream-enrich.hocon b/provisioning/resources/configs/snowplow-stream-enrich.hocon index eaffc132..f1ec918e 100644 --- a/provisioning/resources/configs/snowplow-stream-enrich.hocon +++ b/provisioning/resources/configs/snowplow-stream-enrich.hocon @@ -1,74 +1,125 @@ -# Default Configuration for Scala Stream Enrich. +# Copyright (c) 2013-2017 Snowplow Analytics Ltd. All rights reserved. +# +# This program is licensed to you under the Apache License Version 2.0, and +# you may not use this file except in compliance with the Apache License +# Version 2.0. You may obtain a copy of the Apache License Version 2.0 at +# http://www.apache.org/licenses/LICENSE-2.0. +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the Apache License Version 2.0 is distributed on an "AS +# IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. See the Apache License Version 2.0 for the specific language +# governing permissions and limitations there under. + +# This file (application.conf.example) contains a template with +# configuration options for Stream Enrich. enrich { # Sources currently supported are: # 'kinesis' for reading Thrift-serialized records from a Kinesis stream + # 'kafka' for reading Thrift-serialized records from a Kafka topic # 'stdin' for reading Base64-encoded Thrift-serialized records from stdin - source = "stdin" + # 'nsq' for reading Base64-encoded Thrift-serialized records from NSQ + source = nsq # Sinks currently supported are: # 'kinesis' for writing enriched events to one Kinesis stream and invalid events to another. + # 'kafka' for writing enriched events to one Kafka topic and invalid events to another. # 'stdouterr' for writing enriched events to stdout and invalid events to stderr. - # Using "sbt assembly" and "java -jar" is recommended to disable sbt - # logging. - sink = "stdouterr" + # Using "sbt assembly" and "java -jar" is recommended to disable sbt logging. + # 'nsq' for writing enriched events to one NSQ topic and invalid events to another. + sink = nsq # AWS credentials - # - # If both are set to 'cpf', a properties file on the classpath is used. - # http://docs.aws.amazon.com/AWSJavaSDK/latest/javadoc/com/amazonaws/auth/ClasspathPropertiesFileCredentialsProvider.html - # + # If both are set to 'default', use the default AWS credentials provider chain. # If both are set to 'iam', use AWS IAM Roles to provision credentials. 
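The raised `max-uri-length` matters because Snowplow events arrive as long GET query strings. A rough way to confirm the collector accepts URIs well beyond akka-http's 2048-byte default is sketched below; the parameter name is arbitrary padding and the collector is assumed to be up on port 8080.

```bash
# Build a ~5000-character query string and check the collector still returns 200.
long_value=$(printf 'x%.0s' {1..5000})
curl --silent -o /dev/null -w '%{http_code}\n' "http://localhost:8080/i?pad=${long_value}"
```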
- # # If both are set to 'env', use environment variables AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY aws { - access-key: "" - secret-key: "" - } - - kafka { - brokers: "" + accessKey = "" + secretKey = "" } streams { - in: { - raw: "" - - # After enrichment, are accumulated in a buffer before being sent to Kinesis. - # The buffer is emptied whenever: - # - the number of stored records reaches record-limit or - # - the combined size of the stored records reaches byte-limit or - # - the time in milliseconds since it was last emptied exceeds time-limit when - # a new event enters the buffer - buffer: { - byte-limit: 4500000 - record-limit: 500 - time-limit: 5000 - } + # Stream/topic where the raw events to be enriched are located + in.raw = RawEvents + + out { + # Stream/topic where the events that were successfully enriched will end up + enriched = EnrichedEvents + # Stream/topic where the event that failed enrichment will be stored + bad = BadEnrichedEvents + + # How the output stream/topic will be partitioned. + # Possible partition keys are: event_id, event_fingerprint, domain_userid, network_userid, + # user_ipaddress, domain_sessionid, user_fingerprint. + # Refer to https://github.com/snowplow/snowplow/wiki/canonical-event-model to know what the + # possible parittion keys correspond to. + # Otherwise, the partition key will be a random UUID. + partitionKey = "" } - out: { - enriched: "" - bad: "" + kinesis { + # Region where the streams are located + region = "" + + # Maximum number of records to get from Kinesis per call to GetRecords + maxRecords = 10000 - # Minimum and maximum backoff periods - # - Units: Milliseconds - backoffPolicy: { - minBackoff: 50 - maxBackoff: 500 + # LATEST: most recent data. + # TRIM_HORIZON: oldest available data. + # Note: This only effects the first run of this application + # on a stream. + initialPosition = TRIM_HORIZON + + # Minimum and maximum backoff periods, in milliseconds + backoffPolicy { + minBackoff = 50 + maxBackoff = 500 } } - # "app-name" is used for a DynamoDB table to maintain stream state. - # You can set it automatically using: "SnowplowKinesisEnrich-$\\{enrich.streams.in.raw\\}" - app-name: "" + # Kafka configuration + kafka { + brokers = "" - # LATEST: most recent data. - # TRIM_HORIZON: oldest available data. - # Note: This only effects the first run of this application - # on a stream. - initial-position = "TRIM_HORIZON" + # Number of retries to perform before giving up on sending a record + retries = 0 + } + + # config for nsq + nsq { + # Channel name for raw event source + rawChannel = StreamEnrichChannel + + # Host name for nsqd + host = "127.0.0.1" + + # TCP port for nsqd + port = 4150 + + # Host name for lookupd + lookupHost = "127.0.0.1" + + # Port for nsqlookupd + lookupPort = 4161 + } + + # After enrichment, events are accumulated in a buffer before being sent to Kinesis/Kafka. + # NOTE: Buffering is not supported by NSQ. + # The buffer is emptied whenever: + # - the number of stored records reaches recordLimit or + # - the combined size of the stored records reaches byteLimit or + # - the time in milliseconds since it was last emptied exceeds timeLimit when + # a new event enters the buffer + buffer { + byteLimit = 4500000 + recordLimit = 500 # Not supported by Kafka; will be ignored + timeLimit = 5000 + } - region: "" + # Used for a DynamoDB table to maintain stream state. + # Used as the Kafka consumer group ID. 
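Stream Enrich subscribes to `RawEvents` through the `StreamEnrichChannel` channel and publishes to `EnrichedEvents` / `BadEnrichedEvents`. Once it is running, those names should all show up in nsqd's stats (a sketch):

```bash
# RawEvents should list StreamEnrichChannel as a consumer; the output topics should exist too.
curl --silent 'http://127.0.0.1:4151/stats' | grep -E 'RawEvents|StreamEnrichChannel|EnrichedEvents'
```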
+ # You can set it automatically using: "SnowplowEnrich-$\\{enrich.streams.in.raw\\}" + appName = "" } } diff --git a/provisioning/resources/init/iglu_server_0.2.0 b/provisioning/resources/init/iglu_server_0.2.0 index 450d90b5..019d11d3 100755 --- a/provisioning/resources/init/iglu_server_0.2.0 +++ b/provisioning/resources/init/iglu_server_0.2.0 @@ -10,7 +10,7 @@ ### END INIT INFO dir="/home/ubuntu/snowplow/bin/" -cmd="java -Dconfig.file=/home/ubuntu/snowplow/configs/iglu-server.conf -jar iglu-server-0.2.0.jar com.snowplowanalytics.iglu.server.Boot" +cmd="java -Dconfig.file=/home/ubuntu/snowplow/configs/iglu-server.conf -jar $dir/iglu-server-0.2.0.jar com.snowplowanalytics.iglu.server.Boot" user="ubuntu" name="iglu_server_0.2.0" @@ -32,7 +32,6 @@ case "$1" in echo "Already started" else echo "Starting $name" - cd "$dir" if [ -z "$user" ]; then sudo $cmd >> "$stdout_log" 2>> "$stderr_log" & else diff --git a/provisioning/resources/init/nsqd_init b/provisioning/resources/init/nsqd_init new file mode 100755 index 00000000..1d69dcb6 --- /dev/null +++ b/provisioning/resources/init/nsqd_init @@ -0,0 +1,99 @@ +#!/bin/bash +### BEGIN INIT INFO +# Provides: +# Required-Start: $remote_fs $syslog +# Required-Stop: $remote_fs $syslog +# Default-Start: 2 3 4 5 +# Default-Stop: 0 1 6 +# Short-Description: Start daemon at boot time +# Description: Enable service provided by daemon. +### END INIT INFO + +dir="/home/ubuntu/snowplow/bin" +cmd="$dir/nsqd --lookupd-tcp-address=127.0.0.1:4160 -data-path /home/ubuntu/snowplow/nsq-data" +user="" + +name="nsqd" + +pid_file="/var/run/$name.pid" +stdout_log="/var/log/$name.log" +stderr_log="/var/log/$name.err" + +get_pid() { + cat "$pid_file" +} + +is_running() { + [ -f "$pid_file" ] && ps `get_pid` > /dev/null 2>&1 +} + +case "$1" in + start) + if is_running; then + echo "Already started" + else + echo "Starting $name" + if [ -z "$user" ]; then + sudo $cmd >> "$stdout_log" 2>> "$stderr_log" & + else + sudo -u "$user" $cmd >> "$stdout_log" 2>> "$stderr_log" & + fi + echo $! > "$pid_file" + if ! is_running; then + echo "Unable to start, see $stdout_log and $stderr_log" + exit 1 + fi + fi + ;; + stop) + if is_running; then + echo -n "Stopping $name.." + kill `get_pid` + for i in {1..10} + do + if ! is_running; then + break + fi + + echo -n "." + sleep 1 + done + echo + + if is_running; then + echo "Not stopped; may still be shutting down or shutdown may have failed" + exit 1 + else + echo "Stopped" + if [ -f "$pid_file" ]; then + rm "$pid_file" + fi + fi + else + echo "Not running" + fi + ;; + restart) + $0 stop + if is_running; then + echo "Unable to stop, will not attempt to start" + exit 1 + fi + $0 start + ;; + status) + if is_running; then + echo "Running" + else + echo "Stopped" + exit 1 + fi + ;; + *) + echo "Usage: $0 {start|stop|restart|status}" + exit 1 + ;; +esac + +exit 0 + diff --git a/provisioning/resources/init/nsqlookupd_init b/provisioning/resources/init/nsqlookupd_init new file mode 100755 index 00000000..0c8e8b35 --- /dev/null +++ b/provisioning/resources/init/nsqlookupd_init @@ -0,0 +1,98 @@ +#!/bin/bash +### BEGIN INIT INFO +# Provides: +# Required-Start: $remote_fs $syslog +# Required-Stop: $remote_fs $syslog +# Default-Start: 2 3 4 5 +# Default-Stop: 0 1 6 +# Short-Description: Start daemon at boot time +# Description: Enable service provided by daemon. 
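The two new init scripts follow the same pid-file pattern as the existing ones and log under /var/log. Once installed by the sp_mini_6 role further down, they can be driven through `service`; nsqlookupd is started first since nsqd is pointed at its TCP port 4160 (a sketch):

```bash
# Bring up the NSQ daemons via their init scripts, then check status and logs.
sudo service nsqlookupd_init start
sudo service nsqd_init start
sudo service nsqd_init status
tail -n 20 /var/log/nsqd.log /var/log/nsqd.err
```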
+### END INIT INFO + +dir="/home/ubuntu/snowplow/bin" +cmd="$dir/nsqlookupd" +user="" + +name="nsqlookupd" + +pid_file="/var/run/$name.pid" +stdout_log="/var/log/$name.log" +stderr_log="/var/log/$name.err" + +get_pid() { + cat "$pid_file" +} + +is_running() { + [ -f "$pid_file" ] && ps `get_pid` > /dev/null 2>&1 +} + +case "$1" in + start) + if is_running; then + echo "Already started" + else + echo "Starting $name" + if [ -z "$user" ]; then + sudo $cmd >> "$stdout_log" 2>> "$stderr_log" & + else + sudo -u "$user" $cmd >> "$stdout_log" 2>> "$stderr_log" & + fi + echo $! > "$pid_file" + if ! is_running; then + echo "Unable to start, see $stdout_log and $stderr_log" + exit 1 + fi + fi + ;; + stop) + if is_running; then + echo -n "Stopping $name.." + kill `get_pid` + for i in {1..10} + do + if ! is_running; then + break + fi + + echo -n "." + sleep 1 + done + echo + + if is_running; then + echo "Not stopped; may still be shutting down or shutdown may have failed" + exit 1 + else + echo "Stopped" + if [ -f "$pid_file" ]; then + rm "$pid_file" + fi + fi + else + echo "Not running" + fi + ;; + restart) + $0 stop + if is_running; then + echo "Unable to stop, will not attempt to start" + exit 1 + fi + $0 start + ;; + status) + if is_running; then + echo "Running" + else + echo "Stopped" + exit 1 + fi + ;; + *) + echo "Usage: $0 {start|stop|restart|status}" + exit 1 + ;; +esac + +exit 0 diff --git a/provisioning/resources/init/snowplow_elasticsearch_sink_bad_0.8.0 b/provisioning/resources/init/snowplow_elasticsearch_loader_bad similarity index 81% rename from provisioning/resources/init/snowplow_elasticsearch_sink_bad_0.8.0 rename to provisioning/resources/init/snowplow_elasticsearch_loader_bad index 0881c1f3..939fd4e4 100755 --- a/provisioning/resources/init/snowplow_elasticsearch_sink_bad_0.8.0 +++ b/provisioning/resources/init/snowplow_elasticsearch_loader_bad @@ -10,10 +10,10 @@ ### END INIT INFO dir="/home/ubuntu/snowplow/bin/" -cmd="./snowplow-elasticsearch-sink-0.8.0-2x --config /home/ubuntu/snowplow/configs/snowplow-elasticsearch-sink-bad.hocon" +cmd="java -jar $dir/snowplow-elasticsearch-loader-http-0.10.1.jar --config /home/ubuntu/snowplow/configs/snowplow-es-loader-bad.hocon" user="ubuntu" -name="snowplow_elasticsearch_sink_bad_0.8.0-2x" +name="snowplow_elasticsearch_loader_bad" pid_file="/var/run/$name.pid" stdout_log="/var/log/$name.log" stderr_log="/var/log/$name.err" @@ -32,11 +32,10 @@ case "$1" in echo "Already started" else echo "Starting $name" - cd "$dir" if [ -z "$user" ]; then - cat /home/ubuntu/snowplow/pipes/bad-1-pipe | sudo $cmd >> "$stdout_log" 2>> "$stderr_log" & + sudo $cmd >> "$stdout_log" 2>> "$stderr_log" & else - cat /home/ubuntu/snowplow/pipes/bad-1-pipe | sudo -u "$user" $cmd >> "$stdout_log" 2>> "$stderr_log" & + sudo -u "$user" $cmd >> "$stdout_log" 2>> "$stderr_log" & fi echo $! > "$pid_file" if ! 
is_running; then diff --git a/provisioning/resources/init/snowplow_stream_collector_0.9.0 b/provisioning/resources/init/snowplow_elasticsearch_loader_good similarity index 78% rename from provisioning/resources/init/snowplow_stream_collector_0.9.0 rename to provisioning/resources/init/snowplow_elasticsearch_loader_good index f0c30e56..597e9002 100755 --- a/provisioning/resources/init/snowplow_stream_collector_0.9.0 +++ b/provisioning/resources/init/snowplow_elasticsearch_loader_good @@ -10,13 +10,13 @@ ### END INIT INFO dir="/home/ubuntu/snowplow/bin/" -cmd="./snowplow-stream-collector-0.9.0 --config /home/ubuntu/snowplow/configs/snowplow-stream-collector.hocon" +cmd="java -jar $dir/snowplow-elasticsearch-loader-http-0.10.1.jar --config /home/ubuntu/snowplow/configs/snowplow-es-loader-good.hocon" user="ubuntu" -name="snowplow_stream_collector_0.9.0" +name="snowplow_elasticsearch_loader_good" pid_file="/var/run/$name.pid" -stdout_pipe="/home/ubuntu/snowplow/pipes/raw-events-pipe" -stderr_pipe="/home/ubuntu/snowplow/pipes/bad-1-pipe" +stdout_log="/var/log/$name.log" +stderr_log="/var/log/$name.err" get_pid() { cat "$pid_file" @@ -32,15 +32,14 @@ case "$1" in echo "Already started" else echo "Starting $name" - cd "$dir" if [ -z "$user" ]; then - sudo $cmd > "$stdout_pipe" 2> "$stderr_pipe" & + sudo $cmd >> "$stdout_log" 2> "$stderr_log" & else - sudo -u "$user" $cmd > "$stdout_pipe" 2> "$stderr_pipe" & + sudo -u "$user" $cmd >> "$stdout_log" 2> "$stderr_log" & fi echo $! > "$pid_file" if ! is_running; then - echo "Unable to start, see $stdout_pipe and $stderr_pipe" + echo "Unable to start, see $stdout_log and $stderr_log" exit 1 fi fi diff --git a/provisioning/resources/init/snowplow_mini_control_plane_api b/provisioning/resources/init/snowplow_mini_control_plane_api index 8942b367..469249df 100755 --- a/provisioning/resources/init/snowplow_mini_control_plane_api +++ b/provisioning/resources/init/snowplow_mini_control_plane_api @@ -10,7 +10,7 @@ ### END INIT INFO dir="/home/ubuntu/snowplow/bin/" -cmd="./snowplow-mini-control-plane-api -config=/home/ubuntu/snowplow/configs/control-plane-api.toml" +cmd="$dir/snowplow-mini-control-plane-api -config=/home/ubuntu/snowplow/configs/control-plane-api.toml" user="" name="snowplow_mini_control_plane_api" @@ -30,7 +30,6 @@ case "$1" in echo "Already started" else echo "Starting $name" - cd "$dir" if [ -z "$user" ]; then sudo $cmd & else diff --git a/provisioning/resources/init/snowplow_elasticsearch_sink_good_0.8.0 b/provisioning/resources/init/snowplow_stream_collector similarity index 76% rename from provisioning/resources/init/snowplow_elasticsearch_sink_good_0.8.0 rename to provisioning/resources/init/snowplow_stream_collector index 0d733eaf..4b8507be 100755 --- a/provisioning/resources/init/snowplow_elasticsearch_sink_good_0.8.0 +++ b/provisioning/resources/init/snowplow_stream_collector @@ -10,13 +10,13 @@ ### END INIT INFO dir="/home/ubuntu/snowplow/bin/" -cmd="./snowplow-elasticsearch-sink-0.8.0-2x --config /home/ubuntu/snowplow/configs/snowplow-elasticsearch-sink-good.hocon" +cmd="java -jar $dir/snowplow-stream-collector-0.11.0.jar --config /home/ubuntu/snowplow/configs/snowplow-stream-collector.hocon" user="ubuntu" -name="snowplow_elasticsearch_sink_good_0.8.0-2x" +name="snowplow_stream_collector" pid_file="/var/run/$name.pid" stdout_log="/var/log/$name.log" -stderr_pipe="/home/ubuntu/snowplow/pipes/bad-1-pipe" +stderr_log="/var/log/$name.err" get_pid() { cat "$pid_file" @@ -32,15 +32,14 @@ case "$1" in echo "Already started" else echo 
"Starting $name" - cd "$dir" if [ -z "$user" ]; then - cat /home/ubuntu/snowplow/pipes/enriched-events-pipe | sudo $cmd >> "$stdout_log" 2> "$stderr_pipe" & + sudo $cmd >> "$stdout_log" 2>> "$stderr_log" & else - cat /home/ubuntu/snowplow/pipes/enriched-events-pipe | sudo -u "$user" $cmd >> "$stdout_log" 2> "$stderr_pipe" & + sudo -u "$user" $cmd >> "$stdout_log" 2>> "$stderr_log" & fi echo $! > "$pid_file" if ! is_running; then - echo "Unable to start, see $stdout_log and $stderr_pipe" + echo "Unable to start, see $stdout_log and $stderr_log" exit 1 fi fi diff --git a/provisioning/resources/init/snowplow_stream_enrich_0.10.0 b/provisioning/resources/init/snowplow_stream_enrich similarity index 70% rename from provisioning/resources/init/snowplow_stream_enrich_0.10.0 rename to provisioning/resources/init/snowplow_stream_enrich index c70ca5d7..e6424ce4 100755 --- a/provisioning/resources/init/snowplow_stream_enrich_0.10.0 +++ b/provisioning/resources/init/snowplow_stream_enrich @@ -10,13 +10,13 @@ ### END INIT INFO dir="/home/ubuntu/snowplow/bin/" -cmd="./snowplow-stream-enrich-0.10.0 --config /home/ubuntu/snowplow/configs/snowplow-stream-enrich.hocon --resolver file:/home/ubuntu/snowplow/configs/iglu-resolver.json --enrichments file:/home/ubuntu/snowplow/configs/enrichments" +cmd="java -jar snowplow-stream-enrich-0.12.0.jar --config /home/ubuntu/snowplow/configs/snowplow-stream-enrich.hocon --resolver file:/home/ubuntu/snowplow/configs/iglu-resolver.json --enrichments file:/home/ubuntu/snowplow/configs/enrichments" user="ubuntu" -name="snowplow_stream_enrich_0.10.0" +name="snowplow_stream_enrich" pid_file="/var/run/$name.pid" -stdout_pipe="/home/ubuntu/snowplow/pipes/enriched-events-pipe" -stderr_pipe="/home/ubuntu/snowplow/pipes/bad-1-pipe" +stdout_log="/var/log/$name.log" +stderr_log="/var/log/$name.err" get_pid() { cat "$pid_file" @@ -32,15 +32,15 @@ case "$1" in echo "Already started" else echo "Starting $name" - cd "$dir" + cd $dir if [ -z "$user" ]; then - cat /home/ubuntu/snowplow/pipes/raw-events-pipe | sudo $cmd > "$stdout_pipe" 2> "$stderr_pipe" & + sudo $cmd >> "$stdout_log" 2>> "$stderr_log" & else - cat /home/ubuntu/snowplow/pipes/raw-events-pipe | sudo -u "$user" $cmd > "$stdout_pipe" 2> "$stderr_pipe" & + sudo -u "$user" $cmd >> "$stdout_log" 2>> "$stderr_log" & fi echo $! > "$pid_file" if ! 
is_running; then - echo "Unable to start, see $stdout_pipe and $stderr_pipe" + echo "Unable to start, see $stdout_log and $stderr_log" exit 1 fi fi diff --git a/provisioning/roles/sp_mini_1_create_dirs_and_pipes/tasks/main.yml b/provisioning/roles/sp_mini_1_create_dirs_and_pipes/tasks/main.yml index 5df80f6c..c117d33a 100644 --- a/provisioning/roles/sp_mini_1_create_dirs_and_pipes/tasks/main.yml +++ b/provisioning/roles/sp_mini_1_create_dirs_and_pipes/tasks/main.yml @@ -1,6 +1,10 @@ --- - include_vars: ../../common_vars.yml +- name: Set nsq data directory + set_fact: + nsq_data_dir: "{{main_dir}}/nsq-data" + - name: Create user ubuntu and disable password become: yes shell: 'adduser ubuntu --disabled-password --gecos "" ; @@ -17,6 +21,7 @@ - "{{scripts_dir}}" - "{{init_dir}}" - "{{ui_dir}}" + - "{{nsq_data_dir}}" #playbook_dir is ansible defined variable which stores top level ansible-playbook directory - name: Copy folders to the /home/ubuntu/snowplow @@ -36,16 +41,3 @@ src: "{{playbook_dir}}/resources/init" dest: "{{main_dir}}" recursive: yes - -- name: set pipe directories - set_fact: - raw_events_pipe={{unix_pipes_dir}}/raw-events-pipe - enriched_pipe={{unix_pipes_dir}}/enriched-events-pipe - bad_1_pipe={{unix_pipes_dir}}/bad-1-pipe - -- name: create pipes - shell: mkfifo {{item}} - with_items: - - "{{raw_events_pipe}}" - - "{{enriched_pipe}}" - - "{{bad_1_pipe}}" diff --git a/provisioning/roles/sp_mini_3_setup_apps/tasks/main.yml b/provisioning/roles/sp_mini_3_setup_apps/tasks/main.yml index 1ac4ee32..479025e1 100644 --- a/provisioning/roles/sp_mini_3_setup_apps/tasks/main.yml +++ b/provisioning/roles/sp_mini_3_setup_apps/tasks/main.yml @@ -3,9 +3,12 @@ - name: Set version variables set_fact: - kinesis_package: 'snowplow_kinesis_r85_metamorphosis.zip' - iglu_server_package: 'iglu_server_0.2.0.zip' - kibana_v: '4.0.1' + kinesis_package: 'snowplow_kinesis_r85_metamorphosis.zip' + es_loader_package: 'snowplow_elasticsearch_loader_http_0.10.0_rc1.zip' + iglu_server_package: 'iglu_server_0.2.0.zip' + kibana_v: '4.0.1' + nsq_package: 'nsq-1.0.0-compat.linux-amd64.go1.8.tar.gz' + nsq_bin_dir: 'nsq-1.0.0-compat.linux-amd64.go1.8/bin' - name: Install unzip become: yes @@ -42,26 +45,51 @@ dest: "{{executables_dir}}" - name: Copy VERSION file to /home/ubuntu/snowplow for Control API - become: yes - synchronize: + become: yes + synchronize: src: "{{playbook_dir}}/../VERSION" dest: "{{main_dir}}" -- name: Check Kinesis Packages +#- name: Check Kinesis Packages +# stat: +# path: "{{staging_dir}}/{{kinesis_package}}" +# register: check_kinesis_packages_result + +#- name: Download Kinesis Packages +# get_url: +# url: "http://dl.bintray.com/snowplow/snowplow-generic/{{kinesis_package}}" +# dest: "{{staging_dir}}" +# when: check_kinesis_packages_result.stat.exists == False +# register: kinesis_packages_downloaded + +#- name: Unzip downloaded Kinesis Packages +# shell: "unzip {{staging_dir}}/{{kinesis_package}} -d {{executables_dir}}" +# when: kinesis_packages_downloaded|changed + +# this section will be removed after new version of the snowplow-apps +# is placed on the bintray +- name: Copy snowplow-apps to the executables dir + become: yes + block: + - synchronize: + src: "{{playbook_dir}}/resources/snowplow-apps/" + dest: "{{executables_dir}}" + +- name: Check Elasticsearch Loader stat: - path: "{{staging_dir}}/{{kinesis_package}}" - register: check_kinesis_packages_result + path: "{{staging_dir}}/{{es_loader_package}}" + register: check_es_loader_result -- name: Download Kinesis Packages +- name: 
Download Elasticsearch Loader get_url: - url: "http://dl.bintray.com/snowplow/snowplow-generic/{{kinesis_package}}" + url: "http://bintray.com/artifact/download/snowplow/snowplow-generic/{{es_loader_package}}" dest: "{{staging_dir}}" - when: check_kinesis_packages_result.stat.exists == False - register: kinesis_packages_downloaded + when: check_es_loader_result.stat.exists == False + register: es_loader_downloaded -- name: Unzip downloaded Kinesis Packages - shell: "unzip {{staging_dir}}/{{kinesis_package}} -d {{executables_dir}}" - when: kinesis_packages_downloaded|changed +- name: Unzip downloaded Elasticsearch Loader + shell: "unzip {{staging_dir}}/{{es_loader_package}} -d {{executables_dir}}" + when: es_loader_downloaded|changed - name: Check Iglu Server stat: @@ -80,6 +108,17 @@ when: iglu_server_downloaded|changed register: iglu_server_extracted +- name: Download NSQ + get_url: + url: "https://s3.amazonaws.com/bitly-downloads/nsq/{{nsq_package}}" + dest: "{{staging_dir}}" + +- name: Unzip downloaded NSQ + shell: "tar xvfz {{staging_dir}}/{{nsq_package}} --directory {{staging_dir}}" + +- name: Copy NSQ binaries to executables_dir + shell: "cp {{staging_dir}}/{{nsq_bin_dir}}/nsqd {{staging_dir}}/{{nsq_bin_dir}}/nsqlookupd {{executables_dir}}" + - name: Create snowplow user on Postgresql become: true become_user: postgres @@ -115,7 +154,7 @@ get_url: url: "https://download.elasticsearch.org/kibana/kibana/kibana-{{kibana_v}}-linux-x64.zip" dest: "{{staging_dir}}" - when: check_kinesis_packages_result.stat.exists == False + when: check_kibana_result.stat.exists == False register: kibana_downloaded - name: Unzip downloaded Kibana package diff --git a/provisioning/roles/sp_mini_6_setup_init/tasks/main.yml b/provisioning/roles/sp_mini_6_setup_init/tasks/main.yml index bea85a68..a38c25df 100644 --- a/provisioning/roles/sp_mini_6_setup_init/tasks/main.yml +++ b/provisioning/roles/sp_mini_6_setup_init/tasks/main.yml @@ -11,13 +11,15 @@ with_items: - kibana4_init - snowplow_mini_control_plane_api - - snowplow_stream_collector_0.9.0 - - snowplow_stream_enrich_0.10.0 - - snowplow_elasticsearch_sink_good_0.8.0 - - snowplow_elasticsearch_sink_bad_0.8.0 + - snowplow_stream_collector + - snowplow_stream_enrich + - snowplow_elasticsearch_loader_good + - snowplow_elasticsearch_loader_bad - iglu_server_0.2.0 - - nginx_passenger - - caddy_init + - nginx_passenger + - caddy_init + - nsqd_init + - nsqlookupd_init - name: Configure for inits for calling at boot time become: yes @@ -25,12 +27,14 @@ with_items: - kibana4_init - snowplow_mini_control_plane_api - - snowplow_stream_collector_0.9.0 - - snowplow_stream_enrich_0.10.0 - - snowplow_elasticsearch_sink_good_0.8.0 - - snowplow_elasticsearch_sink_bad_0.8.0 + - snowplow_stream_collector + - snowplow_stream_enrich + - snowplow_elasticsearch_loader_good + - snowplow_elasticsearch_loader_bad - iglu_server_0.2.0 - nginx_passenger - nginx - elasticsearch - - caddy_init + - caddy_init + - nsqd_init + - nsqlookupd_init diff --git a/provisioning/roles/sp_mini_7_configure/tasks/main.yml b/provisioning/roles/sp_mini_7_configure/tasks/main.yml index 3e86702e..1422cd53 100644 --- a/provisioning/roles/sp_mini_7_configure/tasks/main.yml +++ b/provisioning/roles/sp_mini_7_configure/tasks/main.yml @@ -42,25 +42,25 @@ - name: Starting Snowplow Stream Collector become: yes service: - name: snowplow_stream_collector_0.9.0 + name: snowplow_stream_collector state: started - name: Starting Snowplow Stream Enrich become: yes service: - name: snowplow_stream_enrich_0.10.0 + name: 
snowplow_stream_enrich state: started - name: Starting Snowplow Elastic Search Sink Good become: yes service: - name: snowplow_elasticsearch_sink_good_0.8.0 + name: snowplow_elasticsearch_loader_good state: started - name: Starting Snowplow Elastic Search Sink Bad become: yes service: - name: snowplow_elasticsearch_sink_bad_0.8.0 + name: snowplow_elasticsearch_loader_bad state: started - name: Starting Nginx @@ -75,6 +75,18 @@ name: caddy_init state: started +- name: Starting nsqd + become: yes + service: + name: nsqd_init + state: started + +- name: Starting nsqlookupd + become: yes + service: + name: nsqlookupd_init + state: started + - name: Sleep 30 second pause: seconds: 30 @@ -90,4 +102,16 @@ - name: make "good" index pattern default shell: > curl -XPUT http://localhost:9200/.kibana/config/4.0.1 -d '{"defaultIndex" : "good"}' + +- name: Create new topic for RawEvents + shell: "curl -X POST http://127.0.0.1:4151/topic/create?topic=RawEvents" + +- name: Create new topic for BadEvents + shell: "curl -X POST http://127.0.0.1:4151/topic/create?topic=BadEvents" + +- name: Create new topic for EnrichedEvents + shell: "curl -X POST http://127.0.0.1:4151/topic/create?topic=EnrichedEvents" + +- name: Create new topic for BadEnrichedEvents + shell: "curl -X POST http://127.0.0.1:4151/topic/create?topic=BadEnrichedEvents" \ No newline at end of file
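After provisioning, the topics created above should be visible through the NSQ HTTP APIs, and consumer channels (e.g. `ESLoaderChannelBad` for the bad loader) can be pre-created the same way if desired. A post-provisioning check is sketched below; `/topics` on nsqlookupd and `/channel/create` on nsqd are standard NSQ endpoints.

```bash
# Topics registered with nsqlookupd, plus nsqd's per-topic counters.
curl --silent 'http://127.0.0.1:4161/topics'
curl --silent 'http://127.0.0.1:4151/stats' | grep -E 'RawEvents|BadEvents|EnrichedEvents|BadEnrichedEvents'

# Optionally pre-create a consumer channel, mirroring the topic/create calls above.
curl -X POST 'http://127.0.0.1:4151/channel/create?topic=BadEnrichedEvents&channel=ESLoaderChannelBad'
```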