diff --git a/Vagrantfile b/Vagrantfile index 6274fc18..2af3a356 100644 --- a/Vagrantfile +++ b/Vagrantfile @@ -10,6 +10,7 @@ Vagrant.configure("2") do |config| config.vm.network "forwarded_port", guest: 80, host: 2000 config.vm.network "forwarded_port", guest: 3000, host: 3000 + config.vm.network "forwarded_port", guest: 4171, host: 4171 config.vm.network "forwarded_port", guest: 8080, host: 8080 config.vm.network "forwarded_port", guest: 9200, host: 9200 config.vm.network "forwarded_port", guest: 5601, host: 5601 @@ -20,8 +21,8 @@ Vagrant.configure("2") do |config| vb.name = Dir.pwd().split("/")[-1] + "-" + Time.now.to_f.to_i.to_s vb.customize ["modifyvm", :id, "--natdnshostresolver1", "on"] vb.customize [ "guestproperty", "set", :id, "--timesync-threshold", 10000 ] - vb.memory = 4096 - vb.cpus = 1 + vb.memory = 8192 + vb.cpus = 2 end config.vm.provision :shell do |sh| diff --git a/integration/integration_test.sh b/integration/integration_test.sh index 2d3b67ab..2f60541d 100755 --- a/integration/integration_test.sh +++ b/integration/integration_test.sh @@ -1,14 +1,5 @@ #!/bin/bash -sudo service elasticsearch start -sudo service iglu_server_0.2.0 start -sudo service snowplow_stream_collector start -sudo service snowplow_stream_enrich start -sudo service snowplow_elasticsearch_loader_good start -sudo service snowplow_elasticsearch_loader_bad start -sudo service kibana4_init start -sleep 15 - # Send good and bad events COUNTER=0 while [ $COUNTER -lt 10 ]; do @@ -16,7 +7,7 @@ while [ $COUNTER -lt 10 ]; do curl http://localhost:8080/i let COUNTER=COUNTER+1 done -sleep 60 +sleep 90 # Assertions good_count="$(curl --silent -XGET 'http://localhost:9200/good/good/_count' | python -c 'import json,sys;obj=json.load(sys.stdin);print obj["count"]')" diff --git a/provisioning/resources/configs/Caddyfile b/provisioning/resources/configs/Caddyfile index 05e74b5e..b4e3b638 100644 --- a/provisioning/resources/configs/Caddyfile +++ b/provisioning/resources/configs/Caddyfile @@ -4,12 +4,14 @@ /home /kibana /elasticsearch + /nsqadmin /control-plane - /_plugin } redir /home /home/ - redir /kibana /kibana/ redir /iglu-server /iglu-server/ + redir /kibana /kibana/ + redir /nsqadmin /nsqadmin/ + redir /elasticsearch /elasticsearch/ proxy / localhost:8080 @@ -20,16 +22,29 @@ proxy /kibana localhost:5601 { without /kibana } + proxy /app/kibana localhost:5601 + proxy /app/timelion localhost:5601 + proxy /bundles localhost:5601 + proxy /plugins localhost:5601 + proxy /ui localhost:5601 + proxy /api localhost:5601 proxy /iglu-server localhost:8081 { without /iglu-server } - proxy /api localhost:8081 + proxy /api-docs localhost:8081 + + proxy /nsqadmin localhost:4171 { + without /nsqadmin + } + proxy /static localhost:4171 + proxy /api/counter localhost:4171 + proxy /api/nodes localhost:4171 + proxy /api/topics localhost:4171 proxy /elasticsearch localhost:9200 { without /elasticsearch } - proxy /_plugin localhost:9200 proxy /control-plane localhost:10000 { without /control-plane diff --git a/provisioning/resources/configs/control-plane-api.toml b/provisioning/resources/configs/control-plane-api.toml index 9ba911fb..52581400 100644 --- a/provisioning/resources/configs/control-plane-api.toml +++ b/provisioning/resources/configs/control-plane-api.toml @@ -14,15 +14,15 @@ caddy = "Caddyfile" iglu_resolver = "iglu-resolver.json" [init_scripts] -stream_collector = "snowplow_stream_collector" -stream_enrich = "snowplow_stream_enrich" -es_loader_good = "snowplow_elasticsearch_loader_good" -es_loader_bad = 
"snowplow_elasticsearch_loader_bad" -iglu = "iglu_server_0.2.0" +stream_collector = "scala-stream-collector" +stream_enrich = "stream-enrich" +es_loader_good = "elasticsearch-loader-good" +es_loader_bad = "elasticsearch-loader-bad" +iglu = "iglu-server" caddy = "caddy_init" [PSQL] user = "snowplow" password = "snowplow" database = "iglu" -adddress = "127.0.0.1:5432" +address = "127.0.0.1:5433" diff --git a/provisioning/resources/configs/iglu-resolver.json b/provisioning/resources/configs/iglu-resolver.json index 9506f8e0..fbf56217 100644 --- a/provisioning/resources/configs/iglu-resolver.json +++ b/provisioning/resources/configs/iglu-resolver.json @@ -24,7 +24,7 @@ ], "connection": { "http": { - "uri": "http://localhost:8081/api", + "uri": "http://iglu-server:8081/api", "apikey": "PLACEHOLDER" } } diff --git a/provisioning/resources/configs/iglu-server.conf b/provisioning/resources/configs/iglu-server.conf index 254cfef1..793e7838 100644 --- a/provisioning/resources/configs/iglu-server.conf +++ b/provisioning/resources/configs/iglu-server.conf @@ -1,4 +1,4 @@ -# Copyright (c) 2014 Snowplow Analytics Ltd. All rights reserved. +# Copyright (c) 2014-2018 Snowplow Analytics Ltd. All rights reserved. # # This program is licensed to you under the Apache License Version 2.0, and # you may not use this file except in compliance with the Apache License @@ -15,15 +15,19 @@ # the Iglu repository server. # 'repo-server' contains configuration options for the repo-server. +# interface on which the server will be running +# baseURL is address of deployment, ":/" address used for baseURL of Swagger UI +# port on which the server will be running repo-server { interface = "0.0.0.0" + baseURL = "0.0.0.0/iglu-server" port = 8081 } # 'postgres' contains configuration options for the postgre instance the server # is using postgres { - host = "localhost" + host = "postgres" port = 5432 dbname = "iglu" username = "snowplow" @@ -32,14 +36,16 @@ postgres { } akka { + loggers = ["akka.event.slf4j.Slf4jLogger"] loglevel = INFO log-dead-letters = off + stdout-loglevel = "DEBUG" + logging-filter = "akka.event.slf4j.Slf4jLoggingFilter" } -# spray-can is the HTTP server the Iglu repository server is built on. -spray.can { +akka.http { server { - request-timeout = 10s + request-timeout = 10 seconds remote-address-header = on parsing.uri-parsing-mode = relaxed } diff --git a/provisioning/resources/configs/snowplow-es-loader-bad.hocon b/provisioning/resources/configs/snowplow-es-loader-bad.hocon index 50909e58..01eab2af 100644 --- a/provisioning/resources/configs/snowplow-es-loader-bad.hocon +++ b/provisioning/resources/configs/snowplow-es-loader-bad.hocon @@ -14,93 +14,37 @@ # This file (config.hocon.sample) contains a template with # configuration options for the Elasticsearch Loader. -# Sources currently supported are: -# "kinesis" for reading records from a Kinesis stream -# "stdin" for reading unencoded tab-separated events from stdin -# If set to "stdin", JSON documents will not be sent to Elasticsearch -# but will be written to stdout. 
-# "nsq" for reading unencoded tab-separated events from NSQ source = nsq -# Where to write good and bad records sink { - # Sinks currently supported are: - # "elasticsearch" for writing good records to Elasticsearch - # "stdout" for writing good records to stdout good = elasticsearch - - # Sinks currently supported are: - # "kinesis" for writing bad records to Kinesis - # "stderr" for writing bad records to stderr - # "nsq" for writing bad records to NSQ - # "none" for ignoring bad records bad = none } -# "good" for a stream of successfully enriched events -# "bad" for a stream of bad events -# "plain-json" for writing plain json enabled = bad -# The following are used to authenticate for the Amazon Kinesis sink. -# -# If both are set to "default", the default provider chain is used -# (see http://docs.aws.amazon.com/AWSJavaSDK/latest/javadoc/com/amazonaws/auth/DefaultAWSCredentialsProviderChain.html) -# -# If both are set to "iam", use AWS IAM Roles to provision credentials. -# -# If both are set to "env", use environment variables AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY aws { accessKey: "" secretKey: "" } -# config for NSQ nsq { - # Channel name for NSQ source channelName = ESLoaderChannelBad - - # Host name for NSQ tools - host = "127.0.0.1" - - # TCP port for nsqd + host = nsqlookupd port = 4150 - - # HTTP port for nsqlookupd lookupPort = 4161 } kinesis { - # "LATEST": most recent data. - # "TRIM_HORIZON": oldest available data. - # "AT_TIMESTAMP": Start from the record at or after the specified timestamp - # Note: This only affects the first run of this application on a stream. initialPosition= TRIM_HORIZON - - # Maximum number of records to get from Kinesis per call to GetRecords maxRecords = 1000 - - # Region where the Kinesis stream is located region = "" - - # "appName" is used for a DynamoDB table to maintain stream state. - # You can set it automatically using: "SnowplowElasticsearchSink-${sink.kinesis.in.stream-name}" appName = "" } -# Common configuration section for all stream sources streams { inStreamName = BadEnrichedEvents - - # Stream for enriched events which are rejected by Elasticsearch outStreamName = BadElasticsearchEvents - - # Events are accumulated in a buffer before being sent to Elasticsearch. 
- # Note: Buffering is not supported by NSQ; will be ignored - # The buffer is emptied whenever: - # - the combined size of the stored records exceeds byteLimit or - # - the number of stored records exceeds recordLimit or - # - the time in milliseconds since it was last emptied exceeds timeLimit buffer { byteLimit = 5242880 recordLimit = 1 @@ -110,31 +54,18 @@ streams { elasticsearch { - # Events are indexed using an Elasticsearch Client - # - endpoint: the cluster endpoint - # - port: the port the cluster can be accessed on - # - for http this is usually 9200 - # - for transport this is usually 9300 - # - max-timeout: the maximum attempt time before a client restart - # - ssl: if using the http client, whether to use ssl or not client { - endpoint = "localhost" + endpoint = elasticsearch port = 9200 maxTimeout = 10000 ssl = false } - # When using the AWS ES service - # - signing: if using the http client and the AWS ES service you can sign your requests - # http://docs.aws.amazon.com/general/latest/gr/signing_aws_api_requests.html - # - region where the AWS ES service is located aws { signing = false region = "" } - # index: the Elasticsearch index name - # type: the Elasticsearch index type cluster { name = elasticsearch index = bad diff --git a/provisioning/resources/configs/snowplow-es-loader-good.hocon b/provisioning/resources/configs/snowplow-es-loader-good.hocon index 35b579f7..0ddd2d0d 100644 --- a/provisioning/resources/configs/snowplow-es-loader-good.hocon +++ b/provisioning/resources/configs/snowplow-es-loader-good.hocon @@ -14,93 +14,37 @@ # This file (config.hocon.sample) contains a template with # configuration options for the Elasticsearch Loader. -# Sources currently supported are: -# "kinesis" for reading records from a Kinesis stream -# "stdin" for reading unencoded tab-separated events from stdin -# If set to "stdin", JSON documents will not be sent to Elasticsearch -# but will be written to stdout. -# "nsq" for reading unencoded tab-separated events from NSQ source = nsq -# Where to write good and bad records sink { - # Sinks currently supported are: - # "elasticsearch" for writing good records to Elasticsearch - # "stdout" for writing good records to stdout good = elasticsearch - - # Sinks currently supported are: - # "kinesis" for writing bad records to Kinesis - # "stderr" for writing bad records to stderr - # "nsq" for writing bad records to NSQ - # "none" for ignoring bad records bad = nsq } -# "good" for a stream of successfully enriched events -# "bad" for a stream of bad events -# "plain-json" for writing plain json enabled = good -# The following are used to authenticate for the Amazon Kinesis sink. -# -# If both are set to "default", the default provider chain is used -# (see http://docs.aws.amazon.com/AWSJavaSDK/latest/javadoc/com/amazonaws/auth/DefaultAWSCredentialsProviderChain.html) -# -# If both are set to "iam", use AWS IAM Roles to provision credentials. -# -# If both are set to "env", use environment variables AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY aws { accessKey = "" secretKey = "" } -# config for NSQ nsq { - # Channel name for NSQ source channelName = ESLoaderChannelGood - - # Host name for NSQ tools - host = "127.0.0.1" - - # TCP port for nsqd + host = nsqlookupd port = 4150 - - # HTTP port for nsqlookupd lookupPort = 4161 } kinesis { - # "LATEST": most recent data. - # "TRIM_HORIZON": oldest available data. 
- # "AT_TIMESTAMP": Start from the record at or after the specified timestamp - # Note: This only affects the first run of this application on a stream. initialPosition = TRIM_HORIZON - - # Maximum number of records to get from Kinesis per call to GetRecords maxRecords = 1000 - - # Region where the Kinesis stream is located region = "" - - # "appName" is used for a DynamoDB table to maintain stream state. - # You can set it automatically using: "SnowplowElasticsearchSink-${sink.kinesis.in.stream-name}" appName = "" } -# Common configuration section for all stream sources streams { inStreamName = EnrichedEvents - - # Stream for enriched events which are rejected by Elasticsearch outStreamName = BadElasticsearchEvents - - # Events are accumulated in a buffer before being sent to Elasticsearch. - # Note: Buffering is not supported by NSQ; will be ignored - # The buffer is emptied whenever: - # - the combined size of the stored records exceeds byteLimit or - # - the number of stored records exceeds recordLimit or - # - the time in milliseconds since it was last emptied exceeds timeLimit buffer { byteLimit: 5242880 recordLimit: 1 @@ -110,31 +54,18 @@ streams { elasticsearch { - # Events are indexed using an Elasticsearch Client - # - endpoint: the cluster endpoint - # - port: the port the cluster can be accessed on - # - for http this is usually 9200 - # - for transport this is usually 9300 - # - max-timeout: the maximum attempt time before a client restart - # - ssl: if using the http client, whether to use ssl or not client { - endpoint = "localhost" + endpoint = elasticsearch port = 9200 maxTimeout = 10000 ssl = false } - # When using the AWS ES service - # - signing: if using the http client and the AWS ES service you can sign your requests - # http://docs.aws.amazon.com/general/latest/gr/signing_aws_api_requests.html - # - region where the AWS ES service is located aws { signing = false region = "" } - # index: the Elasticsearch index name - # type: the Elasticsearch index type cluster { name = "elasticsearch" index = "good" diff --git a/provisioning/resources/configs/snowplow-stream-collector.hocon b/provisioning/resources/configs/snowplow-stream-collector.hocon index efea8200..6d712b09 100644 --- a/provisioning/resources/configs/snowplow-stream-collector.hocon +++ b/provisioning/resources/configs/snowplow-stream-collector.hocon @@ -18,69 +18,49 @@ # 'collector' contains configuration options for the main Scala collector. collector { - # The collector runs as a web service specified on the following - # interface and port. interface = "0.0.0.0" port = 8080 - # Configure the P3P policy header. p3p { policyRef = "/w3c/p3p.xml" CP = "NOI DSP COR NID PSA OUR IND COM NAV STA" } - # The collector returns a cookie to clients for user identification - # with the following domain and expiration. + crossDomain { + enabled = false + domain = "*" + secure = true + } + cookie { enabled = true expiration = "365 days" # e.g. "365 days" - # Network cookie name name = sp - # The domain is optional and will make the cookie accessible to other - # applications on the domain. Comment out this line to tie cookies to - # the collector's full domain domain = "" } - # When enabled and the cookie specified above is missing, performs a redirect to itself to check - # if third-party cookies are blocked using the specified name. If they are indeed blocked, - # fallbackNetworkId is used instead of generating a new random one. 
cookieBounce { enabled = false - # The name of the request parameter which will be used on redirects checking that third-party - # cookies work. name = "n3pc" - # Network user id to fallback to when third-party cookies are blocked. fallbackNetworkUserId = "00000000-0000-4000-A000-000000000000" } + redirectMacro { + enabled = false + placeholder = "[TOKEN]" + } + streams { - # Events which have successfully been collected will be stored in the good stream/topic good = RawEvents - - # Events that are too big (w.r.t Kinesis 1MB limit) will be stored in the bad stream/topic bad = BadRawEvents - - # Whether to use the incoming event's ip as the partition key for the good stream/topic useIpAddressAsPartitionKey = false - # config for NSQ sink sink { enabled = nsq - - # Host name for NSQ tools - host = "127.0.0.1" - - # TCP port for nsqd + host = nsqd port = 4150 } - # Incoming events are stored in a buffer before being sent to Kinesis/Kafka. - # Note: Buffering is not supported by NSQ. - # The buffer is emptied whenever: - # - the number of stored records reaches record-limit or - # - the combined size of the stored records reaches byte-limit or - # - the time in milliseconds since the buffer was last emptied reaches time-limit buffer { byteLimit = 4000000 recordLimit = 500 # Not supported by Kafka; will be ignored @@ -89,23 +69,13 @@ collector { } } -# Akka has a variety of possible configuration options defined at -# http://doc.akka.io/docs/akka/current/scala/general/configuration.html akka { loglevel = DEBUG # 'OFF' for no logging, 'DEBUG' for all logging. loggers = ["akka.event.slf4j.Slf4jLogger"] - # akka-http is the server the Stream collector uses and has configurable options defined at - # http://doc.akka.io/docs/akka-http/current/scala/http/configuration.html http.server { - # To obtain the hostname in the collector, the 'remote-address' header - # should be set. By default, this is disabled, and enabling it - # adds the 'Remote-Address' header to every request automatically. remote-address-header = on - raw-request-uri-header = on - - # Define the maximum request length (the default is 2048) parsing { max-uri-length = 32768 uri-parsing-mode = relaxed diff --git a/provisioning/resources/configs/snowplow-stream-enrich.hocon b/provisioning/resources/configs/snowplow-stream-enrich.hocon index 353095bb..b653c604 100644 --- a/provisioning/resources/configs/snowplow-stream-enrich.hocon +++ b/provisioning/resources/configs/snowplow-stream-enrich.hocon @@ -19,38 +19,21 @@ enrich { streams { in { - # Stream/topic where the raw events to be enriched are located raw = RawEvents } out { - # Stream/topic where the events that were successfully enriched will end up enriched = EnrichedEvents - # Stream/topic where the event that failed enrichment will be stored bad = BadEnrichedEvents - - # How the output stream/topic will be partitioned. - # Possible partition keys are: event_id, event_fingerprint, domain_userid, network_userid, - # user_ipaddress, domain_sessionid, user_fingerprint. - # Refer to https://github.com/snowplow/snowplow/wiki/canonical-event-model to know what the - # possible partition keys correspond to. - # Otherwise, the partition key will be a random UUID. - # Note: Nsq does not make use of partition key. 
partitionKey = "" } sourceSink { enabled = nsq - - # Channel name for nsq source rawChannel = StreamEnrichChannel - # Host name for nsqd - host = "127.0.0.1" - # TCP port for nsqd, 4150 by default + host = nsqd port = 4150 - # Host name for lookupd - lookupHost = "127.0.0.1" - # HTTP port for nsqlookupd, 4161 by default + lookupHost = nsqlookupd lookupPort = 4161 } @@ -60,6 +43,6 @@ enrich { timeLimit = 5000 } - appName = "" + appName = "snowplow-stream-enrich" } } diff --git a/provisioning/resources/control-plane/change_credentials_test.go b/provisioning/resources/control-plane/change_credentials_test.go index e186fe01..b71f1466 100644 --- a/provisioning/resources/control-plane/change_credentials_test.go +++ b/provisioning/resources/control-plane/change_credentials_test.go @@ -38,7 +38,6 @@ func TestChangeCredentials(t *testing.T) { /kibana /elasticsearch /control-plane - /_plugin } ` expectedCaddyConfigHeadAfter := @@ -49,7 +48,6 @@ func TestChangeCredentials(t *testing.T) { /kibana /elasticsearch /control-plane - /_plugin } ` dir, err := ioutil.TempDir("", "testDir") diff --git a/provisioning/resources/control-plane/change_domain_name_test.go b/provisioning/resources/control-plane/change_domain_name_test.go index 44075d27..c71a7df8 100644 --- a/provisioning/resources/control-plane/change_domain_name_test.go +++ b/provisioning/resources/control-plane/change_domain_name_test.go @@ -38,7 +38,6 @@ func TestChangeDomainName(t *testing.T) { /kibana /elasticsearch /control-plane - /_plugin } ` expectedCaddyConfigHeadAfter := @@ -49,7 +48,6 @@ func TestChangeDomainName(t *testing.T) { /kibana /elasticsearch /control-plane - /_plugin } ` dir, err := ioutil.TempDir("", "testDir") diff --git a/provisioning/resources/control-plane/local_iglu.go b/provisioning/resources/control-plane/local_iglu.go index d1a316fb..0cae6c6f 100644 --- a/provisioning/resources/control-plane/local_iglu.go +++ b/provisioning/resources/control-plane/local_iglu.go @@ -51,9 +51,7 @@ func (li LocalIglu) addApiKeyToConfig() error { for i, repo := range igluConf.Data.Repos { igluUri := repo.Conn.Http["uri"] - if strings.Contains(igluUri, "localhost") || - strings.Contains(igluUri, "127.0.0.1") { - + if strings.Contains(igluUri, "iglu-server") { igluConf.Data.Repos[i].Conn.Http["apikey"] = li.IgluApikey } } @@ -67,6 +65,7 @@ func (li LocalIglu) addApiKeyToConfig() error { } func (li LocalIglu) insertApiKeyToDb() error { + db := pg.Connect(&pg.Options{ User: li.Psql.User, Password: li.Psql.Password, diff --git a/provisioning/resources/control-plane/main.go b/provisioning/resources/control-plane/main.go index 5280100c..7f8bbcb1 100644 --- a/provisioning/resources/control-plane/main.go +++ b/provisioning/resources/control-plane/main.go @@ -35,7 +35,8 @@ var configPath string var config ControlPlaneConfig func main() { - configFlag := flag.String("config", "", "Control Plane API config file") + configFlag := flag.String("config", "/home/ubuntu/snowplow/configs/control-plane-api.toml", + "Control Plane API config file") flag.Parse() configPath = *configFlag @@ -209,6 +210,7 @@ func addLocalIgluApikey(resp http.ResponseWriter, req *http.Request) { IgluApikey: igluApikey, Psql: psqlInfos, } + err := localIglu.addApiKey() if err != nil { http.Error(resp, err.Error(), 500) diff --git a/provisioning/resources/control-plane/restart_services.go b/provisioning/resources/control-plane/restart_services.go index dbef2a08..57c9f57d 100644 --- a/provisioning/resources/control-plane/restart_services.go +++ 
b/provisioning/resources/control-plane/restart_services.go @@ -35,38 +35,34 @@ func restartService(service string) error { } if val, ok := initMap[service]; ok { - restartCommand := []string{"service", val, "restart"} - - cmd := exec.Command("/bin/bash", restartCommand...) - err := cmd.Run() - if err != nil { - return err + if service == "caddy" { + restartCommand := []string{"service", val, "restart"} + cmd := exec.Command("/bin/bash", restartCommand...) + err := cmd.Run() + if err != nil { + return err + } + return nil + } else { + restartCommandArgs := []string{"-f", "/home/ubuntu/snowplow/docker-compose.yml", + "restart", val} + cmd := exec.Command("/usr/local/bin/docker-compose", restartCommandArgs...) + err := cmd.Run() + if err != nil { + return err + } + return nil } - return nil } - return errors.New("unrecognized service") + return errors.New("unrecognized service: " + service) } func restartSPServices() error { - err := restartService("streamCollector") - if err != nil { - return err - } - - err = restartService("streamEnrich") - if err != nil { - return err - } - - err = restartService("esLoaderGood") + restartCommandArgs := []string{"-f", "/home/ubuntu/snowplow/docker-compose.yml", "restart"} + cmd := exec.Command("/usr/local/bin/docker-compose", restartCommandArgs...) + err := cmd.Run() if err != nil { return err } - - err = restartService("esLoaderBad") - if err != nil { - return err - } - return nil } diff --git a/provisioning/resources/elasticsearch/config/elasticsearch.yml b/provisioning/resources/elasticsearch/config/elasticsearch.yml new file mode 100644 index 00000000..1f8f4c63 --- /dev/null +++ b/provisioning/resources/elasticsearch/config/elasticsearch.yml @@ -0,0 +1,33 @@ +# ======================== Elasticsearch Configuration ========================= +# +# NOTE: Elasticsearch comes with reasonable defaults for most settings. +# Before you set out to tweak and tune the configuration, make sure you +# understand what are you trying to accomplish and the consequences. +# +# The primary way of configuring a node is via this file. This template lists +# the most important settings you may want to configure for a production cluster. 
+# +# Please consult the documentation for further information on configuration options: +# https://www.elastic.co/guide/en/elasticsearch/reference/index.html +# +# ---------------------------------- Cluster ----------------------------------- +# +# Use a descriptive name for your cluster: +# +cluster.name: "sp-mini-es-cluster" +# +# ------------------------------------ Node ------------------------------------ +# +# Use a descriptive name for the node: +# +node.name: "sp-mini-es-node" +# ---------------------------------- Network ----------------------------------- +# +# Set the bind address to a specific IP (IPv4 or IPv6): +# +network.host: 0.0.0.0 +# --------------------------------- Discovery ---------------------------------- +# +# Prevent the "split brain" by configuring the majority of nodes (total number of master-eligible nodes / 2 + 1): +# +discovery.zen.minimum_master_nodes: 1 diff --git a/provisioning/resources/elasticsearch/config/jvm.options b/provisioning/resources/elasticsearch/config/jvm.options new file mode 100644 index 00000000..5a8c88ce --- /dev/null +++ b/provisioning/resources/elasticsearch/config/jvm.options @@ -0,0 +1,102 @@ +## JVM configuration + +################################################################ +## IMPORTANT: JVM heap size +################################################################ +## +## You should always set the min and max JVM heap +## size to the same value. For example, to set +## the heap to 4 GB, set: +## +## -Xms4g +## -Xmx4g +## +## See https://www.elastic.co/guide/en/elasticsearch/reference/current/heap-size.html +## for more information +## +################################################################ + +# Xms represents the initial size of total heap space +# Xmx represents the maximum size of total heap space + +-Xms4g +-Xmx4g + +################################################################ +## Expert settings +################################################################ +## +## All settings below this section are considered +## expert settings. Don't tamper with them unless +## you understand what you are doing +## +################################################################ + +## GC configuration +-XX:+UseConcMarkSweepGC +-XX:CMSInitiatingOccupancyFraction=75 +-XX:+UseCMSInitiatingOccupancyOnly + +## optimizations + +# pre-touch memory pages used by the JVM during initialization +-XX:+AlwaysPreTouch + +## basic + +# explicitly set the stack size +-Xss1m + +# set to headless, just in case +-Djava.awt.headless=true + +# ensure UTF-8 encoding by default (e.g. 
filenames) +-Dfile.encoding=UTF-8 + +# use our provided JNA always versus the system one +-Djna.nosys=true + +# turn off a JDK optimization that throws away stack traces for common +# exceptions because stack traces are important for debugging +-XX:-OmitStackTraceInFastThrow + +# flags to configure Netty +-Dio.netty.noUnsafe=true +-Dio.netty.noKeySetOptimization=true +-Dio.netty.recycler.maxCapacityPerThread=0 + +# log4j 2 +-Dlog4j.shutdownHookEnabled=false +-Dlog4j2.disable.jmx=true + +-Djava.io.tmpdir=${ES_TMPDIR} + +## heap dumps + +# generate a heap dump when an allocation from the Java heap fails +# heap dumps are created in the working directory of the JVM +-XX:+HeapDumpOnOutOfMemoryError + +# specify an alternative path for heap dumps; ensure the directory exists and +# has sufficient space +-XX:HeapDumpPath=data + +# specify an alternative path for JVM fatal error logs +-XX:ErrorFile=logs/hs_err_pid%p.log + +## JDK 8 GC logging + +8:-XX:+PrintGCDetails +8:-XX:+PrintGCDateStamps +8:-XX:+PrintTenuringDistribution +8:-XX:+PrintGCApplicationStoppedTime +8:-Xloggc:logs/gc.log +8:-XX:+UseGCLogFileRotation +8:-XX:NumberOfGCLogFiles=32 +8:-XX:GCLogFileSize=64m + +# JDK 9+ GC logging +9-:-Xlog:gc*,gc+age=trace,safepoint:file=logs/gc.log:utctime,pid,tags:filecount=32,filesize=64m +# due to internationalization enhancements in JDK 9 Elasticsearch need to set the provider to COMPAT otherwise +# time/date parsing will break in an incompatible way for some date patterns and locals +9-:-Djava.locale.providers=COMPAT \ No newline at end of file diff --git a/provisioning/resources/elasticsearch/config/kibana.yml b/provisioning/resources/elasticsearch/config/kibana.yml new file mode 100644 index 00000000..c7fbf815 --- /dev/null +++ b/provisioning/resources/elasticsearch/config/kibana.yml @@ -0,0 +1,114 @@ +# Kibana is served by a back end server. This setting specifies the port to use. +server.port: 5601 + +# Specifies the address to which the Kibana server will bind. IP addresses and host names are both valid values. +# The default is 'localhost', which usually means remote machines will not be able to connect. +# To allow connections from remote users, set this parameter to a non-loopback address. +server.host: "0" + +# Enables you to specify a path to mount Kibana at if you are running behind a proxy. +# Use the `server.rewriteBasePath` setting to tell Kibana if it should remove the basePath +# from requests it receives, and to prevent a deprecation warning at startup. +# This setting cannot end in a slash. +server.basePath: "kibana" + +# Specifies whether Kibana should rewrite requests that are prefixed with +# `server.basePath` or require that they are rewritten by your reverse proxy. +# This setting was effectively always `false` before Kibana 6.3 and will +# default to `true` starting in Kibana 7.0. +server.rewriteBasePath: true + +# The maximum payload size in bytes for incoming server requests. +#server.maxPayloadBytes: 1048576 + +# The Kibana server's name. This is used for display purposes. +server.name: "kibana" + +# The URL of the Elasticsearch instance to use for all your queries. +elasticsearch.url: http://elasticsearch:9200 + +# When this setting's value is true Kibana uses the hostname specified in the server.host +# setting. When the value of this setting is false, Kibana uses the hostname of the host +# that connects to this Kibana instance. +#elasticsearch.preserveHost: true + +# Kibana uses an index in Elasticsearch to store saved searches, visualizations and +# dashboards. 
Kibana creates a new index if the index doesn't already exist. +kibana.index: ".kibana" + +# The default application to load. +kibana.defaultAppId: "discover" + +# If your Elasticsearch is protected with basic authentication, these settings provide +# the username and password that the Kibana server uses to perform maintenance on the Kibana +# index at startup. Your Kibana users still need to authenticate with Elasticsearch, which +# is proxied through the Kibana server. +#elasticsearch.username: "user" +#elasticsearch.password: "pass" + +# Enables SSL and paths to the PEM-format SSL certificate and SSL key files, respectively. +# These settings enable SSL for outgoing requests from the Kibana server to the browser. +#server.ssl.enabled: false +#server.ssl.certificate: /path/to/your/server.crt +#server.ssl.key: /path/to/your/server.key + +# Optional settings that provide the paths to the PEM-format SSL certificate and key files. +# These files validate that your Elasticsearch backend uses the same key files. +#elasticsearch.ssl.certificate: /path/to/your/client.crt +#elasticsearch.ssl.key: /path/to/your/client.key + +# Optional setting that enables you to specify a path to the PEM file for the certificate +# authority for your Elasticsearch instance. +#elasticsearch.ssl.certificateAuthorities: [ "/path/to/your/CA.pem" ] + +# To disregard the validity of SSL certificates, change this setting's value to 'none'. +#elasticsearch.ssl.verificationMode: full + +# Time in milliseconds to wait for Elasticsearch to respond to pings. Defaults to the value of +# the elasticsearch.requestTimeout setting. +#elasticsearch.pingTimeout: 1500 + +# Time in milliseconds to wait for responses from the back end or Elasticsearch. This value +# must be a positive integer. +#elasticsearch.requestTimeout: 30000 + +# List of Kibana client-side headers to send to Elasticsearch. To send *no* client-side +# headers, set this value to [] (an empty list). +#elasticsearch.requestHeadersWhitelist: [ authorization ] + +# Header names and values that are sent to Elasticsearch. Any custom headers cannot be overwritten +# by client-side headers, regardless of the elasticsearch.requestHeadersWhitelist configuration. +#elasticsearch.customHeaders: {} + +# Time in milliseconds for Elasticsearch to wait for responses from shards. Set to 0 to disable. +#elasticsearch.shardTimeout: 30000 + +# Time in milliseconds to wait for Elasticsearch at Kibana startup before retrying. +#elasticsearch.startupTimeout: 5000 + +# Logs queries sent to Elasticsearch. Requires logging.verbose set to true. +#elasticsearch.logQueries: false + +# Specifies the path where Kibana creates the process ID file. +#pid.file: /var/run/kibana.pid + +# Enables you specify a file where Kibana stores log output. +#logging.dest: stdout + +# Set the value of this setting to true to suppress all logging output. +#logging.silent: false + +# Set the value of this setting to true to suppress all logging output other than error messages. +#logging.quiet: false + +# Set the value of this setting to true to log all events, including system usage information +# and all requests. +#logging.verbose: false + +# Set the interval in milliseconds to sample system and process performance +# metrics. Minimum is 100ms. Defaults to 5000. +#ops.interval: 5000 + +# The default locale. This locale can be used in certain circumstances to substitute any missing +# translations. 
+#i18n.defaultLocale: "en" \ No newline at end of file diff --git a/provisioning/resources/elasticsearch/config/log4j2.properties b/provisioning/resources/elasticsearch/config/log4j2.properties new file mode 100644 index 00000000..d0f8ef04 --- /dev/null +++ b/provisioning/resources/elasticsearch/config/log4j2.properties @@ -0,0 +1,28 @@ +status = error + +appender.console.type = Console +appender.console.name = console +appender.console.layout.type = PatternLayout +appender.console.layout.pattern = [%d{ISO8601}][%-5p][%-25c{1.}] %marker%m%n + +appender.rolling.type = RollingFile +appender.rolling.name = rolling +appender.rolling.fileName = ${sys:es.logs.base_path}${sys:file.separator}${sys:es.logs.cluster_name}.log +appender.rolling.layout.type = PatternLayout +appender.rolling.layout.pattern = [%d{ISO8601}][%-5p][%-25c] %.10000m%n +appender.rolling.filePattern = ${sys:es.logs.base_path}${sys:file.separator}${sys:es.logs.cluster_name}-%d{yyyy-MM-dd}.log.zip +appender.rolling.policies.type = Policies +appender.rolling.policies.time.type = TimeBasedTriggeringPolicy +appender.rolling.policies.time.interval = 1 +appender.rolling.policies.time.modulate = true + +appender.rolling.strategy.type = DefaultRolloverStrategy +appender.rolling.strategy.action.type = Delete +appender.rolling.strategy.action.basepath = ${sys:es.logs.base_path} +appender.rolling.strategy.action.condition.type = IfLastModified +appender.rolling.strategy.action.condition.age = 3D +appender.rolling.strategy.action.PathConditions.type = IfFileName +appender.rolling.strategy.action.PathConditions.glob = ${sys:es.logs.cluster_name}-* + +rootLogger.level = info +rootLogger.appenderRef.console.ref = console diff --git a/provisioning/resources/elasticsearch/bad-mapping.json b/provisioning/resources/elasticsearch/mapping/bad-mapping.json similarity index 72% rename from provisioning/resources/elasticsearch/bad-mapping.json rename to provisioning/resources/elasticsearch/mapping/bad-mapping.json index ee8740d0..7b96de74 100644 --- a/provisioning/resources/elasticsearch/bad-mapping.json +++ b/provisioning/resources/elasticsearch/mapping/bad-mapping.json @@ -14,23 +14,15 @@ }, "mappings": { "bad": { - "_timestamp" : { - "enabled" : "yes", - "path" : "failure_tstamp" - }, - "_ttl": { - "enabled": true, - "default": "604800000" - }, "properties": { "errors": { "properties": { "message" : { - "type": "string", + "type": "text", "analyzer": "standard" }, "level" : { - "type": "string", + "type": "text", "analyzer": "standard" } } @@ -40,7 +32,7 @@ "format": "dateOptionalTime" }, "line": { - "type": "string", + "type": "text", "analyzer": "standard" } } diff --git a/provisioning/resources/elasticsearch/good-mapping.json b/provisioning/resources/elasticsearch/mapping/good-mapping.json similarity index 57% rename from provisioning/resources/elasticsearch/good-mapping.json rename to provisioning/resources/elasticsearch/mapping/good-mapping.json index 1102d531..4437a784 100644 --- a/provisioning/resources/elasticsearch/good-mapping.json +++ b/provisioning/resources/elasticsearch/mapping/good-mapping.json @@ -14,29 +14,21 @@ }, "mappings": { "good": { - "_timestamp" : { - "enabled" : "yes", - "path" : "collector_tstamp" - }, - "_ttl": { - "enabled": true, - "default": "604800000" - }, "properties": { "app_id": { - "type": "string", - "index": "not_analyzed" + "type": "keyword", + "index": true }, "br_colordepth": { - "type": "string", - "index": "not_analyzed" + "type": "keyword", + "index": true }, "br_cookies": { "type": "boolean" }, 
"br_family": { - "type": "string", - "index": "not_analyzed" + "type": "keyword", + "index": true }, "br_features_director": { "type": "boolean" @@ -66,24 +58,24 @@ "type": "boolean" }, "br_lang": { - "type": "string", - "index": "not_analyzed" + "type": "keyword", + "index": true }, "br_name": { - "type": "string", - "index": "not_analyzed" + "type": "keyword", + "index": true }, "br_renderengine": { - "type": "string", - "index": "not_analyzed" + "type": "keyword", + "index": true }, "br_type": { - "type": "string", - "index": "not_analyzed" + "type": "keyword", + "index": true }, "br_version": { - "type": "string", - "index": "not_analyzed" + "type": "keyword", + "index": true }, "br_viewheight": { "type": "long" @@ -96,8 +88,8 @@ "format": "dateOptionalTime" }, "doc_charset": { - "type": "string", - "index": "not_analyzed" + "type": "keyword", + "index": true }, "doc_height": { "type": "long" @@ -106,15 +98,15 @@ "type": "long" }, "domain_sessionid": { - "type": "string", - "index": "not_analyzed" + "type": "keyword", + "index": true }, "domain_sessionidx": { "type": "long" }, "domain_userid": { - "type": "string", - "index": "not_analyzed" + "type": "keyword", + "index": true }, "dvce_ismobile": { "type": "boolean" @@ -134,106 +126,106 @@ "format": "dateOptionalTime" }, "dvce_type": { - "type": "string", - "index": "not_analyzed" + "type": "keyword", + "index": true }, "etl_tstamp": { "type": "date", "format": "dateOptionalTime" }, "event": { - "type": "string", - "index": "not_analyzed" + "type": "keyword", + "index": true }, "event_id": { - "type": "string", - "index": "not_analyzed" + "type": "keyword", + "index": true }, "geo_location": { "type": "geo_point" }, "mkt_campaign": { - "type": "string", - "index": "not_analyzed" + "type": "keyword", + "index": true }, "mkt_content": { - "type": "string", - "index": "not_analyzed" + "type": "keyword", + "index": true }, "mkt_medium": { - "type": "string", - "index": "not_analyzed" + "type": "keyword", + "index": true }, "mkt_source": { - "type": "string", - "index": "not_analyzed" + "type": "keyword", + "index": true }, "mkt_term": { - "type": "string", - "index": "not_analyzed" + "type": "keyword", + "index": true }, "name_tracker": { - "type": "string", - "index": "not_analyzed" + "type": "keyword", + "index": true }, "network_userid": { - "type": "string", - "index": "not_analyzed" + "type": "keyword", + "index": true }, "os_family": { - "type": "string", - "index": "not_analyzed" + "type": "keyword", + "index": true }, "os_manufacturer": { - "type": "string", - "index": "not_analyzed" + "type": "keyword", + "index": true }, "os_name": { - "type": "string", - "index": "not_analyzed" + "type": "keyword", + "index": true }, "os_timezone": { - "type": "string", - "index": "not_analyzed" + "type": "keyword", + "index": true }, "page_referrer": { - "type": "string", - "index": "not_analyzed" + "type": "keyword", + "index": true }, "page_title": { - "type": "string", - "index": "not_analyzed" + "type": "keyword", + "index": true }, "page_url": { - "type": "string", - "index": "not_analyzed" + "type": "keyword", + "index": true }, "page_urlfragment": { - "type": "string", - "index": "not_analyzed" + "type": "keyword", + "index": true }, "page_urlhost": { - "type": "string", - "index": "not_analyzed" + "type": "keyword", + "index": true }, "page_urlpath": { - "type": "string", - "index": "not_analyzed" + "type": "keyword", + "index": true }, "page_urlport": { "type": "long" }, "page_urlquery": { - "type": "string", - "index": "not_analyzed" 
+ "type": "keyword", + "index": true }, "page_urlscheme": { - "type": "string", - "index": "not_analyzed" + "type": "keyword", + "index": true }, "platform": { - "type": "string", - "index": "not_analyzed" + "type": "keyword", + "index": true }, "pp_xoffset_max": { "type": "long" @@ -248,79 +240,79 @@ "type": "long" }, "refr_medium": { - "type": "string", - "index": "not_analyzed" + "type": "keyword", + "index": true }, "refr_source": { - "type": "string", - "index": "not_analyzed" + "type": "keyword", + "index": true }, "refr_term": { - "type": "string", - "index": "not_analyzed" + "type": "keyword", + "index": true }, "refr_urlfragment": { - "type": "string", - "index": "not_analyzed" + "type": "keyword", + "index": true }, "refr_urlhost": { - "type": "string", - "index": "not_analyzed" + "type": "keyword", + "index": true }, "refr_urlpath": { - "type": "string", - "index": "not_analyzed" + "type": "keyword", + "index": true }, "refr_urlport": { "type": "long" }, "refr_urlquery": { - "type": "string", - "index": "not_analyzed" + "type": "keyword", + "index": true }, "refr_urlscheme": { - "type": "string", - "index": "not_analyzed" + "type": "keyword", + "index": true }, "se_action": { - "type": "string", - "index": "not_analyzed" + "type": "keyword", + "index": true }, "se_category": { - "type": "string", - "index": "not_analyzed" + "type": "keyword", + "index": true }, "se_label": { - "type": "string", - "index": "not_analyzed" + "type": "keyword", + "index": true }, "user_fingerprint": { - "type": "string", - "index": "not_analyzed" + "type": "keyword", + "index": true }, "user_id": { - "type": "string", - "index": "not_analyzed" + "type": "keyword", + "index": true }, "user_ipaddress": { - "type": "string", - "index": "not_analyzed" + "type": "keyword", + "index": true }, "useragent": { - "type": "string", - "index": "not_analyzed" + "type": "keyword", + "index": true }, "v_collector": { - "type": "string", - "index": "not_analyzed" + "type": "keyword", + "index": true }, "v_etl": { - "type": "string", - "index": "not_analyzed" + "type": "keyword", + "index": true }, "v_tracker": { - "type": "string", - "index": "not_analyzed" + "type": "keyword", + "index": true } } } diff --git a/provisioning/resources/init/create.sh b/provisioning/resources/init/create.sh new file mode 100755 index 00000000..2515f377 --- /dev/null +++ b/provisioning/resources/init/create.sh @@ -0,0 +1,43 @@ +#!/bin/sh + +# Create Elasticsearch indices +curl -H 'Content-Type: application/json' -X PUT localhost:9200/good -d @/home/ubuntu/snowplow/elasticsearch/mapping/good-mapping.json && \ +curl -H 'Content-Type: application/json' -X PUT localhost:9200/bad -d @/home/ubuntu/snowplow/elasticsearch/mapping/bad-mapping.json && \ + +# Create Kibana index patterns +curl -X POST \ + http://localhost:5601/api/saved_objects/index-pattern/good \ + -H 'Content-Type: application/json' \ + -H 'kbn-xsrf: true' \ + -d '{ + "attributes": { + "title": "good", + "timeFieldName": "collector_tstamp" + } +}' + +curl -X POST \ + http://localhost:5601/api/saved_objects/index-pattern/bad \ + -H 'Content-Type: application/json' \ + -H 'kbn-xsrf: true' \ + -d '{ + "attributes": { + "title": "bad", + "timeFieldName": "failure_tstamp" + } +}' + +# Set `good` as default index pattern +curl -X POST \ + http://localhost:5601/api/kibana/settings/defaultIndex \ + -H "Content-Type: application/json" \ + -H "kbn-xsrf: true" \ + -d '{ + "value": "good" +}' + +# Create NSQ topics +curl -X POST localhost:4151/topic/create?topic=RawEvents && \ +curl -X 
POST localhost:4151/topic/create?topic=BadEvents && \ +curl -X POST localhost:4151/topic/create?topic=EnrichedEvents && \ +curl -X POST localhost:4151/topic/create?topic=BadEnrichedEvents diff --git a/provisioning/resources/init/iglu-server-init.sql b/provisioning/resources/init/iglu-server-init.sql new file mode 100644 index 00000000..7e572822 --- /dev/null +++ b/provisioning/resources/init/iglu-server-init.sql @@ -0,0 +1,2 @@ +CREATE USER snowplow WITH PASSWORD 'snowplow'; +CREATE DATABASE iglu OWNER snowplow; diff --git a/provisioning/resources/init/iglu_server_0.2.0 b/provisioning/resources/init/iglu_server_0.2.0 deleted file mode 100755 index 019d11d3..00000000 --- a/provisioning/resources/init/iglu_server_0.2.0 +++ /dev/null @@ -1,97 +0,0 @@ -#!/bin/sh -### BEGIN INIT INFO -# Provides: -# Required-Start: $remote_fs $syslog -# Required-Stop: $remote_fs $syslog -# Default-Start: 2 3 4 5 -# Default-Stop: 0 1 6 -# Short-Description: Start daemon at boot time -# Description: Enable service provided by daemon. -### END INIT INFO - -dir="/home/ubuntu/snowplow/bin/" -cmd="java -Dconfig.file=/home/ubuntu/snowplow/configs/iglu-server.conf -jar $dir/iglu-server-0.2.0.jar com.snowplowanalytics.iglu.server.Boot" -user="ubuntu" - -name="iglu_server_0.2.0" -pid_file="/var/run/$name.pid" -stdout_log="/var/log/$name.log" -stderr_log="/var/log/$name.err" - -get_pid() { - cat "$pid_file" -} - -is_running() { - [ -f "$pid_file" ] && ps `get_pid` > /dev/null 2>&1 -} - -case "$1" in - start) - if is_running; then - echo "Already started" - else - echo "Starting $name" - if [ -z "$user" ]; then - sudo $cmd >> "$stdout_log" 2>> "$stderr_log" & - else - sudo -u "$user" $cmd >> "$stdout_log" 2>> "$stderr_log" & - fi - echo $! > "$pid_file" - if ! is_running; then - echo "Unable to start, see $stdout_log and $stderr_log" - exit 1 - fi - fi - ;; - stop) - if is_running; then - echo -n "Stopping $name.." - kill `get_pid` - for i in {1..10} - do - if ! is_running; then - break - fi - - echo -n "." 
- sleep 1 - done - echo - - if is_running; then - echo "Not stopped; may still be shutting down or shutdown may have failed" - exit 1 - else - echo "Stopped" - if [ -f "$pid_file" ]; then - rm "$pid_file" - fi - fi - else - echo "Not running" - fi - ;; - restart) - $0 stop - if is_running; then - echo "Unable to stop, will not attempt to start" - exit 1 - fi - $0 start - ;; - status) - if is_running; then - echo "Running" - else - echo "Stopped" - exit 1 - fi - ;; - *) - echo "Usage: $0 {start|stop|restart|status}" - exit 1 - ;; -esac - -exit 0 diff --git a/provisioning/resources/init/kibana4_init b/provisioning/resources/init/kibana4_init deleted file mode 100755 index da5b12da..00000000 --- a/provisioning/resources/init/kibana4_init +++ /dev/null @@ -1,87 +0,0 @@ -#!/bin/sh -# -# /etc/init.d/kibana4_init -- startup script for kibana4 -# bsmith@the408.com 2015-02-20; used elasticsearch init script as template -# https://github.com/akabdog/scripts/edit/master/kibana4_init -# -### BEGIN INIT INFO -# Provides: kibana4_init -# Required-Start: $network $remote_fs $named -# Required-Stop: $network $remote_fs $named -# Default-Start: 2 3 4 5 -# Default-Stop: 0 1 6 -# Short-Description: Starts kibana4_init -# Description: Starts kibana4_init using start-stop-daemon -### END INIT INFO - -#configure this with wherever you unpacked kibana: -KIBANA_BIN=/opt/kibana/bin - -NAME=kibana4 -PID_FILE=/var/run/$NAME.pid -PATH=/bin:/usr/bin:/sbin:/usr/sbin:$KIBANA_BIN -DAEMON=$KIBANA_BIN/kibana -DESC="Kibana4" - -if [ `id -u` -ne 0 ]; then - echo "You need root privileges to run this script" - exit 1 -fi - -. /lib/lsb/init-functions - -if [ -r /etc/default/rcS ]; then - . /etc/default/rcS -fi - -case "$1" in - start) - log_daemon_msg "Starting $DESC" - - pid=`pidofproc -p $PID_FILE kibana` - if [ -n "$pid" ] ; then - log_begin_msg "Already running." - log_end_msg 0 - exit 0 - fi - - # Start Daemon - start-stop-daemon --start --pidfile "$PID_FILE" --make-pidfile --background --exec $DAEMON - log_end_msg $? - ;; - stop) - log_daemon_msg "Stopping $DESC" - - if [ -f "$PID_FILE" ]; then - start-stop-daemon --stop --pidfile "$PID_FILE" \ - --retry=TERM/20/KILL/5 >/dev/null - if [ $? -eq 1 ]; then - log_progress_msg "$DESC is not running but pid file exists, cleaning up" - elif [ $? -eq 3 ]; then - PID="`cat $PID_FILE`" - log_failure_msg "Failed to stop $DESC (pid $PID)" - exit 1 - fi - rm -f "$PID_FILE" - else - log_progress_msg "(not running)" - fi - log_end_msg 0 - ;; - status) - status_of_proc -p $PID_FILE kibana kibana && exit 0 || exit $? - ;; - restart|force-reload) - if [ -f "$PID_FILE" ]; then - $0 stop - sleep 1 - fi - $0 start - ;; - *) - log_success_msg "Usage: $0 {start|stop|restart|force-reload|status}" - exit 1 - ;; -esac - -exit 0 diff --git a/provisioning/resources/init/nsqadmin_init b/provisioning/resources/init/nsqadmin_init deleted file mode 100755 index b0c4d7ed..00000000 --- a/provisioning/resources/init/nsqadmin_init +++ /dev/null @@ -1,98 +0,0 @@ -#!/bin/bash -### BEGIN INIT INFO -# Provides: -# Required-Start: $remote_fs $syslog -# Required-Stop: $remote_fs $syslog -# Default-Start: 2 3 4 5 -# Default-Stop: 0 1 6 -# Short-Description: Start daemon at boot time -# Description: Enable service provided by daemon. 
-### END INIT INFO - -dir="/home/ubuntu/snowplow/bin" -cmd="$dir/nsqadmin --lookupd-http-address=127.0.0.1:4161" -user="" - -name="nsqadmin" - -pid_file="/var/run/$name.pid" -stdout_log="/var/log/$name.log" -stderr_log="/var/log/$name.err" - -get_pid() { - cat "$pid_file" -} - -is_running() { - [ -f "$pid_file" ] && ps `get_pid` > /dev/null 2>&1 -} - -case "$1" in - start) - if is_running; then - echo "Already started" - else - echo "Starting $name" - if [ -z "$user" ]; then - sudo $cmd >> "$stdout_log" 2>> "$stderr_log" & - else - sudo -u "$user" $cmd >> "$stdout_log" 2>> "$stderr_log" & - fi - echo $! > "$pid_file" - if ! is_running; then - echo "Unable to start, see $stdout_log and $stderr_log" - exit 1 - fi - fi - ;; - stop) - if is_running; then - echo -n "Stopping $name.." - kill `get_pid` - for i in {1..10} - do - if ! is_running; then - break - fi - - echo -n "." - sleep 1 - done - echo - - if is_running; then - echo "Not stopped; may still be shutting down or shutdown may have failed" - exit 1 - else - echo "Stopped" - if [ -f "$pid_file" ]; then - rm "$pid_file" - fi - fi - else - echo "Not running" - fi - ;; - restart) - $0 stop - if is_running; then - echo "Unable to stop, will not attempt to start" - exit 1 - fi - $0 start - ;; - status) - if is_running; then - echo "Running" - else - echo "Stopped" - exit 1 - fi - ;; - *) - echo "Usage: $0 {start|stop|restart|status}" - exit 1 - ;; -esac - -exit 0 diff --git a/provisioning/resources/init/nsqd_init b/provisioning/resources/init/nsqd_init deleted file mode 100755 index 1d69dcb6..00000000 --- a/provisioning/resources/init/nsqd_init +++ /dev/null @@ -1,99 +0,0 @@ -#!/bin/bash -### BEGIN INIT INFO -# Provides: -# Required-Start: $remote_fs $syslog -# Required-Stop: $remote_fs $syslog -# Default-Start: 2 3 4 5 -# Default-Stop: 0 1 6 -# Short-Description: Start daemon at boot time -# Description: Enable service provided by daemon. -### END INIT INFO - -dir="/home/ubuntu/snowplow/bin" -cmd="$dir/nsqd --lookupd-tcp-address=127.0.0.1:4160 -data-path /home/ubuntu/snowplow/nsq-data" -user="" - -name="nsqd" - -pid_file="/var/run/$name.pid" -stdout_log="/var/log/$name.log" -stderr_log="/var/log/$name.err" - -get_pid() { - cat "$pid_file" -} - -is_running() { - [ -f "$pid_file" ] && ps `get_pid` > /dev/null 2>&1 -} - -case "$1" in - start) - if is_running; then - echo "Already started" - else - echo "Starting $name" - if [ -z "$user" ]; then - sudo $cmd >> "$stdout_log" 2>> "$stderr_log" & - else - sudo -u "$user" $cmd >> "$stdout_log" 2>> "$stderr_log" & - fi - echo $! > "$pid_file" - if ! is_running; then - echo "Unable to start, see $stdout_log and $stderr_log" - exit 1 - fi - fi - ;; - stop) - if is_running; then - echo -n "Stopping $name.." - kill `get_pid` - for i in {1..10} - do - if ! is_running; then - break - fi - - echo -n "." 
- sleep 1 - done - echo - - if is_running; then - echo "Not stopped; may still be shutting down or shutdown may have failed" - exit 1 - else - echo "Stopped" - if [ -f "$pid_file" ]; then - rm "$pid_file" - fi - fi - else - echo "Not running" - fi - ;; - restart) - $0 stop - if is_running; then - echo "Unable to stop, will not attempt to start" - exit 1 - fi - $0 start - ;; - status) - if is_running; then - echo "Running" - else - echo "Stopped" - exit 1 - fi - ;; - *) - echo "Usage: $0 {start|stop|restart|status}" - exit 1 - ;; -esac - -exit 0 - diff --git a/provisioning/resources/init/nsqlookupd_init b/provisioning/resources/init/nsqlookupd_init deleted file mode 100755 index 0c8e8b35..00000000 --- a/provisioning/resources/init/nsqlookupd_init +++ /dev/null @@ -1,98 +0,0 @@ -#!/bin/bash -### BEGIN INIT INFO -# Provides: -# Required-Start: $remote_fs $syslog -# Required-Stop: $remote_fs $syslog -# Default-Start: 2 3 4 5 -# Default-Stop: 0 1 6 -# Short-Description: Start daemon at boot time -# Description: Enable service provided by daemon. -### END INIT INFO - -dir="/home/ubuntu/snowplow/bin" -cmd="$dir/nsqlookupd" -user="" - -name="nsqlookupd" - -pid_file="/var/run/$name.pid" -stdout_log="/var/log/$name.log" -stderr_log="/var/log/$name.err" - -get_pid() { - cat "$pid_file" -} - -is_running() { - [ -f "$pid_file" ] && ps `get_pid` > /dev/null 2>&1 -} - -case "$1" in - start) - if is_running; then - echo "Already started" - else - echo "Starting $name" - if [ -z "$user" ]; then - sudo $cmd >> "$stdout_log" 2>> "$stderr_log" & - else - sudo -u "$user" $cmd >> "$stdout_log" 2>> "$stderr_log" & - fi - echo $! > "$pid_file" - if ! is_running; then - echo "Unable to start, see $stdout_log and $stderr_log" - exit 1 - fi - fi - ;; - stop) - if is_running; then - echo -n "Stopping $name.." - kill `get_pid` - for i in {1..10} - do - if ! is_running; then - break - fi - - echo -n "." - sleep 1 - done - echo - - if is_running; then - echo "Not stopped; may still be shutting down or shutdown may have failed" - exit 1 - else - echo "Stopped" - if [ -f "$pid_file" ]; then - rm "$pid_file" - fi - fi - else - echo "Not running" - fi - ;; - restart) - $0 stop - if is_running; then - echo "Unable to stop, will not attempt to start" - exit 1 - fi - $0 start - ;; - status) - if is_running; then - echo "Running" - else - echo "Stopped" - exit 1 - fi - ;; - *) - echo "Usage: $0 {start|stop|restart|status}" - exit 1 - ;; -esac - -exit 0 diff --git a/provisioning/resources/init/snowplow_elasticsearch_loader_bad b/provisioning/resources/init/snowplow_elasticsearch_loader_bad deleted file mode 100755 index 939fd4e4..00000000 --- a/provisioning/resources/init/snowplow_elasticsearch_loader_bad +++ /dev/null @@ -1,97 +0,0 @@ -#!/bin/sh -### BEGIN INIT INFO -# Provides: -# Required-Start: $remote_fs $syslog -# Required-Stop: $remote_fs $syslog -# Default-Start: 2 3 4 5 -# Default-Stop: 0 1 6 -# Short-Description: Start daemon at boot time -# Description: Enable service provided by daemon. 
-### END INIT INFO - -dir="/home/ubuntu/snowplow/bin/" -cmd="java -jar $dir/snowplow-elasticsearch-loader-http-0.10.1.jar --config /home/ubuntu/snowplow/configs/snowplow-es-loader-bad.hocon" -user="ubuntu" - -name="snowplow_elasticsearch_loader_bad" -pid_file="/var/run/$name.pid" -stdout_log="/var/log/$name.log" -stderr_log="/var/log/$name.err" - -get_pid() { - cat "$pid_file" -} - -is_running() { - [ -f "$pid_file" ] && ps `get_pid` > /dev/null 2>&1 -} - -case "$1" in - start) - if is_running; then - echo "Already started" - else - echo "Starting $name" - if [ -z "$user" ]; then - sudo $cmd >> "$stdout_log" 2>> "$stderr_log" & - else - sudo -u "$user" $cmd >> "$stdout_log" 2>> "$stderr_log" & - fi - echo $! > "$pid_file" - if ! is_running; then - echo "Unable to start, see $stdout_log and $stderr_log" - exit 1 - fi - fi - ;; - stop) - if is_running; then - echo -n "Stopping $name.." - kill `get_pid` - for i in {1..10} - do - if ! is_running; then - break - fi - - echo -n "." - sleep 1 - done - echo - - if is_running; then - echo "Not stopped; may still be shutting down or shutdown may have failed" - exit 1 - else - echo "Stopped" - if [ -f "$pid_file" ]; then - rm "$pid_file" - fi - fi - else - echo "Not running" - fi - ;; - restart) - $0 stop - if is_running; then - echo "Unable to stop, will not attempt to start" - exit 1 - fi - $0 start - ;; - status) - if is_running; then - echo "Running" - else - echo "Stopped" - exit 1 - fi - ;; - *) - echo "Usage: $0 {start|stop|restart|status}" - exit 1 - ;; -esac - -exit 0 diff --git a/provisioning/resources/init/snowplow_elasticsearch_loader_good b/provisioning/resources/init/snowplow_elasticsearch_loader_good deleted file mode 100755 index 597e9002..00000000 --- a/provisioning/resources/init/snowplow_elasticsearch_loader_good +++ /dev/null @@ -1,97 +0,0 @@ -#!/bin/sh -### BEGIN INIT INFO -# Provides: -# Required-Start: $remote_fs $syslog -# Required-Stop: $remote_fs $syslog -# Default-Start: 2 3 4 5 -# Default-Stop: 0 1 6 -# Short-Description: Start daemon at boot time -# Description: Enable service provided by daemon. -### END INIT INFO - -dir="/home/ubuntu/snowplow/bin/" -cmd="java -jar $dir/snowplow-elasticsearch-loader-http-0.10.1.jar --config /home/ubuntu/snowplow/configs/snowplow-es-loader-good.hocon" -user="ubuntu" - -name="snowplow_elasticsearch_loader_good" -pid_file="/var/run/$name.pid" -stdout_log="/var/log/$name.log" -stderr_log="/var/log/$name.err" - -get_pid() { - cat "$pid_file" -} - -is_running() { - [ -f "$pid_file" ] && ps `get_pid` > /dev/null 2>&1 -} - -case "$1" in - start) - if is_running; then - echo "Already started" - else - echo "Starting $name" - if [ -z "$user" ]; then - sudo $cmd >> "$stdout_log" 2> "$stderr_log" & - else - sudo -u "$user" $cmd >> "$stdout_log" 2> "$stderr_log" & - fi - echo $! > "$pid_file" - if ! is_running; then - echo "Unable to start, see $stdout_log and $stderr_log" - exit 1 - fi - fi - ;; - stop) - if is_running; then - echo -n "Stopping $name.." - kill `get_pid` - for i in {1..10} - do - if ! is_running; then - break - fi - - echo -n "." 
- sleep 1 - done - echo - - if is_running; then - echo "Not stopped; may still be shutting down or shutdown may have failed" - exit 1 - else - echo "Stopped" - if [ -f "$pid_file" ]; then - rm "$pid_file" - fi - fi - else - echo "Not running" - fi - ;; - restart) - $0 stop - if is_running; then - echo "Unable to stop, will not attempt to start" - exit 1 - fi - $0 start - ;; - status) - if is_running; then - echo "Running" - else - echo "Stopped" - exit 1 - fi - ;; - *) - echo "Usage: $0 {start|stop|restart|status}" - exit 1 - ;; -esac - -exit 0 diff --git a/provisioning/resources/init/snowplow_stream_collector b/provisioning/resources/init/snowplow_stream_collector deleted file mode 100755 index 4b8507be..00000000 --- a/provisioning/resources/init/snowplow_stream_collector +++ /dev/null @@ -1,97 +0,0 @@ -#!/bin/sh -### BEGIN INIT INFO -# Provides: -# Required-Start: $remote_fs $syslog -# Required-Stop: $remote_fs $syslog -# Default-Start: 2 3 4 5 -# Default-Stop: 0 1 6 -# Short-Description: Start daemon at boot time -# Description: Enable service provided by daemon. -### END INIT INFO - -dir="/home/ubuntu/snowplow/bin/" -cmd="java -jar $dir/snowplow-stream-collector-0.11.0.jar --config /home/ubuntu/snowplow/configs/snowplow-stream-collector.hocon" -user="ubuntu" - -name="snowplow_stream_collector" -pid_file="/var/run/$name.pid" -stdout_log="/var/log/$name.log" -stderr_log="/var/log/$name.err" - -get_pid() { - cat "$pid_file" -} - -is_running() { - [ -f "$pid_file" ] && ps `get_pid` > /dev/null 2>&1 -} - -case "$1" in - start) - if is_running; then - echo "Already started" - else - echo "Starting $name" - if [ -z "$user" ]; then - sudo $cmd >> "$stdout_log" 2>> "$stderr_log" & - else - sudo -u "$user" $cmd >> "$stdout_log" 2>> "$stderr_log" & - fi - echo $! > "$pid_file" - if ! is_running; then - echo "Unable to start, see $stdout_log and $stderr_log" - exit 1 - fi - fi - ;; - stop) - if is_running; then - echo -n "Stopping $name.." - kill `get_pid` - for i in {1..10} - do - if ! is_running; then - break - fi - - echo -n "." - sleep 1 - done - echo - - if is_running; then - echo "Not stopped; may still be shutting down or shutdown may have failed" - exit 1 - else - echo "Stopped" - if [ -f "$pid_file" ]; then - rm "$pid_file" - fi - fi - else - echo "Not running" - fi - ;; - restart) - $0 stop - if is_running; then - echo "Unable to stop, will not attempt to start" - exit 1 - fi - $0 start - ;; - status) - if is_running; then - echo "Running" - else - echo "Stopped" - exit 1 - fi - ;; - *) - echo "Usage: $0 {start|stop|restart|status}" - exit 1 - ;; -esac - -exit 0 diff --git a/provisioning/resources/init/snowplow_stream_enrich b/provisioning/resources/init/snowplow_stream_enrich deleted file mode 100755 index f64cfad6..00000000 --- a/provisioning/resources/init/snowplow_stream_enrich +++ /dev/null @@ -1,98 +0,0 @@ -#!/bin/sh -### BEGIN INIT INFO -# Provides: -# Required-Start: $remote_fs $syslog -# Required-Stop: $remote_fs $syslog -# Default-Start: 2 3 4 5 -# Default-Stop: 0 1 6 -# Short-Description: Start daemon at boot time -# Description: Enable service provided by daemon. 
-### END INIT INFO - -dir="/home/ubuntu/snowplow/bin/" -cmd="java -jar snowplow-stream-enrich-nsq-0.16.1.jar --config /home/ubuntu/snowplow/configs/snowplow-stream-enrich.hocon --resolver file:/home/ubuntu/snowplow/configs/iglu-resolver.json --enrichments file:/home/ubuntu/snowplow/configs/enrichments" -user="ubuntu" - -name="snowplow_stream_enrich" -pid_file="/var/run/$name.pid" -stdout_log="/var/log/$name.log" -stderr_log="/var/log/$name.err" - -get_pid() { - cat "$pid_file" -} - -is_running() { - [ -f "$pid_file" ] && ps `get_pid` > /dev/null 2>&1 -} - -case "$1" in - start) - if is_running; then - echo "Already started" - else - echo "Starting $name" - cd $dir - if [ -z "$user" ]; then - sudo $cmd >> "$stdout_log" 2>> "$stderr_log" & - else - sudo -u "$user" $cmd >> "$stdout_log" 2>> "$stderr_log" & - fi - echo $! > "$pid_file" - if ! is_running; then - echo "Unable to start, see $stdout_log and $stderr_log" - exit 1 - fi - fi - ;; - stop) - if is_running; then - echo -n "Stopping $name.." - kill `get_pid` - for i in {1..10} - do - if ! is_running; then - break - fi - - echo -n "." - sleep 1 - done - echo - - if is_running; then - echo "Not stopped; may still be shutting down or shutdown may have failed" - exit 1 - else - echo "Stopped" - if [ -f "$pid_file" ]; then - rm "$pid_file" - fi - fi - else - echo "Not running" - fi - ;; - restart) - $0 stop - if is_running; then - echo "Unable to stop, will not attempt to start" - exit 1 - fi - $0 start - ;; - status) - if is_running; then - echo "Running" - else - echo "Stopped" - exit 1 - fi - ;; - *) - echo "Usage: $0 {start|stop|restart|status}" - exit 1 - ;; -esac - -exit 0 diff --git a/provisioning/resources/init/wait-for-postgres.sh b/provisioning/resources/init/wait-for-postgres.sh new file mode 100755 index 00000000..479a5d64 --- /dev/null +++ b/provisioning/resources/init/wait-for-postgres.sh @@ -0,0 +1,16 @@ +#!/bin/sh +# wait-for-postgres.sh + +set -e + +host="$1" +shift +cmd="$@" + +until PGPASSWORD=snowplow psql -h "$host" -d "iglu" -U "snowplow" -c '\q'; do + >&2 echo "Postgres is unavailable - sleeping" + sleep 2 +done + +>&2 echo "Postgres is up - executing command" +exec docker-entrypoint.sh $cmd \ No newline at end of file diff --git a/provisioning/resources/ui/js/components/Elasticsearch.tsx b/provisioning/resources/ui/js/components/Elasticsearch.tsx index 0a7618f7..7a878d83 100644 --- a/provisioning/resources/ui/js/components/Elasticsearch.tsx +++ b/provisioning/resources/ui/js/components/Elasticsearch.tsx @@ -43,7 +43,6 @@ export class Elasticsearch extends React.Component<{}, {}> {

Quicklinks:

); diff --git a/provisioning/resources/ui/js/components/Overview.tsx b/provisioning/resources/ui/js/components/Overview.tsx index 7e3ee58d..7dfcbd98 100644 --- a/provisioning/resources/ui/js/components/Overview.tsx +++ b/provisioning/resources/ui/js/components/Overview.tsx @@ -25,7 +25,6 @@ export class Overview extends React.Component<{}, {}> { var collector: string = location.protocol + '//' + window.location.host; var kibana: string = location.protocol + '//' + window.location.host + '/kibana/'; - var head_plugin: string = location.protocol + '//' + window.location.host + '/elasticsearch/_plugin/head/'; var elasticsearch: string = location.protocol + '//' + window.location.host + '/elasticsearch'; return ( @@ -38,7 +37,7 @@ export class Overview extends React.Component<{}, {}> {

You can send events into Snowplow Mini automatically from the Example events page. Simply go to that page and click the sample event buttons.

Alternatively, you can setup any of the Snowplow trackers to send data to this endpoint: {collector}
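
As a concrete illustration of that endpoint, a minimal smoke test can be fired with curl; the hostname below is a placeholder for wherever this Snowplow Mini instance is reachable, and depending on which tracker-protocol fields are supplied the hit will surface in either the good or the bad index:

    # Fire a minimal page-view hit at the collector's pixel endpoint (placeholder host)
    curl -s -o /dev/null -w "%{http_code}\n" "http://<mini-host>/i?e=pv&p=web"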

2. Viewing the events

- You can view the events that have been sent to Elasticsearch in the Kibana Dashboard or the Head Plugin.
+ You can view the events that have been sent to Elasticsearch in the Kibana Dashboard.

You can also submit queries directly to the Elasticsearch endpoint.
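
For direct queries, a hedged sketch of what hitting the proxied /elasticsearch path might look like (placeholder host; the good and bad index names match the rest of this patch):

    # Count documents in the good index via the /elasticsearch proxy path
    curl -s "http://<mini-host>/elasticsearch/good/_count"
    # Run an ad-hoc search against the bad index
    curl -s "http://<mini-host>/elasticsearch/bad/_search?size=1&pretty"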

3. Understanding how Snowplow Mini works

Quicklinks:

@@ -49,13 +48,14 @@ export class Overview extends React.Component<{}, {}> {

The software stack installed:

-  • Snowplow Stream Collector 0.11.0
+  • Snowplow Stream Collector NSQ 0.13.0
   • Snowplow Stream Enrich NSQ 0.16.1
-  • Snowplow Elasticsearch Sink 0.10.1
-  • Snowplow Iglu Server 0.2.0
-  • NSQ 1.0.0
-  • Elasticsearch 1.7.5
-  • Kibana 4.0.1
+  • Snowplow Elasticsearch Loader 0.10.1
+  • Snowplow Iglu Server 0.3.0
+  • Postgres 9.5
+  • NSQ v1.0.0-compat
+  • Elasticsearch-OSS 6.3.1
+  • Kibana-OSS 6.3.1
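
The versions listed above correspond to the container images pinned in the docker-compose.yml added later in this patch. A minimal sketch for verifying them on a provisioned box, assuming the compose binary and file locations used elsewhere in this patch:

    # List the Snowplow Mini containers and the image tags they were started from
    cd /home/ubuntu/snowplow
    /usr/local/bin/docker-compose ps
    docker ps --format 'table {{.Names}}\t{{.Image}}\t{{.Status}}'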

Stack topology:

diff --git a/provisioning/roles/docker/files/docker-compose.yml b/provisioning/roles/docker/files/docker-compose.yml new file mode 100644 index 00000000..6f8996b2 --- /dev/null +++ b/provisioning/roles/docker/files/docker-compose.yml @@ -0,0 +1,177 @@ +version: "3" + +services: + elasticsearch: + image: docker.elastic.co/elasticsearch/elasticsearch-oss:6.3.1 + container_name: elasticsearch + restart: always + environment: + # Swapping needs to be disabled for performance and node stability + - "bootstrap.memory_lock=true" + - ES_JAVA_OPTS=-Xms4g -Xmx4g + volumes: + - /home/ubuntu/snowplow/elasticsearch/data:/usr/share/elasticsearch/data + - /home/ubuntu/snowplow/elasticsearch/config/elasticsearch.yml:/usr/share/elasticsearch/config/elasticsearch.yml + - /home/ubuntu/snowplow/elasticsearch/config/log4j2.properties:/usr/share/elasticsearch/config/log4j2.properties + ulimits: + memlock: + soft: -1 + hard: -1 + nofile: + soft: 65536 + hard: 65536 + logging: + options: + max-size: "50M" + max-file: "10" + ports: + - "9200:9200" + - "9300:9300" + + kibana: + image: docker.elastic.co/kibana/kibana-oss:6.3.1 + container_name: kibana + restart: always + environment: + - /Users/oguzhanunlu/work/snowplow-mini/provisioning/resources/elasticsearch/config/kibana.yml:/usr/share/kibana/config/kibana.yml + ports: + - "5601:5601" + depends_on: + - elasticsearch + + elasticsearch-loader-good: + image: snowplow-docker-registry.bintray.io/snowplow/elasticsearch-loader:0.10.1 + container_name: elasticsearch-loader-good + command: [ "--config", "/snowplow/config/snowplow-es-loader-good.hocon" ] + restart: always + depends_on: + - elasticsearch + volumes: + - /home/ubuntu/snowplow/configs:/snowplow/config + logging: + options: + max-size: "1M" + max-file: "10" + environment: + - "SP_JAVA_OPTS=-Xmx512m" + + elasticsearch-loader-bad: + image: snowplow-docker-registry.bintray.io/snowplow/elasticsearch-loader:0.10.1 + container_name: elasticsearch-loader-bad + command: [ "--config", "/snowplow/config/snowplow-es-loader-bad.hocon" ] + restart: always + depends_on: + - elasticsearch + volumes: + - /home/ubuntu/snowplow/configs:/snowplow/config + logging: + options: + max-size: "1M" + max-file: "10" + environment: + - "SP_JAVA_OPTS=-Xmx512m" + + nsqlookupd: + image: nsqio/nsq:v1.0.0-compat + container_name: nsqlookupd + command: /nsqlookupd + restart: always + logging: + options: + max-size: "1M" + max-file: "10" + ports: + - "4160:4160" + - "4161:4161" + + nsqd: + image: nsqio/nsq:v1.0.0-compat + container_name: nsqd + command: /nsqd --lookupd-tcp-address=nsqlookupd:4160 --data-path=/home/ubuntu/snowplow/nsq-data + restart: always + volumes: + - /home/ubuntu/snowplow/nsq-data:/home/ubuntu/snowplow/nsq-data + depends_on: + - nsqlookupd + ports: + - "4150:4150" + - "4151:4151" + + nsqadmin: + image: nsqio/nsq:v1.0.0-compat + container_name: nsqadmin + command: /nsqadmin --lookupd-http-address=nsqlookupd:4161 + restart: always + depends_on: + - nsqlookupd + ports: + - "4171:4171" + + scala-stream-collector: + image: snowplow-docker-registry.bintray.io/snowplow/scala-stream-collector-nsq:0.13.0 + container_name: scala-stream-collector-nsq + command: [ "--config", "/snowplow/config/snowplow-stream-collector.hocon" ] + restart: always + depends_on: + - nsqd + ports: + - "8080:8080" + volumes: + - /home/ubuntu/snowplow/configs:/snowplow/config + logging: + options: + max-size: "1M" + max-file: "10" + environment: + - "SP_JAVA_OPTS=-Xmx512m" + + stream-enrich: + image: 
snowplow-docker-registry.bintray.io/snowplow/stream-enrich-nsq:0.16.1 + container_name: stream-enrich-nsq + command: [ + "--config", "/snowplow/config/snowplow-stream-enrich.hocon", + "--resolver", "file:/snowplow/config/iglu-resolver.json", + "--enrichments", "file:/snowplow/config/enrichments", + "--force-ip-lookups-download" + ] + restart: always + depends_on: + - scala-stream-collector + volumes: + - /home/ubuntu/snowplow/configs:/snowplow/config + logging: + options: + max-size: "1M" + max-file: "10" + environment: + - "SP_JAVA_OPTS=-Xmx512m" + + postgres: + image: postgres:9.5 + container_name: postgres + restart: always + volumes: + - /home/ubuntu/snowplow/init/iglu-server-init.sql:/docker-entrypoint-initdb.d/init.sql + ports: + - "5433:5432" + logging: + options: + max-size: "1M" + max-file: "10" + + iglu-server: + image: snowplow-docker-registry.bintray.io/snowplow/iglu-server:0.3.0 + container_name: iglu-server + entrypoint: /snowplow/bin/wait-for-postgres.sh postgres --config /snowplow/config/iglu-server.conf + restart: always + depends_on: + - postgres + ports: + - "8081:8081" + volumes: + - /home/ubuntu/snowplow/init/wait-for-postgres.sh:/snowplow/bin/wait-for-postgres.sh + - /home/ubuntu/snowplow/configs:/snowplow/config + logging: + options: + max-size: "1M" + max-file: "10" diff --git a/provisioning/roles/docker/tasks/main.yml b/provisioning/roles/docker/tasks/main.yml new file mode 100644 index 00000000..d952aad9 --- /dev/null +++ b/provisioning/roles/docker/tasks/main.yml @@ -0,0 +1,43 @@ +--- +- include_vars: ../../common_vars.yml + +- name: Setup the docker repository and install docker + sudo: yes + shell: | + apt-get update + apt-get install apt-transport-https ca-certificates curl software-properties-common --yes + curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo apt-key add - + add-apt-repository "deb [arch=amd64] https://download.docker.com/linux/ubuntu $(lsb_release -cs) stable" + apt-get update && apt-get install docker-ce --yes + +- name: Download docker-compose + sudo: yes + shell: curl -L https://github.com/docker/compose/releases/download/1.21.2/docker-compose-$(uname -s)-$(uname -m) -o /usr/local/bin/docker-compose + +- name: Apply executable permissions to the docker-compose binary + sudo: yes + shell: chmod +x /usr/local/bin/docker-compose + +- name: Copy docker-compose.yml + copy: src={{ item.src }} dest={{ item.dest }} owner=ubuntu group=ubuntu mode=0644 + with_items: + - { src: '../files/docker-compose.yml', dest: '/home/ubuntu/snowplow/' } + +- name: Create & set permissions of elasticsearch data directory + become: yes + shell: mkdir {{main_dir}}/elasticsearch/data && chown -R 1000:1000 {{main_dir}}/elasticsearch/data + +- name: Deploy snowplow mini + become: yes + shell: cd {{main_dir}} && docker-compose up -d && sleep 20 + +- name: Wait for Elasticsearch port 9200 to become open on the host, don't start checking for 10 seconds + wait_for: + port: 9200 + delay: 10 + sleep: 5 + connect_timeout: 60 + +- name: Create ES indexes & Kibana index patterns & NSQ topics + become: yes + shell: sh {{init_dir}}/create.sh diff --git a/provisioning/roles/sp_mini_1_create_dirs/tasks/main.yml b/provisioning/roles/sp_mini_1_create_dirs/tasks/main.yml index 2a9cd15a..b583f489 100644 --- a/provisioning/roles/sp_mini_1_create_dirs/tasks/main.yml +++ b/provisioning/roles/sp_mini_1_create_dirs/tasks/main.yml @@ -10,26 +10,8 @@ shell: 'adduser ubuntu --disabled-password --gecos "" ; passwd -d ubuntu' -- name: Insert logrotate configuration for Snowplow 
Services - become: yes - copy: - dest: "/etc/logrotate.d/snowplow-apps" - content: | - /var/log/snowplow*.log /var/log/snowplow*.err /var/log/nsq*.log /var/log/nsq*.err /var/log/iglu_server*.log /var/log/iglu_server*.err { - hourly - rotate 3 - missingok - notifempty - copytruncate - } - mode: 0644 - -- name: Change logrotate cron to hourly - become: yes - shell: 'mv /etc/cron.daily/logrotate /etc/cron.hourly && service cron restart' - - name: creating directories - file: path={{item}} state=directory + file: path={{item}} state=directory mode=0755 with_items: - "{{configs_dir}}" - "{{staging_dir}}" @@ -48,16 +30,19 @@ src: "{{playbook_dir}}/resources/elasticsearch" dest: "{{main_dir}}" recursive: yes + archive: no - synchronize: src: "{{playbook_dir}}/resources/configs" dest: "{{main_dir}}" recursive: yes + archive: no - synchronize: src: "{{playbook_dir}}/resources/init" dest: "{{main_dir}}" recursive: yes + archive: no - name: Install NTP to prevent clock drifts become: yes diff --git a/provisioning/roles/sp_mini_2_install_postgresl/tasks/main.yml b/provisioning/roles/sp_mini_2_install_postgresl/tasks/main.yml deleted file mode 100644 index ad7c423b..00000000 --- a/provisioning/roles/sp_mini_2_install_postgresl/tasks/main.yml +++ /dev/null @@ -1,53 +0,0 @@ ---- -- include_vars: ../../common_vars.yml - -- name: Adding APT repository key - become: yes - apt_key: - id: ACCC4CF8 - url: https://www.postgresql.org/media/keys/ACCC4CF8.asc - tags: - - postgresql - - db - - repo - -- name: Add PostgreSQL official APT repository - become: yes - apt_repository: - repo: "deb http://apt.postgresql.org/pub/repos/apt/ {{ansible_distribution_release}}-pgdg main" - tags: - - postgresql - - db - - repo - -- name: Install acl for creating Postgresql user - become: yes - apt: - name: "acl" - state: present - update_cache: yes - cache_valid_time: 3600 - -- name: Install PostgreSQL - become: yes - apt: - name: "postgresql-9.5" - state: present - update_cache: yes - cache_valid_time: 3600 - tags: - - postgresql - - db - - deps - -- name: Install dependencies for the Ansible module - become: yes - apt: - name: "{{item}}" - state: latest - with_items: - - python-psycopg2 - tags: - - postgresql - - db - - deps diff --git a/provisioning/roles/sp_mini_4_setup_apps/tasks/main.yml b/provisioning/roles/sp_mini_4_setup_apps/tasks/main.yml index a2896642..ca78bfd4 100644 --- a/provisioning/roles/sp_mini_4_setup_apps/tasks/main.yml +++ b/provisioning/roles/sp_mini_4_setup_apps/tasks/main.yml @@ -3,43 +3,8 @@ - name: Set variables set_fact: - stream_collector_package: 'snowplow_scala_stream_collector_0.11.0.zip' - stream_enrich_package: 'snowplow_stream_enrich_nsq_0.16.1.zip' - es_loader_package: 'snowplow_elasticsearch_loader_http_0.10.1.zip' - iglu_server_package: 'iglu_server_0.2.0.zip' - kibana_v: '4.0.1' - nsq_package: 'nsq-1.0.0-compat.linux-amd64.go1.8.tar.gz' - nsq_bin_dir: 'nsq-1.0.0-compat.linux-amd64.go1.8/bin' control_plane_dir: '{{playbook_dir}}/resources/control-plane' -- name: Install unzip - become: yes - apt: - name: "unzip" - state: present - update_cache: yes - cache_valid_time: 3600 - -- name: Add Java 8 repository - become: yes - apt_repository: - repo: 'ppa:webupd8team/java' - state: present - -- name: Signed Oracle License - become: yes - shell: "echo oracle-java8-installer shared/accepted-oracle-license-v1-1 select true | sudo /usr/bin/debconf-set-selections" - register: oracle_license_signed - -- name: Install Java 8 - become: yes - apt: - name: oracle-java8-installer - state: present - 
update_cache: yes - cache_valid_time: 3600 - when: oracle_license_signed|changed - - name: Copy Control API to executables dir become: yes synchronize: @@ -52,134 +17,6 @@ src: "{{playbook_dir}}/../VERSION" dest: "{{main_dir}}" -- name: Check Stream Collector - stat: - path: "{{staging_dir}}/{{stream_collector_package}}" - register: check_stream_collector_result - -- name: Download Stream Collector - get_url: - url: "http://dl.bintray.com/snowplow/snowplow-generic/{{stream_collector_package}}" - dest: "{{staging_dir}}" - when: check_stream_collector_result.stat.exists == False - register: stream_collector_downloaded - -- name: Unzip downloaded Stream Collector - shell: "unzip {{staging_dir}}/{{stream_collector_package}} -d {{executables_dir}}" - when: stream_collector_downloaded|changed - -- name: Check Stream Enrich - stat: - path: "{{staging_dir}}/{{stream_enrich_package}}" - register: check_stream_enrich_result - -- name: Download Stream Enrich - get_url: - url: "http://dl.bintray.com/snowplow/snowplow-generic/{{stream_enrich_package}}" - dest: "{{staging_dir}}" - when: check_stream_enrich_result.stat.exists == False - register: stream_enrich_downloaded - -- name: Unzip downloaded Stream Enrich - shell: "unzip {{staging_dir}}/{{stream_enrich_package}} -d {{executables_dir}}" - when: stream_enrich_downloaded|changed - -- name: Check Elasticsearch Loader - stat: - path: "{{staging_dir}}/{{es_loader_package}}" - register: check_es_loader_result - -- name: Download Elasticsearch Loader - get_url: - url: "http://bintray.com/artifact/download/snowplow/snowplow-generic/{{es_loader_package}}" - dest: "{{staging_dir}}" - when: check_es_loader_result.stat.exists == False - register: es_loader_downloaded - -- name: Unzip downloaded Elasticsearch Loader - shell: "unzip {{staging_dir}}/{{es_loader_package}} -d {{executables_dir}}" - when: es_loader_downloaded|changed - -- name: Check Iglu Server - stat: - path: "{{staging_dir}}/{{iglu_server_package}}" - register: check_iglu_server_result - -- name: Download Iglu Server - get_url: - url: "http://bintray.com/artifact/download/snowplow/snowplow-generic/{{iglu_server_package}}" - dest: "{{staging_dir}}" - when: check_iglu_server_result.stat.exists == False - register: iglu_server_downloaded - -- name: Unzip downloaded Iglu Server - shell: "unzip {{staging_dir}}/{{iglu_server_package}} -d {{executables_dir}}" - when: iglu_server_downloaded|changed - register: iglu_server_extracted - -- name: Download NSQ - get_url: - url: "https://s3.amazonaws.com/bitly-downloads/nsq/{{nsq_package}}" - dest: "{{staging_dir}}" - -- name: Unzip downloaded NSQ - shell: "tar xvfz {{staging_dir}}/{{nsq_package}} --directory {{staging_dir}}" - -- name: Copy NSQ binaries to executables_dir - shell: "cp {{staging_dir}}/{{nsq_bin_dir}}/nsqd {{staging_dir}}/{{nsq_bin_dir}}/nsqlookupd {{staging_dir}}/{{nsq_bin_dir}}/nsqadmin {{executables_dir}}" - -- name: Create snowplow user on Postgresql - become: true - become_user: postgres - postgresql_user: - name: snowplow - password: snowplow - -- name: Create iglu db on Postgresql - become: true - become_user: postgres - postgresql_db: - name: iglu - owner: snowplow - -- name: Download Elasticsearch - become: yes - shell: "wget https://download.elastic.co/elasticsearch/elasticsearch/elasticsearch-1.7.5.deb -P {{staging_dir}}" - -- name: Install Elasticsearch - become: yes - shell: "dpkg -i {{staging_dir}}/elasticsearch-1.7.5.deb" - -- name: Install Elasticsearch Head Plugin - become: yes - shell: "/usr/share/elasticsearch/bin/plugin 
--install mobz/elasticsearch-head" - -- name: Check Kibana - stat: - path: "{{staging_dir}}/kibana-{{kibana_v}}-linux-x64.zip" - register: check_kibana_result - -- name: Download Kibana - get_url: - url: "https://download.elasticsearch.org/kibana/kibana/kibana-{{kibana_v}}-linux-x64.zip" - dest: "{{staging_dir}}" - when: check_kibana_result.stat.exists == False - register: kibana_downloaded - -- name: Unzip downloaded Kibana package - become: yes - shell: "unzip {{staging_dir}}/kibana-{{kibana_v}}-linux-x64.zip -d /opt/" - when: kibana_downloaded|changed - register: kibana_unzipped - -- name: Symlink for kibana - become: yes - file: - src: "/opt/kibana-{{kibana_v}}-linux-x64" - dest: "/opt/kibana" - state: link - when: kibana_unzipped|changed - - name: Copy Caddy executable to executables dir become: yes environment: @@ -191,4 +28,4 @@ - name: Set owner of the main directory become: yes - shell: "chown -R ubuntu:ubuntu {{main_dir}}" + shell: "chown -R ubuntu:ubuntu {{main_dir}} && chmod 755 -R {{main_dir}}" diff --git a/provisioning/roles/sp_mini_7_setup_init/tasks/main.yml b/provisioning/roles/sp_mini_7_setup_init/tasks/main.yml index b99d4297..079a26b5 100644 --- a/provisioning/roles/sp_mini_7_setup_init/tasks/main.yml +++ b/provisioning/roles/sp_mini_7_setup_init/tasks/main.yml @@ -9,31 +9,35 @@ dest: "/etc/init.d" mode: 0755 with_items: - - kibana4_init - snowplow_mini_control_plane_api - - snowplow_stream_collector - - snowplow_stream_enrich - - snowplow_elasticsearch_loader_good - - snowplow_elasticsearch_loader_bad - - iglu_server_0.2.0 - caddy_init - - nsqd_init - - nsqlookupd_init - - nsqadmin_init - name: Configure for inits for calling at boot time become: yes shell: "update-rc.d {{item}} defaults" with_items: - - kibana4_init - snowplow_mini_control_plane_api - - snowplow_stream_collector - - snowplow_stream_enrich - - snowplow_elasticsearch_loader_good - - snowplow_elasticsearch_loader_bad - - iglu_server_0.2.0 - - elasticsearch - caddy_init - - nsqd_init - - nsqlookupd_init - - nsqadmin_init + +- name: Start UI and Control Plane + become: yes + shell: service snowplow_mini_control_plane_api start && service caddy_init start + +- name: Increase mmap count to recommended 262144 for Elasticsearch + become: yes + shell: echo "vm.max_map_count=262144" >> /etc/sysctl.conf && service procps start + +- cron: + name: "Add cronjob to restart containers at system reboot" + special_time: reboot + job: /usr/local/bin/docker-compose -f /home/ubuntu/snowplow/docker-compose.yml restart && sleep 30 + +- cron: + name: "Add cronjob to crop ES good index's docs older than a week" + special_time: weekly + job: /usr/bin/curl -s -X POST http://localhost:9200/good/_delete_by_query -H 'Content-Type:application/json' -d '{ "query" :{ "range" :{ "collector_tstamp" :{ "lt" :"now-1w/d" } } } }' > /dev/null 2>&1 + +- cron: + name: "Add cronjob to crop ES bad index's docs older than a week" + special_time: weekly + job: /usr/bin/curl -s -X POST http://localhost:9200/bad/_delete_by_query -H 'Content-Type:application/json' -d '{ "query" :{ "range" :{ "failure_tstamp" :{ "lt" :"now-1w/d" } } } }' > /dev/null 2>&1 diff --git a/provisioning/roles/sp_mini_8_configure/tasks/main.yml b/provisioning/roles/sp_mini_8_configure/tasks/main.yml deleted file mode 100644 index 0cc4c509..00000000 --- a/provisioning/roles/sp_mini_8_configure/tasks/main.yml +++ /dev/null @@ -1,80 +0,0 @@ ---- -- include_vars: ../../common_vars.yml - -- name: Starting Elasticsearch - become: yes - service: - name: elasticsearch - state: 
started - register: ElasticsearchStarted - -- name: Wait for Elasticsearch port 9200 to become open on the host, don't start checking for 10 seconds - wait_for: - port: 9200 - delay: 10 - -- name: curl put good-mapping.json - shell: "curl -XPUT 'http://localhost:9200/good' -d @{{es_dir}}/good-mapping.json" - -- name: curl put bad-mapping.json - shell: "curl -XPUT 'http://localhost:9200/bad' -d @{{es_dir}}/bad-mapping.json" - -- name: Starting nsqd - become: yes - service: - name: nsqd_init - state: started - register: NsqdStarted - -- name: Starting nsqlookupd - become: yes - service: - name: nsqlookupd_init - state: started - register: NsqlookupdStarted - -- name: Starting nsqadmin - become: yes - service: - name: nsqadmin_init - state: started - register: NsqadminStarted - -- name: Wait for the NSQ services to start - wait_for: - port: "{{item}}" - delay: 1 - with_items: - - 4151 - - 4161 - - 4171 - -- name: Starting Kibana - become: yes - service: - name: kibana4_init - state: started - -- name: add "good" index pattern to Kibana - shell: > - curl -XPUT http://localhost:9200/.kibana/index-pattern/good -d '{"title" : "good", "timeFieldName" : "collector_tstamp"}' - -- name: add "bad" index pattern to Kibana - shell: > - curl -XPUT http://localhost:9200/.kibana/index-pattern/bad -d '{"title" : "bad", "timeFieldName" : "failure_tstamp"}' - -- name: make "good" index pattern default - shell: > - curl -XPUT http://localhost:9200/.kibana/config/4.0.1 -d '{"defaultIndex" : "good"}' - -- name: Create new topic for RawEvents - shell: "curl -X POST http://127.0.0.1:4151/topic/create?topic=RawEvents" - -- name: Create new topic for BadEvents - shell: "curl -X POST http://127.0.0.1:4151/topic/create?topic=BadEvents" - -- name: Create new topic for EnrichedEvents - shell: "curl -X POST http://127.0.0.1:4151/topic/create?topic=EnrichedEvents" - -- name: Create new topic for BadEnrichedEvents - shell: "curl -X POST http://127.0.0.1:4151/topic/create?topic=BadEnrichedEvents" diff --git a/provisioning/with_building_ui_and_go_projects.yml b/provisioning/with_building_ui_and_go_projects.yml index 8bf08842..15899f46 100644 --- a/provisioning/with_building_ui_and_go_projects.yml +++ b/provisioning/with_building_ui_and_go_projects.yml @@ -12,10 +12,9 @@ - typescript - packer - sp_mini_1_create_dirs - - sp_mini_2_install_postgresl - sp_mini_3_build_go_projects - sp_mini_4_setup_apps - sp_mini_5_build_ui - sp_mini_6_copy_ui_folders - sp_mini_7_setup_init - - sp_mini_8_configure + - docker diff --git a/provisioning/without_building_ui_and_go_projects.yml b/provisioning/without_building_ui_and_go_projects.yml index b7483aca..99402670 100644 --- a/provisioning/without_building_ui_and_go_projects.yml +++ b/provisioning/without_building_ui_and_go_projects.yml @@ -7,8 +7,7 @@ roles: - sp_mini_1_create_dirs - - sp_mini_2_install_postgresl - sp_mini_4_setup_apps - sp_mini_6_copy_ui_folders - sp_mini_7_setup_init - - sp_mini_8_configure + - docker diff --git a/utils/scripts/user_data.sh b/utils/scripts/user_data.sh index aa00037c..49412c51 100755 --- a/utils/scripts/user_data.sh +++ b/utils/scripts/user_data.sh @@ -8,7 +8,7 @@ password='password' iglu_server_super_uid='deadbeef-dead-beef-dead-beefdeadbeef' # DO NOT ALTER BELOW # -sudo service iglu_server_0.3.0 restart +sudo /usr/local/bin/docker-compose -f /home/ubuntu/snowplow/docker-compose.yml restart iglu-server sudo service snowplow_mini_control_plane_api restart sleep 10
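
With the init scripts gone, anything that previously went through service <name> restart is handled by Compose instead. A minimal sketch of the equivalent day-to-day commands, using the compose file path and service names defined in this patch:

    # Restart a single service after editing its config under /home/ubuntu/snowplow/configs
    sudo /usr/local/bin/docker-compose -f /home/ubuntu/snowplow/docker-compose.yml restart stream-enrich

    # Follow a service's logs instead of tailing /var/log/snowplow_*.log
    sudo /usr/local/bin/docker-compose -f /home/ubuntu/snowplow/docker-compose.yml logs -f --tail=100 iglu-server

    # Bring the whole stack up again after a reboot (the @reboot cron entry above does the same)
    sudo /usr/local/bin/docker-compose -f /home/ubuntu/snowplow/docker-compose.yml up -d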