diff --git a/Vagrantfile b/Vagrantfile index 6274fc18..7c173709 100644 --- a/Vagrantfile +++ b/Vagrantfile @@ -20,8 +20,8 @@ Vagrant.configure("2") do |config| vb.name = Dir.pwd().split("/")[-1] + "-" + Time.now.to_f.to_i.to_s vb.customize ["modifyvm", :id, "--natdnshostresolver1", "on"] vb.customize [ "guestproperty", "set", :id, "--timesync-threshold", 10000 ] - vb.memory = 4096 - vb.cpus = 1 + vb.memory = 8192 + vb.cpus = 2 end config.vm.provision :shell do |sh| diff --git a/integration/integration_test.sh b/integration/integration_test.sh index 9c411282..d1d3a21b 100755 --- a/integration/integration_test.sh +++ b/integration/integration_test.sh @@ -1,14 +1,5 @@ #!/bin/bash -sudo service elasticsearch start -sudo service iglu_server_0.3.0 start -sudo service snowplow_stream_collector start -sudo service snowplow_stream_enrich start -sudo service snowplow_elasticsearch_loader_good start -sudo service snowplow_elasticsearch_loader_bad start -sudo service kibana4_init start -sleep 15 - # Send good and bad events COUNTER=0 while [ $COUNTER -lt 10 ]; do diff --git a/provisioning/resources/configs/Caddyfile b/provisioning/resources/configs/Caddyfile index 05e74b5e..60c8dd6d 100644 --- a/provisioning/resources/configs/Caddyfile +++ b/provisioning/resources/configs/Caddyfile @@ -5,7 +5,6 @@ /kibana /elasticsearch /control-plane - /_plugin } redir /home /home/ redir /kibana /kibana/ @@ -29,7 +28,6 @@ proxy /elasticsearch localhost:9200 { without /elasticsearch } - proxy /_plugin localhost:9200 proxy /control-plane localhost:10000 { without /control-plane diff --git a/provisioning/resources/configs/control-plane-api.toml b/provisioning/resources/configs/control-plane-api.toml index 897138be..52581400 100644 --- a/provisioning/resources/configs/control-plane-api.toml +++ b/provisioning/resources/configs/control-plane-api.toml @@ -14,15 +14,15 @@ caddy = "Caddyfile" iglu_resolver = "iglu-resolver.json" [init_scripts] -stream_collector = "snowplow_stream_collector" -stream_enrich = "snowplow_stream_enrich" -es_loader_good = "snowplow_elasticsearch_loader_good" -es_loader_bad = "snowplow_elasticsearch_loader_bad" -iglu = "iglu_server_0.3.0" +stream_collector = "scala-stream-collector" +stream_enrich = "stream-enrich" +es_loader_good = "elasticsearch-loader-good" +es_loader_bad = "elasticsearch-loader-bad" +iglu = "iglu-server" caddy = "caddy_init" [PSQL] user = "snowplow" password = "snowplow" database = "iglu" -adddress = "127.0.0.1:5432" +address = "127.0.0.1:5433" diff --git a/provisioning/resources/configs/iglu-server.conf b/provisioning/resources/configs/iglu-server.conf index aa3a49c1..99ed11a9 100644 --- a/provisioning/resources/configs/iglu-server.conf +++ b/provisioning/resources/configs/iglu-server.conf @@ -27,7 +27,7 @@ repo-server { # 'postgres' contains configuration options for the postgre instance the server # is using postgres { - host = "localhost" + host = "postgres" port = 5432 dbname = "iglu" username = "snowplow" diff --git a/provisioning/resources/configs/snowplow-es-loader-bad.hocon b/provisioning/resources/configs/snowplow-es-loader-bad.hocon index 50909e58..01eab2af 100644 --- a/provisioning/resources/configs/snowplow-es-loader-bad.hocon +++ b/provisioning/resources/configs/snowplow-es-loader-bad.hocon @@ -14,93 +14,37 @@ # This file (config.hocon.sample) contains a template with # configuration options for the Elasticsearch Loader. 
-# Sources currently supported are: -# "kinesis" for reading records from a Kinesis stream -# "stdin" for reading unencoded tab-separated events from stdin -# If set to "stdin", JSON documents will not be sent to Elasticsearch -# but will be written to stdout. -# "nsq" for reading unencoded tab-separated events from NSQ source = nsq -# Where to write good and bad records sink { - # Sinks currently supported are: - # "elasticsearch" for writing good records to Elasticsearch - # "stdout" for writing good records to stdout good = elasticsearch - - # Sinks currently supported are: - # "kinesis" for writing bad records to Kinesis - # "stderr" for writing bad records to stderr - # "nsq" for writing bad records to NSQ - # "none" for ignoring bad records bad = none } -# "good" for a stream of successfully enriched events -# "bad" for a stream of bad events -# "plain-json" for writing plain json enabled = bad -# The following are used to authenticate for the Amazon Kinesis sink. -# -# If both are set to "default", the default provider chain is used -# (see http://docs.aws.amazon.com/AWSJavaSDK/latest/javadoc/com/amazonaws/auth/DefaultAWSCredentialsProviderChain.html) -# -# If both are set to "iam", use AWS IAM Roles to provision credentials. -# -# If both are set to "env", use environment variables AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY aws { accessKey: "" secretKey: "" } -# config for NSQ nsq { - # Channel name for NSQ source channelName = ESLoaderChannelBad - - # Host name for NSQ tools - host = "127.0.0.1" - - # TCP port for nsqd + host = nsqlookupd port = 4150 - - # HTTP port for nsqlookupd lookupPort = 4161 } kinesis { - # "LATEST": most recent data. - # "TRIM_HORIZON": oldest available data. - # "AT_TIMESTAMP": Start from the record at or after the specified timestamp - # Note: This only affects the first run of this application on a stream. initialPosition= TRIM_HORIZON - - # Maximum number of records to get from Kinesis per call to GetRecords maxRecords = 1000 - - # Region where the Kinesis stream is located region = "" - - # "appName" is used for a DynamoDB table to maintain stream state. - # You can set it automatically using: "SnowplowElasticsearchSink-${sink.kinesis.in.stream-name}" appName = "" } -# Common configuration section for all stream sources streams { inStreamName = BadEnrichedEvents - - # Stream for enriched events which are rejected by Elasticsearch outStreamName = BadElasticsearchEvents - - # Events are accumulated in a buffer before being sent to Elasticsearch. 
- # Note: Buffering is not supported by NSQ; will be ignored - # The buffer is emptied whenever: - # - the combined size of the stored records exceeds byteLimit or - # - the number of stored records exceeds recordLimit or - # - the time in milliseconds since it was last emptied exceeds timeLimit buffer { byteLimit = 5242880 recordLimit = 1 @@ -110,31 +54,18 @@ streams { elasticsearch { - # Events are indexed using an Elasticsearch Client - # - endpoint: the cluster endpoint - # - port: the port the cluster can be accessed on - # - for http this is usually 9200 - # - for transport this is usually 9300 - # - max-timeout: the maximum attempt time before a client restart - # - ssl: if using the http client, whether to use ssl or not client { - endpoint = "localhost" + endpoint = elasticsearch port = 9200 maxTimeout = 10000 ssl = false } - # When using the AWS ES service - # - signing: if using the http client and the AWS ES service you can sign your requests - # http://docs.aws.amazon.com/general/latest/gr/signing_aws_api_requests.html - # - region where the AWS ES service is located aws { signing = false region = "" } - # index: the Elasticsearch index name - # type: the Elasticsearch index type cluster { name = elasticsearch index = bad diff --git a/provisioning/resources/configs/snowplow-es-loader-good.hocon b/provisioning/resources/configs/snowplow-es-loader-good.hocon index 35b579f7..0ddd2d0d 100644 --- a/provisioning/resources/configs/snowplow-es-loader-good.hocon +++ b/provisioning/resources/configs/snowplow-es-loader-good.hocon @@ -14,93 +14,37 @@ # This file (config.hocon.sample) contains a template with # configuration options for the Elasticsearch Loader. -# Sources currently supported are: -# "kinesis" for reading records from a Kinesis stream -# "stdin" for reading unencoded tab-separated events from stdin -# If set to "stdin", JSON documents will not be sent to Elasticsearch -# but will be written to stdout. -# "nsq" for reading unencoded tab-separated events from NSQ source = nsq -# Where to write good and bad records sink { - # Sinks currently supported are: - # "elasticsearch" for writing good records to Elasticsearch - # "stdout" for writing good records to stdout good = elasticsearch - - # Sinks currently supported are: - # "kinesis" for writing bad records to Kinesis - # "stderr" for writing bad records to stderr - # "nsq" for writing bad records to NSQ - # "none" for ignoring bad records bad = nsq } -# "good" for a stream of successfully enriched events -# "bad" for a stream of bad events -# "plain-json" for writing plain json enabled = good -# The following are used to authenticate for the Amazon Kinesis sink. -# -# If both are set to "default", the default provider chain is used -# (see http://docs.aws.amazon.com/AWSJavaSDK/latest/javadoc/com/amazonaws/auth/DefaultAWSCredentialsProviderChain.html) -# -# If both are set to "iam", use AWS IAM Roles to provision credentials. -# -# If both are set to "env", use environment variables AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY aws { accessKey = "" secretKey = "" } -# config for NSQ nsq { - # Channel name for NSQ source channelName = ESLoaderChannelGood - - # Host name for NSQ tools - host = "127.0.0.1" - - # TCP port for nsqd + host = nsqlookupd port = 4150 - - # HTTP port for nsqlookupd lookupPort = 4161 } kinesis { - # "LATEST": most recent data. - # "TRIM_HORIZON": oldest available data. 
- # "AT_TIMESTAMP": Start from the record at or after the specified timestamp - # Note: This only affects the first run of this application on a stream. initialPosition = TRIM_HORIZON - - # Maximum number of records to get from Kinesis per call to GetRecords maxRecords = 1000 - - # Region where the Kinesis stream is located region = "" - - # "appName" is used for a DynamoDB table to maintain stream state. - # You can set it automatically using: "SnowplowElasticsearchSink-${sink.kinesis.in.stream-name}" appName = "" } -# Common configuration section for all stream sources streams { inStreamName = EnrichedEvents - - # Stream for enriched events which are rejected by Elasticsearch outStreamName = BadElasticsearchEvents - - # Events are accumulated in a buffer before being sent to Elasticsearch. - # Note: Buffering is not supported by NSQ; will be ignored - # The buffer is emptied whenever: - # - the combined size of the stored records exceeds byteLimit or - # - the number of stored records exceeds recordLimit or - # - the time in milliseconds since it was last emptied exceeds timeLimit buffer { byteLimit: 5242880 recordLimit: 1 @@ -110,31 +54,18 @@ streams { elasticsearch { - # Events are indexed using an Elasticsearch Client - # - endpoint: the cluster endpoint - # - port: the port the cluster can be accessed on - # - for http this is usually 9200 - # - for transport this is usually 9300 - # - max-timeout: the maximum attempt time before a client restart - # - ssl: if using the http client, whether to use ssl or not client { - endpoint = "localhost" + endpoint = elasticsearch port = 9200 maxTimeout = 10000 ssl = false } - # When using the AWS ES service - # - signing: if using the http client and the AWS ES service you can sign your requests - # http://docs.aws.amazon.com/general/latest/gr/signing_aws_api_requests.html - # - region where the AWS ES service is located aws { signing = false region = "" } - # index: the Elasticsearch index name - # type: the Elasticsearch index type cluster { name = "elasticsearch" index = "good" diff --git a/provisioning/resources/configs/snowplow-stream-collector.hocon b/provisioning/resources/configs/snowplow-stream-collector.hocon index efea8200..6d712b09 100644 --- a/provisioning/resources/configs/snowplow-stream-collector.hocon +++ b/provisioning/resources/configs/snowplow-stream-collector.hocon @@ -18,69 +18,49 @@ # 'collector' contains configuration options for the main Scala collector. collector { - # The collector runs as a web service specified on the following - # interface and port. interface = "0.0.0.0" port = 8080 - # Configure the P3P policy header. p3p { policyRef = "/w3c/p3p.xml" CP = "NOI DSP COR NID PSA OUR IND COM NAV STA" } - # The collector returns a cookie to clients for user identification - # with the following domain and expiration. + crossDomain { + enabled = false + domain = "*" + secure = true + } + cookie { enabled = true expiration = "365 days" # e.g. "365 days" - # Network cookie name name = sp - # The domain is optional and will make the cookie accessible to other - # applications on the domain. Comment out this line to tie cookies to - # the collector's full domain domain = "" } - # When enabled and the cookie specified above is missing, performs a redirect to itself to check - # if third-party cookies are blocked using the specified name. If they are indeed blocked, - # fallbackNetworkId is used instead of generating a new random one. 
cookieBounce { enabled = false - # The name of the request parameter which will be used on redirects checking that third-party - # cookies work. name = "n3pc" - # Network user id to fallback to when third-party cookies are blocked. fallbackNetworkUserId = "00000000-0000-4000-A000-000000000000" } + redirectMacro { + enabled = false + placeholder = "[TOKEN]" + } + streams { - # Events which have successfully been collected will be stored in the good stream/topic good = RawEvents - - # Events that are too big (w.r.t Kinesis 1MB limit) will be stored in the bad stream/topic bad = BadRawEvents - - # Whether to use the incoming event's ip as the partition key for the good stream/topic useIpAddressAsPartitionKey = false - # config for NSQ sink sink { enabled = nsq - - # Host name for NSQ tools - host = "127.0.0.1" - - # TCP port for nsqd + host = nsqd port = 4150 } - # Incoming events are stored in a buffer before being sent to Kinesis/Kafka. - # Note: Buffering is not supported by NSQ. - # The buffer is emptied whenever: - # - the number of stored records reaches record-limit or - # - the combined size of the stored records reaches byte-limit or - # - the time in milliseconds since the buffer was last emptied reaches time-limit buffer { byteLimit = 4000000 recordLimit = 500 # Not supported by Kafka; will be ignored @@ -89,23 +69,13 @@ collector { } } -# Akka has a variety of possible configuration options defined at -# http://doc.akka.io/docs/akka/current/scala/general/configuration.html akka { loglevel = DEBUG # 'OFF' for no logging, 'DEBUG' for all logging. loggers = ["akka.event.slf4j.Slf4jLogger"] - # akka-http is the server the Stream collector uses and has configurable options defined at - # http://doc.akka.io/docs/akka-http/current/scala/http/configuration.html http.server { - # To obtain the hostname in the collector, the 'remote-address' header - # should be set. By default, this is disabled, and enabling it - # adds the 'Remote-Address' header to every request automatically. remote-address-header = on - raw-request-uri-header = on - - # Define the maximum request length (the default is 2048) parsing { max-uri-length = 32768 uri-parsing-mode = relaxed diff --git a/provisioning/resources/configs/snowplow-stream-enrich.hocon b/provisioning/resources/configs/snowplow-stream-enrich.hocon index 353095bb..b653c604 100644 --- a/provisioning/resources/configs/snowplow-stream-enrich.hocon +++ b/provisioning/resources/configs/snowplow-stream-enrich.hocon @@ -19,38 +19,21 @@ enrich { streams { in { - # Stream/topic where the raw events to be enriched are located raw = RawEvents } out { - # Stream/topic where the events that were successfully enriched will end up enriched = EnrichedEvents - # Stream/topic where the event that failed enrichment will be stored bad = BadEnrichedEvents - - # How the output stream/topic will be partitioned. - # Possible partition keys are: event_id, event_fingerprint, domain_userid, network_userid, - # user_ipaddress, domain_sessionid, user_fingerprint. - # Refer to https://github.com/snowplow/snowplow/wiki/canonical-event-model to know what the - # possible partition keys correspond to. - # Otherwise, the partition key will be a random UUID. - # Note: Nsq does not make use of partition key. 
partitionKey = "" } sourceSink { enabled = nsq - - # Channel name for nsq source rawChannel = StreamEnrichChannel - # Host name for nsqd - host = "127.0.0.1" - # TCP port for nsqd, 4150 by default + host = nsqd port = 4150 - # Host name for lookupd - lookupHost = "127.0.0.1" - # HTTP port for nsqlookupd, 4161 by default + lookupHost = nsqlookupd lookupPort = 4161 } @@ -60,6 +43,6 @@ enrich { timeLimit = 5000 } - appName = "" + appName = "snowplow-stream-enrich" } } diff --git a/provisioning/resources/control-plane/change_credentials_test.go b/provisioning/resources/control-plane/change_credentials_test.go index e186fe01..b71f1466 100644 --- a/provisioning/resources/control-plane/change_credentials_test.go +++ b/provisioning/resources/control-plane/change_credentials_test.go @@ -38,7 +38,6 @@ func TestChangeCredentials(t *testing.T) { /kibana /elasticsearch /control-plane - /_plugin } ` expectedCaddyConfigHeadAfter := @@ -49,7 +48,6 @@ func TestChangeCredentials(t *testing.T) { /kibana /elasticsearch /control-plane - /_plugin } ` dir, err := ioutil.TempDir("", "testDir") diff --git a/provisioning/resources/control-plane/change_domain_name_test.go b/provisioning/resources/control-plane/change_domain_name_test.go index 44075d27..c71a7df8 100644 --- a/provisioning/resources/control-plane/change_domain_name_test.go +++ b/provisioning/resources/control-plane/change_domain_name_test.go @@ -38,7 +38,6 @@ func TestChangeDomainName(t *testing.T) { /kibana /elasticsearch /control-plane - /_plugin } ` expectedCaddyConfigHeadAfter := @@ -49,7 +48,6 @@ func TestChangeDomainName(t *testing.T) { /kibana /elasticsearch /control-plane - /_plugin } ` dir, err := ioutil.TempDir("", "testDir") diff --git a/provisioning/resources/control-plane/local_iglu.go b/provisioning/resources/control-plane/local_iglu.go index d1a316fb..381fbf95 100644 --- a/provisioning/resources/control-plane/local_iglu.go +++ b/provisioning/resources/control-plane/local_iglu.go @@ -67,6 +67,7 @@ func (li LocalIglu) addApiKeyToConfig() error { } func (li LocalIglu) insertApiKeyToDb() error { + db := pg.Connect(&pg.Options{ User: li.Psql.User, Password: li.Psql.Password, diff --git a/provisioning/resources/control-plane/main.go b/provisioning/resources/control-plane/main.go index 5280100c..98702ea5 100644 --- a/provisioning/resources/control-plane/main.go +++ b/provisioning/resources/control-plane/main.go @@ -209,6 +209,7 @@ func addLocalIgluApikey(resp http.ResponseWriter, req *http.Request) { IgluApikey: igluApikey, Psql: psqlInfos, } + err := localIglu.addApiKey() if err != nil { http.Error(resp, err.Error(), 500) diff --git a/provisioning/resources/control-plane/restart_services.go b/provisioning/resources/control-plane/restart_services.go index dbef2a08..0b2ce492 100644 --- a/provisioning/resources/control-plane/restart_services.go +++ b/provisioning/resources/control-plane/restart_services.go @@ -35,14 +35,24 @@ func restartService(service string) error { } if val, ok := initMap[service]; ok { - restartCommand := []string{"service", val, "restart"} - - cmd := exec.Command("/bin/bash", restartCommand...) - err := cmd.Run() - if err != nil { - return err + if service == "caddy" { + restartCommand := []string{"service", val, "restart"} + cmd := exec.Command("/bin/bash", restartCommand...) 
+ err := cmd.Run() + if err != nil { + return err + } + return nil + } else { + restartCommandArgs := []string{"-f", "/home/ubuntu/snowplow/docker-compose.yml", + "restart", val} + cmd := exec.Command("/usr/local/bin/docker-compose", restartCommandArgs...) + err := cmd.Run() + if err != nil { + return err + } + return nil } - return nil } return errors.New("unrecognized service") } diff --git a/provisioning/resources/elasticsearch/config/elasticsearch.yml b/provisioning/resources/elasticsearch/config/elasticsearch.yml new file mode 100644 index 00000000..581d3697 --- /dev/null +++ b/provisioning/resources/elasticsearch/config/elasticsearch.yml @@ -0,0 +1,33 @@ +# ======================== Elasticsearch Configuration ========================= +# +# NOTE: Elasticsearch comes with reasonable defaults for most settings. +# Before you set out to tweak and tune the configuration, make sure you +# understand what are you trying to accomplish and the consequences. +# +# The primary way of configuring a node is via this file. This template lists +# the most important settings you may want to configure for a production cluster. +# +# Please consult the documentation for further information on configuration options: +# https://www.elastic.co/guide/en/elasticsearch/reference/index.html +# +# ---------------------------------- Cluster ----------------------------------- +# +# Use a descriptive name for your cluster: +# +cluster.name: "sp-mini-es-cluster" +# +# ------------------------------------ Node ------------------------------------ +# +# Use a descriptive name for the node: +# +node.name: "sp-mini-es-node" +# ---------------------------------- Network ----------------------------------- +# +# Set the bind address to a specific IP (IPv4 or IPv6): +# +network.host: 0.0.0.0 +# --------------------------------- Discovery ---------------------------------- +# +# Prevent the "split brain" by configuring the majority of nodes (total number of master-eligible nodes / 2 + 1): +# +discovery.zen.minimum_master_nodes: 1 \ No newline at end of file diff --git a/provisioning/resources/elasticsearch/config/log4j2.properties b/provisioning/resources/elasticsearch/config/log4j2.properties new file mode 100644 index 00000000..8c5cae8b --- /dev/null +++ b/provisioning/resources/elasticsearch/config/log4j2.properties @@ -0,0 +1,28 @@ +status = error + +appender.console.type = Console +appender.console.name = console +appender.console.layout.type = PatternLayout +appender.console.layout.pattern = [%d{ISO8601}][%-5p][%-25c{1.}] %marker%m%n + +appender.rolling.type = RollingFile +appender.rolling.name = rolling +appender.rolling.fileName = ${sys:es.logs.base_path}${sys:file.separator}${sys:es.logs.cluster_name}.log +appender.rolling.layout.type = PatternLayout +appender.rolling.layout.pattern = [%d{ISO8601}][%-5p][%-25c] %.10000m%n +appender.rolling.filePattern = ${sys:es.logs.base_path}${sys:file.separator}${sys:es.logs.cluster_name}-%d{yyyy-MM-dd}.log.zip +appender.rolling.policies.type = Policies +appender.rolling.policies.time.type = TimeBasedTriggeringPolicy +appender.rolling.policies.time.interval = 1 +appender.rolling.policies.time.modulate = true + +appender.rolling.strategy.type = DefaultRolloverStrategy +appender.rolling.strategy.action.type = Delete +appender.rolling.strategy.action.basepath = ${sys:es.logs.base_path} +appender.rolling.strategy.action.condition.type = IfLastModified +appender.rolling.strategy.action.condition.age = 7D +appender.rolling.strategy.action.PathConditions.type = IfFileName 
+appender.rolling.strategy.action.PathConditions.glob = ${sys:es.logs.cluster_name}-* + +rootLogger.level = info +rootLogger.appenderRef.console.ref = console diff --git a/provisioning/resources/elasticsearch/bad-mapping.json b/provisioning/resources/elasticsearch/mapping/bad-mapping.json similarity index 72% rename from provisioning/resources/elasticsearch/bad-mapping.json rename to provisioning/resources/elasticsearch/mapping/bad-mapping.json index ee8740d0..7b96de74 100644 --- a/provisioning/resources/elasticsearch/bad-mapping.json +++ b/provisioning/resources/elasticsearch/mapping/bad-mapping.json @@ -14,23 +14,15 @@ }, "mappings": { "bad": { - "_timestamp" : { - "enabled" : "yes", - "path" : "failure_tstamp" - }, - "_ttl": { - "enabled": true, - "default": "604800000" - }, "properties": { "errors": { "properties": { "message" : { - "type": "string", + "type": "text", "analyzer": "standard" }, "level" : { - "type": "string", + "type": "text", "analyzer": "standard" } } @@ -40,7 +32,7 @@ "format": "dateOptionalTime" }, "line": { - "type": "string", + "type": "text", "analyzer": "standard" } } diff --git a/provisioning/resources/elasticsearch/good-mapping.json b/provisioning/resources/elasticsearch/mapping/good-mapping.json similarity index 57% rename from provisioning/resources/elasticsearch/good-mapping.json rename to provisioning/resources/elasticsearch/mapping/good-mapping.json index 1102d531..4437a784 100644 --- a/provisioning/resources/elasticsearch/good-mapping.json +++ b/provisioning/resources/elasticsearch/mapping/good-mapping.json @@ -14,29 +14,21 @@ }, "mappings": { "good": { - "_timestamp" : { - "enabled" : "yes", - "path" : "collector_tstamp" - }, - "_ttl": { - "enabled": true, - "default": "604800000" - }, "properties": { "app_id": { - "type": "string", - "index": "not_analyzed" + "type": "keyword", + "index": true }, "br_colordepth": { - "type": "string", - "index": "not_analyzed" + "type": "keyword", + "index": true }, "br_cookies": { "type": "boolean" }, "br_family": { - "type": "string", - "index": "not_analyzed" + "type": "keyword", + "index": true }, "br_features_director": { "type": "boolean" @@ -66,24 +58,24 @@ "type": "boolean" }, "br_lang": { - "type": "string", - "index": "not_analyzed" + "type": "keyword", + "index": true }, "br_name": { - "type": "string", - "index": "not_analyzed" + "type": "keyword", + "index": true }, "br_renderengine": { - "type": "string", - "index": "not_analyzed" + "type": "keyword", + "index": true }, "br_type": { - "type": "string", - "index": "not_analyzed" + "type": "keyword", + "index": true }, "br_version": { - "type": "string", - "index": "not_analyzed" + "type": "keyword", + "index": true }, "br_viewheight": { "type": "long" @@ -96,8 +88,8 @@ "format": "dateOptionalTime" }, "doc_charset": { - "type": "string", - "index": "not_analyzed" + "type": "keyword", + "index": true }, "doc_height": { "type": "long" @@ -106,15 +98,15 @@ "type": "long" }, "domain_sessionid": { - "type": "string", - "index": "not_analyzed" + "type": "keyword", + "index": true }, "domain_sessionidx": { "type": "long" }, "domain_userid": { - "type": "string", - "index": "not_analyzed" + "type": "keyword", + "index": true }, "dvce_ismobile": { "type": "boolean" @@ -134,106 +126,106 @@ "format": "dateOptionalTime" }, "dvce_type": { - "type": "string", - "index": "not_analyzed" + "type": "keyword", + "index": true }, "etl_tstamp": { "type": "date", "format": "dateOptionalTime" }, "event": { - "type": "string", - "index": "not_analyzed" + "type": "keyword", + 
"index": true }, "event_id": { - "type": "string", - "index": "not_analyzed" + "type": "keyword", + "index": true }, "geo_location": { "type": "geo_point" }, "mkt_campaign": { - "type": "string", - "index": "not_analyzed" + "type": "keyword", + "index": true }, "mkt_content": { - "type": "string", - "index": "not_analyzed" + "type": "keyword", + "index": true }, "mkt_medium": { - "type": "string", - "index": "not_analyzed" + "type": "keyword", + "index": true }, "mkt_source": { - "type": "string", - "index": "not_analyzed" + "type": "keyword", + "index": true }, "mkt_term": { - "type": "string", - "index": "not_analyzed" + "type": "keyword", + "index": true }, "name_tracker": { - "type": "string", - "index": "not_analyzed" + "type": "keyword", + "index": true }, "network_userid": { - "type": "string", - "index": "not_analyzed" + "type": "keyword", + "index": true }, "os_family": { - "type": "string", - "index": "not_analyzed" + "type": "keyword", + "index": true }, "os_manufacturer": { - "type": "string", - "index": "not_analyzed" + "type": "keyword", + "index": true }, "os_name": { - "type": "string", - "index": "not_analyzed" + "type": "keyword", + "index": true }, "os_timezone": { - "type": "string", - "index": "not_analyzed" + "type": "keyword", + "index": true }, "page_referrer": { - "type": "string", - "index": "not_analyzed" + "type": "keyword", + "index": true }, "page_title": { - "type": "string", - "index": "not_analyzed" + "type": "keyword", + "index": true }, "page_url": { - "type": "string", - "index": "not_analyzed" + "type": "keyword", + "index": true }, "page_urlfragment": { - "type": "string", - "index": "not_analyzed" + "type": "keyword", + "index": true }, "page_urlhost": { - "type": "string", - "index": "not_analyzed" + "type": "keyword", + "index": true }, "page_urlpath": { - "type": "string", - "index": "not_analyzed" + "type": "keyword", + "index": true }, "page_urlport": { "type": "long" }, "page_urlquery": { - "type": "string", - "index": "not_analyzed" + "type": "keyword", + "index": true }, "page_urlscheme": { - "type": "string", - "index": "not_analyzed" + "type": "keyword", + "index": true }, "platform": { - "type": "string", - "index": "not_analyzed" + "type": "keyword", + "index": true }, "pp_xoffset_max": { "type": "long" @@ -248,79 +240,79 @@ "type": "long" }, "refr_medium": { - "type": "string", - "index": "not_analyzed" + "type": "keyword", + "index": true }, "refr_source": { - "type": "string", - "index": "not_analyzed" + "type": "keyword", + "index": true }, "refr_term": { - "type": "string", - "index": "not_analyzed" + "type": "keyword", + "index": true }, "refr_urlfragment": { - "type": "string", - "index": "not_analyzed" + "type": "keyword", + "index": true }, "refr_urlhost": { - "type": "string", - "index": "not_analyzed" + "type": "keyword", + "index": true }, "refr_urlpath": { - "type": "string", - "index": "not_analyzed" + "type": "keyword", + "index": true }, "refr_urlport": { "type": "long" }, "refr_urlquery": { - "type": "string", - "index": "not_analyzed" + "type": "keyword", + "index": true }, "refr_urlscheme": { - "type": "string", - "index": "not_analyzed" + "type": "keyword", + "index": true }, "se_action": { - "type": "string", - "index": "not_analyzed" + "type": "keyword", + "index": true }, "se_category": { - "type": "string", - "index": "not_analyzed" + "type": "keyword", + "index": true }, "se_label": { - "type": "string", - "index": "not_analyzed" + "type": "keyword", + "index": true }, "user_fingerprint": { - "type": "string", - 
"index": "not_analyzed" + "type": "keyword", + "index": true }, "user_id": { - "type": "string", - "index": "not_analyzed" + "type": "keyword", + "index": true }, "user_ipaddress": { - "type": "string", - "index": "not_analyzed" + "type": "keyword", + "index": true }, "useragent": { - "type": "string", - "index": "not_analyzed" + "type": "keyword", + "index": true }, "v_collector": { - "type": "string", - "index": "not_analyzed" + "type": "keyword", + "index": true }, "v_etl": { - "type": "string", - "index": "not_analyzed" + "type": "keyword", + "index": true }, "v_tracker": { - "type": "string", - "index": "not_analyzed" + "type": "keyword", + "index": true } } } diff --git a/provisioning/resources/init/create.sh b/provisioning/resources/init/create.sh new file mode 100755 index 00000000..004ca75c --- /dev/null +++ b/provisioning/resources/init/create.sh @@ -0,0 +1,15 @@ +#!/bin/sh + +# Creates Elasticsearch indexes with their mappings +# Followed by Kibana index patterns +# Followed by NSQ topics + +curl -X PUT localhost:9200/good -H 'application/json' -d @/home/ubuntu/snowplow/elasticsearch/mapping/good-mapping.json && \ +curl -X PUT localhost:9200/bad -H 'application/json' -d @/home/ubuntu/snowplow/elasticsearch/mapping/bad-mapping.json && \ +curl -X PUT localhost:9200/.kibana/index-pattern/good -d '{"title":"good", "timeFieldName":"collector_tstamp"}' && \ +curl -X PUT localhost:9200/.kibana/index-pattern/bad -d '{"title":"bad", "timeFieldName":"failure_tstamp"}' && \ +curl -X PUT localhost:9200/.kibana/config/5.6.10 -d '{"defaultIndex":"good"}' && \ +curl -X POST localhost:4151/topic/create?topic=RawEvents && \ +curl -X POST localhost:4151/topic/create?topic=BadEvents && \ +curl -X POST localhost:4151/topic/create?topic=EnrichedEvents && \ +curl -X POST localhost:4151/topic/create?topic=BadEnrichedEvents \ No newline at end of file diff --git a/provisioning/resources/init/iglu-server-init.sql b/provisioning/resources/init/iglu-server-init.sql new file mode 100644 index 00000000..7e572822 --- /dev/null +++ b/provisioning/resources/init/iglu-server-init.sql @@ -0,0 +1,2 @@ +CREATE USER snowplow WITH PASSWORD 'snowplow'; +CREATE DATABASE iglu OWNER snowplow; diff --git a/provisioning/resources/init/wait-for-elasticsearch.sh b/provisioning/resources/init/wait-for-elasticsearch.sh new file mode 100755 index 00000000..2f63c0af --- /dev/null +++ b/provisioning/resources/init/wait-for-elasticsearch.sh @@ -0,0 +1,16 @@ +#!/bin/sh +# wait-for-elasticsearch.sh + +set -e + +host="$1" +shift +cmd="$@" + +until curl "$host"; do + >&2 echo "Elasticsearch is unavailable - sleeping" + sleep 5 +done + +>&2 echo "Elasticsearch is up - executing command(s)" +exec $cmd \ No newline at end of file diff --git a/provisioning/resources/init/wait-for-postgres.sh b/provisioning/resources/init/wait-for-postgres.sh new file mode 100755 index 00000000..479a5d64 --- /dev/null +++ b/provisioning/resources/init/wait-for-postgres.sh @@ -0,0 +1,16 @@ +#!/bin/sh +# wait-for-postgres.sh + +set -e + +host="$1" +shift +cmd="$@" + +until PGPASSWORD=snowplow psql -h "$host" -d "iglu" -U "snowplow" -c '\q'; do + >&2 echo "Postgres is unavailable - sleeping" + sleep 2 +done + +>&2 echo "Postgres is up - executing command" +exec docker-entrypoint.sh $cmd \ No newline at end of file diff --git a/provisioning/resources/ui/js/components/Elasticsearch.tsx b/provisioning/resources/ui/js/components/Elasticsearch.tsx index 0a7618f7..7a878d83 100644 --- a/provisioning/resources/ui/js/components/Elasticsearch.tsx +++ 
b/provisioning/resources/ui/js/components/Elasticsearch.tsx @@ -43,7 +43,6 @@ export class Elasticsearch extends React.Component<{}, {}> {

           Quicklinks:
-          [one quicklink removed here — presumably the Elasticsearch Head Plugin link; the JSX markup was not preserved in this extract]
         );
diff --git a/provisioning/resources/ui/js/components/Overview.tsx b/provisioning/resources/ui/js/components/Overview.tsx
index 8cf3a13c..4f086753 100644
--- a/provisioning/resources/ui/js/components/Overview.tsx
+++ b/provisioning/resources/ui/js/components/Overview.tsx
@@ -25,7 +25,6 @@ export class Overview extends React.Component<{}, {}> {
     var collector: string = location.protocol + '//' + window.location.host;
     var kibana: string = location.protocol + '//' + window.location.host + '/kibana/';
-    var head_plugin: string = location.protocol + '//' + window.location.host + '/elasticsearch/_plugin/head/';
     var elasticsearch: string = location.protocol + '//' + window.location.host + '/elasticsearch';
     return (
@@ -38,7 +37,7 @@ export class Overview extends React.Component<{}, {}> {
       You can send events into Snowplow Mini automatically from the Example events page. Simply go to that page and click the sample event buttons.
       Alternatively, you can setup any of the Snowplow trackers to send data to this endpoint: {collector}
       2. Viewing the events
-      You can view the events that have been sent to Elasticsearch in the Kibana Dashboard or the Head Plugin.
+      You can view the events that have been sent to Elasticsearch in the Kibana Dashboard or the Head Plugin.
       You can also submit queries directly to the Elasticsearch endpoint.
       3. Understanding how Snowplow Mini works
       Quicklinks:
@@ -49,13 +48,14 @@ export class Overview extends React.Component<{}, {}> {
       The software stack installed:
       Stack topology:
       [JSX markup and link targets in these two hunks were not preserved in this extract; only the visible copy is shown]

diff --git a/provisioning/roles/docker/files/docker-compose.yml b/provisioning/roles/docker/files/docker-compose.yml
new file mode 100644
index 00000000..6dac7ee8
--- /dev/null
+++ b/provisioning/roles/docker/files/docker-compose.yml
@@ -0,0 +1,171 @@
+version: "3"
+
+services:
+  elasticsearch:
+    image: elasticsearch:5.6.10
+    container_name: elasticsearch
+    restart: always
+    environment:
+      - "bootstrap.memory_lock=true"
+      - "ES_JAVA_OPTS=-Xms4g -Xmx4g"
+    volumes:
+      - /home/ubuntu/snowplow/elasticsearch/data:/usr/share/elasticsearch/data
+    ulimits:
+      memlock:
+        soft: -1
+        hard: -1
+      nofile:
+        soft: 65536
+        hard: 65536
+    logging:
+      options:
+        max-size: "50M"
+        max-file: "10"
+    ports:
+      - "9200:9200"
+
+  kibana:
+    image: kibana:5.6.10
+    container_name: kibana
+    restart: always
+    ports:
+      - "5601:5601"
+    depends_on:
+      - elasticsearch
+
+  elasticsearch-loader-good:
+    image: snowplow-docker-registry.bintray.io/snowplow/elasticsearch-loader:0.10.1
+    container_name: elasticsearch-loader-good
+    command: [ "--config", "/snowplow/config/snowplow-es-loader-good.hocon" ]
+    restart: always
+    depends_on:
+      - elasticsearch
+    volumes:
+      - /home/ubuntu/snowplow/configs:/snowplow/config
+    logging:
+      options:
+        max-size: "1M"
+        max-file: "10"
+    environment:
+      - "SP_JAVA_OPTS=-Xms512m -Xmx512m"
+
+  elasticsearch-loader-bad:
+    image: snowplow-docker-registry.bintray.io/snowplow/elasticsearch-loader:0.10.1
+    container_name: elasticsearch-loader-bad
+    command: [ "--config", "/snowplow/config/snowplow-es-loader-bad.hocon" ]
+    restart: always
+    depends_on:
+      - elasticsearch
+    volumes:
+      - /home/ubuntu/snowplow/configs:/snowplow/config
+    logging:
+      options:
+        max-size: "1M"
+        max-file: "10"
+    environment:
+      - "SP_JAVA_OPTS=-Xms512m -Xmx512m"
+
+  nsqlookupd:
+    image: nsqio/nsq:v1.0.0-compat
+    container_name: nsqlookupd
+    command: /nsqlookupd
+    restart: always
+    logging:
+      options:
+        max-size: "1M"
+        max-file: "10"
+    ports:
+      - "4160:4160"
+      - "4161:4161"
+
+  nsqd:
+    image: nsqio/nsq:v1.0.0-compat
+    container_name: nsqd
+    command: /nsqd --lookupd-tcp-address=nsqlookupd:4160 --data-path=/home/ubuntu/snowplow/nsq-data
+    restart: always
+    volumes:
+      - /home/ubuntu/snowplow/nsq-data:/home/ubuntu/snowplow/nsq-data
+    depends_on:
+      - nsqlookupd
+    ports:
+      - "4150:4150"
+      - "4151:4151"
+
+  nsqadmin:
+    image: nsqio/nsq:v1.0.0-compat
+    container_name: nsqadmin
+    command: /nsqadmin --lookupd-http-address=nsqlookupd:4161
+    restart: always
+    depends_on:
+      - nsqlookupd
+    ports:
+      - "4171:4171"
+
+  scala-stream-collector:
+    image: snowplow-docker-registry.bintray.io/snowplow/scala-stream-collector-nsq:0.13.0
+    container_name: scala-stream-collector-nsq
+    command: [ "--config", "/snowplow/config/snowplow-stream-collector.hocon" ]
+    restart: always
+    depends_on:
+      - nsqd
+    ports:
+      - "8080:8080"
+    volumes:
+      - /home/ubuntu/snowplow/configs:/snowplow/config
+    logging:
+      options:
+        max-size: "1M"
+        max-file: "10"
+    environment:
+      - "SP_JAVA_OPTS=-Xms512m -Xmx512m"
+
+  stream-enrich:
+    image: snowplow-docker-registry.bintray.io/snowplow/stream-enrich-nsq:0.16.1
+    container_name: stream-enrich-nsq
+    command: [
+      "--config", "/snowplow/config/snowplow-stream-enrich.hocon",
+      "--resolver", "file:/snowplow/config/iglu-resolver.json",
+      "--enrichments", "file:/snowplow/config/enrichments",
+      "--force-ip-lookups-download"
+    ]
+    restart: always
+    depends_on:
+      - scala-stream-collector
+    volumes:
+      - /home/ubuntu/snowplow/configs:/snowplow/config
+    logging:
+      options:
+        max-size: "1M"
+        max-file: "10"
+    environment:
+      - "SP_JAVA_OPTS=-Xms512m -Xmx512m"
+
+  postgres:
+    container_name: postgres
+    image: postgres:9.5
+    restart: always
+    volumes:
+      - /home/ubuntu/snowplow/init/iglu-server-init.sql:/docker-entrypoint-initdb.d/init.sql
+    ports:
+      - "5433:5432"
+    logging:
+      options:
+        max-size: "1M"
+        max-file: "10"
+
+  iglu-server:
+    container_name: iglu-server
+    image: snowplow-docker-registry.bintray.io/snowplow/iglu-server:0.3.0
+    entrypoint: /snowplow/bin/wait-for-postgres.sh postgres --config /snowplow/config/iglu-server.conf
+    restart: always
+    depends_on:
+      - postgres
+    ports:
+      - "8081:8081"
+    volumes:
+      - /home/ubuntu/snowplow/init/wait-for-postgres.sh:/snowplow/bin/wait-for-postgres.sh
+      - /home/ubuntu/snowplow/configs:/snowplow/config
+    logging:
+      options:
+        max-size: "1M"
+        max-file: "10"
diff --git a/provisioning/roles/docker/tasks/main.yml b/provisioning/roles/docker/tasks/main.yml
new file mode 100644
index 00000000..ddc5d771
--- /dev/null
+++ b/provisioning/roles/docker/tasks/main.yml
@@ -0,0 +1,53 @@
+---
+- include_vars: ../../common_vars.yml
+
+- name: Setup the docker repository and install docker
+  sudo: yes
+  shell: |
+    apt-get update
+    apt-get install apt-transport-https ca-certificates curl software-properties-common --yes
+    curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo apt-key add -
+    add-apt-repository "deb [arch=amd64] https://download.docker.com/linux/ubuntu $(lsb_release -cs) stable"
+    apt-get update && apt-get install docker-ce --yes
+
+- name: Download docker-compose
+  sudo: yes
+  shell: curl -L https://github.com/docker/compose/releases/download/1.21.2/docker-compose-$(uname -s)-$(uname -m) -o /usr/local/bin/docker-compose
+
+- name: Apply executable permissions to the docker-compose binary
+  sudo: yes
+  shell: chmod +x /usr/local/bin/docker-compose
+
+- name: Copy docker-compose.yml
+  copy: src={{ item.src }} dest={{ item.dest }} owner=ubuntu group=ubuntu mode=0644
+  with_items:
+    - { src: '../files/docker-compose.yml', dest: '/home/ubuntu/snowplow/' }
+
+- name: Ensure file permissions
+  become: yes
+  shell: chown ubuntu:ubuntu -R /home/ubuntu/snowplow && chmod 755 -R /home/ubuntu/snowplow
+
+- name: Deploy snowplow mini
+  become: yes
+  shell: cd /home/ubuntu/snowplow/ && docker-compose up -d && sleep 20
+
+- name: Wait for Elasticsearch port 9200 to become open on the host, don't start checking for 10 seconds
+  wait_for:
+    port: 9200
+    delay: 10
+    sleep: 5
+    connect_timeout: 60
+
+# Kibana 5.x has issues updating index patterns
+# rebooting kibana and retrying works out
+- name: Likely to fail attempt to create ES indexes & Kibana index patterns & NSQ topics
+  become: yes
+  shell: sh {{init_dir}}/create.sh
+
+- name: restart kibana for known index-creating issues
+  become: yes
+  shell: docker-compose -f {{main_dir}}/docker-compose.yml restart kibana
+
+- name: Create ES indexes & Kibana index patterns & NSQ topics
+  become: yes
+  shell: sh {{init_dir}}/create.sh
diff --git a/provisioning/roles/sp_mini_1_create_dirs/tasks/main.yml b/provisioning/roles/sp_mini_1_create_dirs/tasks/main.yml
index e382f4ba..8d711fbb 100644
--- a/provisioning/roles/sp_mini_1_create_dirs/tasks/main.yml
+++ b/provisioning/roles/sp_mini_1_create_dirs/tasks/main.yml
@@ -10,26 +10,8 @@
   shell: 'adduser ubuntu --disabled-password --gecos "" ; passwd -d ubuntu'
 
-- name: Insert logrotate configuration for Snowplow Services
-  become: yes
-  copy:
-    dest: "/etc/logrotate.d/snowplow-apps"
-    content: |
-      /var/log/snowplow*.log /var/log/snowplow*.err /var/log/nsq*.log /var/log/nsq*.err /var/log/iglu_server*.log
/var/log/iglu_server*.err { - hourly - rotate 3 - missingok - notifempty - copytruncate - } - mode: 0644 - -- name: Change logrotate cron to hourly - become: yes - shell: 'mv /etc/cron.daily/logrotate /etc/cron.hourly && service cron restart' - - name: creating directories - file: path={{item}} state=directory + file: path={{item}} state=directory mode=0755 with_items: - "{{configs_dir}}" - "{{staging_dir}}" @@ -48,16 +30,19 @@ src: "{{playbook_dir}}/resources/elasticsearch" dest: "{{main_dir}}" recursive: yes + archive: no - synchronize: src: "{{playbook_dir}}/resources/configs" dest: "{{main_dir}}" recursive: yes + archive: no - synchronize: src: "{{playbook_dir}}/resources/init" dest: "{{main_dir}}" recursive: yes + archive: no - name: Install NTP to prevent clock drifts become: yes diff --git a/provisioning/roles/sp_mini_2_install_postgresl/tasks/main.yml b/provisioning/roles/sp_mini_2_install_postgresl/tasks/main.yml deleted file mode 100644 index ad7c423b..00000000 --- a/provisioning/roles/sp_mini_2_install_postgresl/tasks/main.yml +++ /dev/null @@ -1,53 +0,0 @@ ---- -- include_vars: ../../common_vars.yml - -- name: Adding APT repository key - become: yes - apt_key: - id: ACCC4CF8 - url: https://www.postgresql.org/media/keys/ACCC4CF8.asc - tags: - - postgresql - - db - - repo - -- name: Add PostgreSQL official APT repository - become: yes - apt_repository: - repo: "deb http://apt.postgresql.org/pub/repos/apt/ {{ansible_distribution_release}}-pgdg main" - tags: - - postgresql - - db - - repo - -- name: Install acl for creating Postgresql user - become: yes - apt: - name: "acl" - state: present - update_cache: yes - cache_valid_time: 3600 - -- name: Install PostgreSQL - become: yes - apt: - name: "postgresql-9.5" - state: present - update_cache: yes - cache_valid_time: 3600 - tags: - - postgresql - - db - - deps - -- name: Install dependencies for the Ansible module - become: yes - apt: - name: "{{item}}" - state: latest - with_items: - - python-psycopg2 - tags: - - postgresql - - db - - deps diff --git a/provisioning/roles/sp_mini_4_setup_apps/tasks/main.yml b/provisioning/roles/sp_mini_4_setup_apps/tasks/main.yml index e8fe313b..6553abc5 100644 --- a/provisioning/roles/sp_mini_4_setup_apps/tasks/main.yml +++ b/provisioning/roles/sp_mini_4_setup_apps/tasks/main.yml @@ -3,43 +3,8 @@ - name: Set variables set_fact: - stream_collector_package: 'snowplow_scala_stream_collector_0.11.0.zip' - stream_enrich_package: 'snowplow_stream_enrich_nsq_0.16.1.zip' - es_loader_package: 'snowplow_elasticsearch_loader_http_0.10.1.zip' - iglu_server_package: 'iglu_server_0.3.0.zip' - kibana_v: '4.0.1' - nsq_package: 'nsq-1.0.0-compat.linux-amd64.go1.8.tar.gz' - nsq_bin_dir: 'nsq-1.0.0-compat.linux-amd64.go1.8/bin' control_plane_dir: '{{playbook_dir}}/resources/control-plane' -- name: Install unzip - become: yes - apt: - name: "unzip" - state: present - update_cache: yes - cache_valid_time: 3600 - -- name: Add Java 8 repository - become: yes - apt_repository: - repo: 'ppa:webupd8team/java' - state: present - -- name: Signed Oracle License - become: yes - shell: "echo oracle-java8-installer shared/accepted-oracle-license-v1-1 select true | sudo /usr/bin/debconf-set-selections" - register: oracle_license_signed - -- name: Install Java 8 - become: yes - apt: - name: oracle-java8-installer - state: present - update_cache: yes - cache_valid_time: 3600 - when: oracle_license_signed|changed - - name: Copy Control API to executables dir become: yes synchronize: @@ -52,134 +17,6 @@ src: 
"{{playbook_dir}}/../VERSION" dest: "{{main_dir}}" -- name: Check Stream Collector - stat: - path: "{{staging_dir}}/{{stream_collector_package}}" - register: check_stream_collector_result - -- name: Download Stream Collector - get_url: - url: "http://dl.bintray.com/snowplow/snowplow-generic/{{stream_collector_package}}" - dest: "{{staging_dir}}" - when: check_stream_collector_result.stat.exists == False - register: stream_collector_downloaded - -- name: Unzip downloaded Stream Collector - shell: "unzip {{staging_dir}}/{{stream_collector_package}} -d {{executables_dir}}" - when: stream_collector_downloaded|changed - -- name: Check Stream Enrich - stat: - path: "{{staging_dir}}/{{stream_enrich_package}}" - register: check_stream_enrich_result - -- name: Download Stream Enrich - get_url: - url: "http://dl.bintray.com/snowplow/snowplow-generic/{{stream_enrich_package}}" - dest: "{{staging_dir}}" - when: check_stream_enrich_result.stat.exists == False - register: stream_enrich_downloaded - -- name: Unzip downloaded Stream Enrich - shell: "unzip {{staging_dir}}/{{stream_enrich_package}} -d {{executables_dir}}" - when: stream_enrich_downloaded|changed - -- name: Check Elasticsearch Loader - stat: - path: "{{staging_dir}}/{{es_loader_package}}" - register: check_es_loader_result - -- name: Download Elasticsearch Loader - get_url: - url: "http://bintray.com/artifact/download/snowplow/snowplow-generic/{{es_loader_package}}" - dest: "{{staging_dir}}" - when: check_es_loader_result.stat.exists == False - register: es_loader_downloaded - -- name: Unzip downloaded Elasticsearch Loader - shell: "unzip {{staging_dir}}/{{es_loader_package}} -d {{executables_dir}}" - when: es_loader_downloaded|changed - -- name: Check Iglu Server - stat: - path: "{{staging_dir}}/{{iglu_server_package}}" - register: check_iglu_server_result - -- name: Download Iglu Server - get_url: - url: "http://bintray.com/artifact/download/snowplow/snowplow-generic/{{iglu_server_package}}" - dest: "{{staging_dir}}" - when: check_iglu_server_result.stat.exists == False - register: iglu_server_downloaded - -- name: Unzip downloaded Iglu Server - shell: "unzip {{staging_dir}}/{{iglu_server_package}} -d {{executables_dir}}" - when: iglu_server_downloaded|changed - register: iglu_server_extracted - -- name: Download NSQ - get_url: - url: "https://s3.amazonaws.com/bitly-downloads/nsq/{{nsq_package}}" - dest: "{{staging_dir}}" - -- name: Unzip downloaded NSQ - shell: "tar xvfz {{staging_dir}}/{{nsq_package}} --directory {{staging_dir}}" - -- name: Copy NSQ binaries to executables_dir - shell: "cp {{staging_dir}}/{{nsq_bin_dir}}/nsqd {{staging_dir}}/{{nsq_bin_dir}}/nsqlookupd {{staging_dir}}/{{nsq_bin_dir}}/nsqadmin {{executables_dir}}" - -- name: Create snowplow user on Postgresql - become: true - become_user: postgres - postgresql_user: - name: snowplow - password: snowplow - -- name: Create iglu db on Postgresql - become: true - become_user: postgres - postgresql_db: - name: iglu - owner: snowplow - -- name: Download Elasticsearch - become: yes - shell: "wget https://download.elastic.co/elasticsearch/elasticsearch/elasticsearch-1.7.5.deb -P {{staging_dir}}" - -- name: Install Elasticsearch - become: yes - shell: "dpkg -i {{staging_dir}}/elasticsearch-1.7.5.deb" - -- name: Install Elasticsearch Head Plugin - become: yes - shell: "/usr/share/elasticsearch/bin/plugin --install mobz/elasticsearch-head" - -- name: Check Kibana - stat: - path: "{{staging_dir}}/kibana-{{kibana_v}}-linux-x64.zip" - register: check_kibana_result - -- name: Download 
Kibana - get_url: - url: "https://download.elasticsearch.org/kibana/kibana/kibana-{{kibana_v}}-linux-x64.zip" - dest: "{{staging_dir}}" - when: check_kibana_result.stat.exists == False - register: kibana_downloaded - -- name: Unzip downloaded Kibana package - become: yes - shell: "unzip {{staging_dir}}/kibana-{{kibana_v}}-linux-x64.zip -d /opt/" - when: kibana_downloaded|changed - register: kibana_unzipped - -- name: Symlink for kibana - become: yes - file: - src: "/opt/kibana-{{kibana_v}}-linux-x64" - dest: "/opt/kibana" - state: link - when: kibana_unzipped|changed - - name: Copy Caddy executable to executables dir become: yes environment: diff --git a/provisioning/roles/sp_mini_7_setup_init/tasks/main.yml b/provisioning/roles/sp_mini_7_setup_init/tasks/main.yml index 1197549e..df561b0b 100644 --- a/provisioning/roles/sp_mini_7_setup_init/tasks/main.yml +++ b/provisioning/roles/sp_mini_7_setup_init/tasks/main.yml @@ -9,31 +9,35 @@ dest: "/etc/init.d" mode: 0755 with_items: - - kibana4_init - snowplow_mini_control_plane_api - - snowplow_stream_collector - - snowplow_stream_enrich - - snowplow_elasticsearch_loader_good - - snowplow_elasticsearch_loader_bad - - iglu_server_0.3.0 - caddy_init - - nsqd_init - - nsqlookupd_init - - nsqadmin_init - name: Configure for inits for calling at boot time become: yes shell: "update-rc.d {{item}} defaults" with_items: - - kibana4_init - snowplow_mini_control_plane_api - - snowplow_stream_collector - - snowplow_stream_enrich - - snowplow_elasticsearch_loader_good - - snowplow_elasticsearch_loader_bad - - iglu_server_0.3.0 - - elasticsearch - caddy_init - - nsqd_init - - nsqlookupd_init - - nsqadmin_init + +- name: Start UI and Control Plane + become: yes + shell: service snowplow_mini_control_plane_api start && service caddy_init start + +- name: Increase mmap count to recommended 262144 for Elasticsearch + become: yes + shell: echo "vm.max_map_count=262144" >> /etc/sysctl.conf && service procps start + +- cron: + name: "Add cronjob to restart containers at system reboot" + special_time: reboot + job: /usr/local/bin/docker-compose -f /home/ubuntu/snowplow/docker-compose.yml restart && sleep 30 + +- cron: + name: "Add cronjob to crop ES good index's docs older than a week" + special_time: weekly + job: /usr/bin/curl -s -X POST http://localhost:9200/good/_delete_by_query -d '{ "query" :{ "range" :{ "collector_tstamp" :{ "lt" :"now-1w/d" } } } }' > /dev/null 2>&1 + +- cron: + name: "Add cronjob to crop ES bad index's docs older than a week" + special_time: weekly + job: /usr/bin/curl -s -X POST http://localhost:9200/bad/_delete_by_query -d '{ "query" :{ "range" :{ "failure_tstamp" :{ "lt" :"now-1w/d" } } } }' > /dev/null 2>&1 diff --git a/provisioning/roles/sp_mini_8_configure/tasks/main.yml b/provisioning/roles/sp_mini_8_configure/tasks/main.yml deleted file mode 100644 index 0cc4c509..00000000 --- a/provisioning/roles/sp_mini_8_configure/tasks/main.yml +++ /dev/null @@ -1,80 +0,0 @@ ---- -- include_vars: ../../common_vars.yml - -- name: Starting Elasticsearch - become: yes - service: - name: elasticsearch - state: started - register: ElasticsearchStarted - -- name: Wait for Elasticsearch port 9200 to become open on the host, don't start checking for 10 seconds - wait_for: - port: 9200 - delay: 10 - -- name: curl put good-mapping.json - shell: "curl -XPUT 'http://localhost:9200/good' -d @{{es_dir}}/good-mapping.json" - -- name: curl put bad-mapping.json - shell: "curl -XPUT 'http://localhost:9200/bad' -d @{{es_dir}}/bad-mapping.json" - -- name: Starting 
nsqd - become: yes - service: - name: nsqd_init - state: started - register: NsqdStarted - -- name: Starting nsqlookupd - become: yes - service: - name: nsqlookupd_init - state: started - register: NsqlookupdStarted - -- name: Starting nsqadmin - become: yes - service: - name: nsqadmin_init - state: started - register: NsqadminStarted - -- name: Wait for the NSQ services to start - wait_for: - port: "{{item}}" - delay: 1 - with_items: - - 4151 - - 4161 - - 4171 - -- name: Starting Kibana - become: yes - service: - name: kibana4_init - state: started - -- name: add "good" index pattern to Kibana - shell: > - curl -XPUT http://localhost:9200/.kibana/index-pattern/good -d '{"title" : "good", "timeFieldName" : "collector_tstamp"}' - -- name: add "bad" index pattern to Kibana - shell: > - curl -XPUT http://localhost:9200/.kibana/index-pattern/bad -d '{"title" : "bad", "timeFieldName" : "failure_tstamp"}' - -- name: make "good" index pattern default - shell: > - curl -XPUT http://localhost:9200/.kibana/config/4.0.1 -d '{"defaultIndex" : "good"}' - -- name: Create new topic for RawEvents - shell: "curl -X POST http://127.0.0.1:4151/topic/create?topic=RawEvents" - -- name: Create new topic for BadEvents - shell: "curl -X POST http://127.0.0.1:4151/topic/create?topic=BadEvents" - -- name: Create new topic for EnrichedEvents - shell: "curl -X POST http://127.0.0.1:4151/topic/create?topic=EnrichedEvents" - -- name: Create new topic for BadEnrichedEvents - shell: "curl -X POST http://127.0.0.1:4151/topic/create?topic=BadEnrichedEvents" diff --git a/provisioning/with_building_ui_and_go_projects.yml b/provisioning/with_building_ui_and_go_projects.yml index 8bf08842..15899f46 100644 --- a/provisioning/with_building_ui_and_go_projects.yml +++ b/provisioning/with_building_ui_and_go_projects.yml @@ -12,10 +12,9 @@ - typescript - packer - sp_mini_1_create_dirs - - sp_mini_2_install_postgresl - sp_mini_3_build_go_projects - sp_mini_4_setup_apps - sp_mini_5_build_ui - sp_mini_6_copy_ui_folders - sp_mini_7_setup_init - - sp_mini_8_configure + - docker diff --git a/provisioning/without_building_ui_and_go_projects.yml b/provisioning/without_building_ui_and_go_projects.yml index b7483aca..99402670 100644 --- a/provisioning/without_building_ui_and_go_projects.yml +++ b/provisioning/without_building_ui_and_go_projects.yml @@ -7,8 +7,7 @@ roles: - sp_mini_1_create_dirs - - sp_mini_2_install_postgresl - sp_mini_4_setup_apps - sp_mini_6_copy_ui_folders - sp_mini_7_setup_init - - sp_mini_8_configure + - docker diff --git a/utils/scripts/user_data.sh b/utils/scripts/user_data.sh index aa00037c..49412c51 100755 --- a/utils/scripts/user_data.sh +++ b/utils/scripts/user_data.sh @@ -8,7 +8,7 @@ password='password' iglu_server_super_uid='deadbeef-dead-beef-dead-beefdeadbeef' # DO NOT ALTER BELOW # -sudo service iglu_server_0.3.0 restart +sudo /usr/local/bin/docker-compose -f /home/ubuntu/snowplow/docker-compose.yml restart iglu-server sudo service snowplow_mini_control_plane_api restart sleep 10