diff --git a/.travis.yml b/.travis.yml index 2ea3a450..71757f5f 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,5 +1,6 @@ --- sudo: required +dist: trusty language: go go: @@ -20,3 +21,8 @@ before_script: script: - ./integration/integration_test.sh + +env: + global: + - secure: CP4KymCt5vhWEOGBYw+mBKxfd34PoGOybo5QdiGJ0tj5l6Tn8utPn/MSOMZLdlxdqUnPstnhsBNpGybnBSQIKC8TSZqMqgebw59Qq75bk2gNQ5NSv0iOjija9oKvBLaKgyXwwzL2o4JWnFTuFsHGyGLjSHoKy3sptfKIih3vwa0ey4SevElUtqpwT32AjCYW0L+iMP+kWRUEna6vdR4BTLrl8/UXAQRccQ8gmcpecfhqJL6sYgmK4oqgbhyMn8dvWVMIFsUjJ76rvJ5KvcGceNqJ07Yb8qaLKx/OBDn7I8GbRHrnxgLXF/7gU40jG+nOeYUTfdflMDmJTplk+Sgi2WxumpDdLuEJPge6kZMQGcZ5rHPS3dAXJ8APVn/SirHSVpjApdYkkoL7DXBFYPvIfpgKfqj2ofwKLoY4OZwOPU6+XCLQJkxxIszTPbyXaBofeB+CpLTIcU0nGn6NXAGT6ONDSlIKVNXotlhz9robtOeJ723tFgmUoTSvHHwXAal/3LBVjx2qnTg7pW3/9wcMaI+uaETdIcv3DBRpr8SfcObfYCMU7HOck3gfIIbn6YB8MbY3fceeZ8UPw9R9UyIvTVISOJW+SCCJoybEqxGnNhNXRTbn/anCS9dN1zesVCPZMl70bJyQNBkTW+sTusOASrfkYKvDiU/qkzUSx/Qlz0s= + - secure: gvyo8HuSuIEiSIZH7A2B0Nz6HJx04lTIV4QYx+bpEcCWailbgqSjDBRVwb6IdsQBsZsy79924Go3j4s0qtAOXGOLhHkmEE+bmLzT7Ejey9ENkQm5iAkKb2clYTKiDa94tB/Rqk+yW669HILttB/mcOAj+LFhBKb3jJJS0n0hDfYKsbleWSAVMyPw+V7SEQBc+9VTmJcq0YYYHS1Ie4g92QXxA33nLyGcOhCY7SS+gAqbMuQDhGa+Y0jh8Tz4YMVCd0tFLjsIB818DYE2YgRZerHUC8G7+kyIylV3/Nr1tEb/i6F8Ii6ET7GnzCnMJ0pj0vp/hsIsZBJeZUyUJEwtBx9798OeBWPrpuJtesTuNVO3DJmahrdIwe+fWOt5wFJFgZwvpmu12px+R8yTYkCl2oWSf+CNJi5fhb68m3D3p5JAqRNvvuKQE+zdX1pMCTmLWIjl8SOFN5ptP4kpDStziBsKyH9YQ8KUvR24kcyWO4u2pkRNgkI+SZTl83U/zPPKh/R1jGjm+TC9rlGCyj+OJIfl2hiLJKgBQtdSkJK0dz5ZQx9ZcGDPW88oeLUqJGRHQsPUVZB/cutrIwg2WIG4nBdCHhGG7t1u/hCr27aOTlJfy/3uA9cdkZCoVccwiwMcydtBmEgcYxjGbE378fXtBSpqPvuRs5r2dEpAbmcOmOU= diff --git a/CHANGELOG b/CHANGELOG index 14aa9cf6..42ee25f6 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,3 +1,19 @@ +Version 0.6.0 (2018-07-02) +-------------------------- +Bump Stream Enrich to 0.18.0 (#174) +Publish mini to bigger instance types (#173) +Add publication of image for Google Compute Engine (#36) +Add support for external Postgres for Iglu Server (#154) +Assemble the apps using Docker Compose (#23) +Extend copyright to 2018 (#164) +Add gitter badge (#157) +Install NTP (#167) +Add ami-factory credentials to .travis.yml (#143) +Fix typo in README (#160) +Expose nsqadmin UI (#144) +Update example user_data.sh (#148) +Ensure Stream Enrich resolver uses cacheTtl of one (#137) + Version 0.5.0 (2018-05-29) -------------------------- Bump Packer to 1.2.3 (#163) diff --git a/Packerfile.json b/Packerfile.json index 83dab255..375ee846 100644 --- a/Packerfile.json +++ b/Packerfile.json @@ -5,9 +5,9 @@ "ami_groups": [ "all" ], - "ami_name": "snowplow-mini-{{user `version`}}-{{ timestamp }}-hvm-ebs-amd64", + "ami_name": "snowplow-mini-{{user `aws_version`}}-{{user `sp_mini_size`}}-{{ timestamp }}-hvm-ebs-amd64", "ami_regions": "us-east-2,us-west-1,us-west-2,ca-central-1,eu-west-1,eu-central-1,eu-west-2,ap-southeast-1,ap-southeast-2,ap-northeast-2,ap-northeast-1,ap-south-1,sa-east-1", - "instance_type": "t2.medium", + "instance_type": "{{user `aws_instance_type`}}", "region": "us-east-1", "source_ami": "ami-58167327", "ssh_username": "ubuntu", @@ -16,6 +16,17 @@ "Release": "{{user `version`}}" }, "type": "amazon-ebs" + }, + { + "type": "googlecompute", + "image_description": "Snowplow Mini - The Snowplow Pipeline in a box", + "image_name": "snowplow-mini-{{user `gcp_version`}}-{{user `sp_mini_size`}}-{{timestamp}}", + "machine_type": "{{user `gcp_machine_type`}}", + "account_file": "account.json", + "project_id": "snowplow-images", + "source_image_family": 
"ubuntu-1404-lts", + "ssh_username": "ubuntu", + "zone": "us-central1-a" } ], "post-processors": [], @@ -26,6 +37,10 @@ } ], "variables": { - "version": "0.5.0" + "aws_version": "0.6.0", + "gcp_version": "0-6-0", + "gcp_machine_type": "{{env `GCP_MACHINE_TYPE`}}", + "aws_instance_type": "{{env `AWS_INSTANCE_TYPE`}}", + "sp_mini_size": "{{env `SP_MINI_SIZE`}}" } } diff --git a/README.md b/README.md index 29d6e10f..d608310b 100644 --- a/README.md +++ b/README.md @@ -1,10 +1,13 @@ # Snowplow-Mini -[![Build Status][travis-image]][travis] [![Release][release-image]][releases] [![License][license-image]][license] +[![Join the chat at https://gitter.im/snowplow/snowplow-mini](https://badges.gitter.im/snowplow/snowplow-mini.svg)](https://gitter.im/snowplow/snowplow-mini?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge) +[![Build Status][travis-image]][travis] +[![Release][release-image]][releases] +[![License][license-image]][license] An easily-deployable, single instance version of Snowplow that serves three use cases: -1. Gives a Snowplow consumer (e.g. an analyst / data team / marketing team) a way to quickly understand what Snowplow "does" i.e. what you put it at one end and take out of the other +1. Gives a Snowplow consumer (e.g. an analyst / data team / marketing team) a way to quickly understand what Snowplow "does" i.e. what you put in at one end and take out of the other 2. Gives developers new to Snowplow an easy way to start with Snowplow and understand how the different pieces fit together 3. Gives people running Snowplow a quick way to debug tracker updates (because they can) @@ -70,7 +73,7 @@ limitations under the License. [travis]: https://travis-ci.org/snowplow/snowplow-mini [travis-image]: https://travis-ci.org/snowplow/snowplow-mini.svg?branch=master -[release-image]: http://img.shields.io/badge/release-0.5.0-blue.svg?style=flat +[release-image]: http://img.shields.io/badge/release-0.6.0-blue.svg?style=flat [releases]: https://github.com/snowplow/snowplow-mini/releases [license-image]: http://img.shields.io/badge/license-Apache--2-blue.svg?style=flat diff --git a/VERSION b/VERSION index 8f0916f7..a918a2aa 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -0.5.0 +0.6.0 diff --git a/Vagrantfile b/Vagrantfile index 6274fc18..8b726478 100644 --- a/Vagrantfile +++ b/Vagrantfile @@ -10,6 +10,7 @@ Vagrant.configure("2") do |config| config.vm.network "forwarded_port", guest: 80, host: 2000 config.vm.network "forwarded_port", guest: 3000, host: 3000 + config.vm.network "forwarded_port", guest: 4171, host: 4171 config.vm.network "forwarded_port", guest: 8080, host: 8080 config.vm.network "forwarded_port", guest: 9200, host: 9200 config.vm.network "forwarded_port", guest: 5601, host: 5601 @@ -20,8 +21,8 @@ Vagrant.configure("2") do |config| vb.name = Dir.pwd().split("/")[-1] + "-" + Time.now.to_f.to_i.to_s vb.customize ["modifyvm", :id, "--natdnshostresolver1", "on"] vb.customize [ "guestproperty", "set", :id, "--timesync-threshold", 10000 ] - vb.memory = 4096 - vb.cpus = 1 + vb.memory = 8192 + vb.cpus = 2 end config.vm.provision :shell do |sh| @@ -30,7 +31,18 @@ Vagrant.configure("2") do |config| # Requires Vagrant 1.7.0+ config.push.define "publish", strategy: "local-exec" do |push| - push.script = "vagrant/push.bash" + push.inline = <<-SCRIPT + # comment/uncomment below to enable/disable pushing to AWS/GCP + # or configure instance type large/xlarge/xxlarge + + # vagrant/push.bash aws large + # vagrant/push.bash aws xlarge + # vagrant/push.bash aws xxlarge + + # 
vagrant/push.bash gcp large + # vagrant/push.bash gcp xlarge + # vagrant/push.bash gcp xxlarge + SCRIPT end end diff --git a/integration/integration_test.sh b/integration/integration_test.sh index 2d3b67ab..2f60541d 100755 --- a/integration/integration_test.sh +++ b/integration/integration_test.sh @@ -1,14 +1,5 @@ #!/bin/bash -sudo service elasticsearch start -sudo service iglu_server_0.2.0 start -sudo service snowplow_stream_collector start -sudo service snowplow_stream_enrich start -sudo service snowplow_elasticsearch_loader_good start -sudo service snowplow_elasticsearch_loader_bad start -sudo service kibana4_init start -sleep 15 - # Send good and bad events COUNTER=0 while [ $COUNTER -lt 10 ]; do @@ -16,7 +7,7 @@ while [ $COUNTER -lt 10 ]; do curl http://localhost:8080/i let COUNTER=COUNTER+1 done -sleep 60 +sleep 90 # Assertions good_count="$(curl --silent -XGET 'http://localhost:9200/good/good/_count' | python -c 'import json,sys;obj=json.load(sys.stdin);print obj["count"]')" diff --git a/provisioning/resources/configs/Caddyfile b/provisioning/resources/configs/Caddyfile index 05e74b5e..b4e3b638 100644 --- a/provisioning/resources/configs/Caddyfile +++ b/provisioning/resources/configs/Caddyfile @@ -4,12 +4,14 @@ /home /kibana /elasticsearch + /nsqadmin /control-plane - /_plugin } redir /home /home/ - redir /kibana /kibana/ redir /iglu-server /iglu-server/ + redir /kibana /kibana/ + redir /nsqadmin /nsqadmin/ + redir /elasticsearch /elasticsearch/ proxy / localhost:8080 @@ -20,16 +22,29 @@ proxy /kibana localhost:5601 { without /kibana } + proxy /app/kibana localhost:5601 + proxy /app/timelion localhost:5601 + proxy /bundles localhost:5601 + proxy /plugins localhost:5601 + proxy /ui localhost:5601 + proxy /api localhost:5601 proxy /iglu-server localhost:8081 { without /iglu-server } - proxy /api localhost:8081 + proxy /api-docs localhost:8081 + + proxy /nsqadmin localhost:4171 { + without /nsqadmin + } + proxy /static localhost:4171 + proxy /api/counter localhost:4171 + proxy /api/nodes localhost:4171 + proxy /api/topics localhost:4171 proxy /elasticsearch localhost:9200 { without /elasticsearch } - proxy /_plugin localhost:9200 proxy /control-plane localhost:10000 { without /control-plane diff --git a/provisioning/resources/configs/control-plane-api.toml b/provisioning/resources/configs/control-plane-api.toml index 9ba911fb..d15cb7da 100644 --- a/provisioning/resources/configs/control-plane-api.toml +++ b/provisioning/resources/configs/control-plane-api.toml @@ -12,17 +12,18 @@ config = "/home/ubuntu/snowplow/configs" #directory which all the configs are in [config_file_names] caddy = "Caddyfile" iglu_resolver = "iglu-resolver.json" +iglu_server = "iglu-server.conf" [init_scripts] -stream_collector = "snowplow_stream_collector" -stream_enrich = "snowplow_stream_enrich" -es_loader_good = "snowplow_elasticsearch_loader_good" -es_loader_bad = "snowplow_elasticsearch_loader_bad" -iglu = "iglu_server_0.2.0" +stream_collector = "scala-stream-collector" +stream_enrich = "stream-enrich" +es_loader_good = "elasticsearch-loader-good" +es_loader_bad = "elasticsearch-loader-bad" +iglu = "iglu-server" caddy = "caddy_init" [PSQL] user = "snowplow" password = "snowplow" database = "iglu" -adddress = "127.0.0.1:5432" +address = "127.0.0.1:5433" diff --git a/provisioning/resources/configs/enrichments/pii_enrichment_config.json b/provisioning/resources/configs/enrichments/pii_enrichment_config.json index c1c28aee..cecd1331 100644 --- 
a/provisioning/resources/configs/enrichments/pii_enrichment_config.json +++ b/provisioning/resources/configs/enrichments/pii_enrichment_config.json @@ -1,32 +1,24 @@ { - "schema": "iglu:com.snowplowanalytics.snowplow.enrichments/pii_enrichment_config/jsonschema/1-0-0", + "schema": "iglu:com.snowplowanalytics.snowplow.enrichments/pii_enrichment_config/jsonschema/2-0-0", "data": { "vendor": "com.snowplowanalytics.snowplow.enrichments", "name": "pii_enrichment_config", - "enabled": false, + "emitEvent": true, + "enabled": true, "parameters": { "pii": [ - { - "pojo": { - "field": "user_id" - } - }, - { - "pojo": { - "field": "user_fingerprint" - } - }, { "json": { "field": "unstruct_event", - "schemaCriterion": "iglu:com.mailchimp/subscribe/jsonschema/1-0-*", - "jsonPath": "$.data.['email', 'ip_opt']" + "schemaCriterion": "iglu:com.snowplowanalytics.snowplow.enrichments/pii_enrichment_config/jsonschema/2-*-*", + "jsonPath": "$.data.['vendor']" } } ], "strategy": { "pseudonymize": { - "hashFunction": "SHA-256" + "hashFunction": "SHA-1", + "salt": "pepper123" } } } diff --git a/provisioning/resources/configs/iglu-resolver.json b/provisioning/resources/configs/iglu-resolver.json index 2b3ef941..fbf56217 100644 --- a/provisioning/resources/configs/iglu-resolver.json +++ b/provisioning/resources/configs/iglu-resolver.json @@ -1,7 +1,8 @@ { - "schema": "iglu:com.snowplowanalytics.iglu/resolver-config/jsonschema/1-0-1", + "schema": "iglu:com.snowplowanalytics.iglu/resolver-config/jsonschema/1-0-2", "data": { "cacheSize": 500, + "cacheTtl": 1, "repositories": [ { "name": "Iglu Central", @@ -23,7 +24,7 @@ ], "connection": { "http": { - "uri": "http://localhost:8081/api", + "uri": "http://iglu-server:8081/api", "apikey": "PLACEHOLDER" } } diff --git a/provisioning/resources/configs/iglu-server.conf b/provisioning/resources/configs/iglu-server.conf index 254cfef1..793e7838 100644 --- a/provisioning/resources/configs/iglu-server.conf +++ b/provisioning/resources/configs/iglu-server.conf @@ -1,4 +1,4 @@ -# Copyright (c) 2014 Snowplow Analytics Ltd. All rights reserved. +# Copyright (c) 2014-2018 Snowplow Analytics Ltd. All rights reserved. # # This program is licensed to you under the Apache License Version 2.0, and # you may not use this file except in compliance with the Apache License @@ -15,15 +15,19 @@ # the Iglu repository server. # 'repo-server' contains configuration options for the repo-server. +# interface on which the server will be running +# baseURL is address of deployment, ":/" address used for baseURL of Swagger UI +# port on which the server will be running repo-server { interface = "0.0.0.0" + baseURL = "0.0.0.0/iglu-server" port = 8081 } # 'postgres' contains configuration options for the postgre instance the server # is using postgres { - host = "localhost" + host = "postgres" port = 5432 dbname = "iglu" username = "snowplow" @@ -32,14 +36,16 @@ postgres { } akka { + loggers = ["akka.event.slf4j.Slf4jLogger"] loglevel = INFO log-dead-letters = off + stdout-loglevel = "DEBUG" + logging-filter = "akka.event.slf4j.Slf4jLoggingFilter" } -# spray-can is the HTTP server the Iglu repository server is built on. 
-spray.can { +akka.http { server { - request-timeout = 10s + request-timeout = 10 seconds remote-address-header = on parsing.uri-parsing-mode = relaxed } diff --git a/provisioning/resources/configs/snowplow-es-loader-bad.hocon b/provisioning/resources/configs/snowplow-es-loader-bad.hocon index 31372ae0..3021d182 100644 --- a/provisioning/resources/configs/snowplow-es-loader-bad.hocon +++ b/provisioning/resources/configs/snowplow-es-loader-bad.hocon @@ -1,4 +1,4 @@ -# Copyright (c) 2014-2017 Snowplow Analytics Ltd. All rights reserved. +# Copyright (c) 2014-2018 Snowplow Analytics Ltd. All rights reserved. # # This program is licensed to you under the Apache License Version 2.0, and # you may not use this file except in compliance with the Apache License @@ -14,93 +14,40 @@ # This file (config.hocon.sample) contains a template with # configuration options for the Elasticsearch Loader. -# Sources currently supported are: -# "kinesis" for reading records from a Kinesis stream -# "stdin" for reading unencoded tab-separated events from stdin -# If set to "stdin", JSON documents will not be sent to Elasticsearch -# but will be written to stdout. -# "nsq" for reading unencoded tab-separated events from NSQ source = nsq -# Where to write good and bad records sink { - # Sinks currently supported are: - # "elasticsearch" for writing good records to Elasticsearch - # "stdout" for writing good records to stdout good = elasticsearch - - # Sinks currently supported are: - # "kinesis" for writing bad records to Kinesis - # "stderr" for writing bad records to stderr - # "nsq" for writing bad records to NSQ - # "none" for ignoring bad records bad = none } -# "good" for a stream of successfully enriched events -# "bad" for a stream of bad events -# "plain-json" for writing plain json enabled = bad -# The following are used to authenticate for the Amazon Kinesis sink. -# -# If both are set to "default", the default provider chain is used -# (see http://docs.aws.amazon.com/AWSJavaSDK/latest/javadoc/com/amazonaws/auth/DefaultAWSCredentialsProviderChain.html) -# -# If both are set to "iam", use AWS IAM Roles to provision credentials. -# -# If both are set to "env", use environment variables AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY aws { accessKey: "" secretKey: "" } -# config for NSQ nsq { - # Channel name for NSQ source channelName = ESLoaderChannelBad + + nsqdHost = "nsqd" + nsqdPort = 4150 - # Host name for NSQ tools - host = "127.0.0.1" - - # TCP port for nsqd - port = 4150 - - # HTTP port for nsqlookupd - lookupPort = 4161 + nsqlookupdHost = "nsqlookupd" + nsqlookupdPort = 4161 } kinesis { - # "LATEST": most recent data. - # "TRIM_HORIZON": oldest available data. - # "AT_TIMESTAMP": Start from the record at or after the specified timestamp - # Note: This only affects the first run of this application on a stream. initialPosition= TRIM_HORIZON - - # Maximum number of records to get from Kinesis per call to GetRecords maxRecords = 1000 - - # Region where the Kinesis stream is located region = "" - - # "appName" is used for a DynamoDB table to maintain stream state. - # You can set it automatically using: "SnowplowElasticsearchSink-${sink.kinesis.in.stream-name}" appName = "" } -# Common configuration section for all stream sources streams { inStreamName = BadEnrichedEvents - - # Stream for enriched events which are rejected by Elasticsearch outStreamName = BadElasticsearchEvents - - # Events are accumulated in a buffer before being sent to Elasticsearch. 
- # Note: Buffering is not supported by NSQ; will be ignored - # The buffer is emptied whenever: - # - the combined size of the stored records exceeds byteLimit or - # - the number of stored records exceeds recordLimit or - # - the time in milliseconds since it was last emptied exceeds timeLimit buffer { byteLimit = 5242880 recordLimit = 1 @@ -110,31 +57,18 @@ streams { elasticsearch { - # Events are indexed using an Elasticsearch Client - # - endpoint: the cluster endpoint - # - port: the port the cluster can be accessed on - # - for http this is usually 9200 - # - for transport this is usually 9300 - # - max-timeout: the maximum attempt time before a client restart - # - ssl: if using the http client, whether to use ssl or not client { - endpoint = "localhost" + endpoint = elasticsearch port = 9200 maxTimeout = 10000 ssl = false } - # When using the AWS ES service - # - signing: if using the http client and the AWS ES service you can sign your requests - # http://docs.aws.amazon.com/general/latest/gr/signing_aws_api_requests.html - # - region where the AWS ES service is located aws { signing = false region = "" } - # index: the Elasticsearch index name - # type: the Elasticsearch index type cluster { name = elasticsearch index = bad diff --git a/provisioning/resources/configs/snowplow-es-loader-good.hocon b/provisioning/resources/configs/snowplow-es-loader-good.hocon index 4b4726bc..d9673059 100644 --- a/provisioning/resources/configs/snowplow-es-loader-good.hocon +++ b/provisioning/resources/configs/snowplow-es-loader-good.hocon @@ -1,4 +1,4 @@ -# Copyright (c) 2014-2017 Snowplow Analytics Ltd. All rights reserved. +# Copyright (c) 2014-2018 Snowplow Analytics Ltd. All rights reserved. # # This program is licensed to you under the Apache License Version 2.0, and # you may not use this file except in compliance with the Apache License @@ -14,93 +14,40 @@ # This file (config.hocon.sample) contains a template with # configuration options for the Elasticsearch Loader. -# Sources currently supported are: -# "kinesis" for reading records from a Kinesis stream -# "stdin" for reading unencoded tab-separated events from stdin -# If set to "stdin", JSON documents will not be sent to Elasticsearch -# but will be written to stdout. -# "nsq" for reading unencoded tab-separated events from NSQ source = nsq -# Where to write good and bad records sink { - # Sinks currently supported are: - # "elasticsearch" for writing good records to Elasticsearch - # "stdout" for writing good records to stdout good = elasticsearch - - # Sinks currently supported are: - # "kinesis" for writing bad records to Kinesis - # "stderr" for writing bad records to stderr - # "nsq" for writing bad records to NSQ - # "none" for ignoring bad records bad = nsq } -# "good" for a stream of successfully enriched events -# "bad" for a stream of bad events -# "plain-json" for writing plain json enabled = good -# The following are used to authenticate for the Amazon Kinesis sink. -# -# If both are set to "default", the default provider chain is used -# (see http://docs.aws.amazon.com/AWSJavaSDK/latest/javadoc/com/amazonaws/auth/DefaultAWSCredentialsProviderChain.html) -# -# If both are set to "iam", use AWS IAM Roles to provision credentials. 
-# -# If both are set to "env", use environment variables AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY aws { accessKey = "" secretKey = "" } -# config for NSQ nsq { - # Channel name for NSQ source channelName = ESLoaderChannelGood - # Host name for NSQ tools - host = "127.0.0.1" - - # TCP port for nsqd - port = 4150 + nsqdHost = "nsqd" + nsqdPort = 4150 - # HTTP port for nsqlookupd - lookupPort = 4161 + nsqlookupdHost = "nsqlookupd" + nsqlookupdPort = 4161 } kinesis { - # "LATEST": most recent data. - # "TRIM_HORIZON": oldest available data. - # "AT_TIMESTAMP": Start from the record at or after the specified timestamp - # Note: This only affects the first run of this application on a stream. initialPosition = TRIM_HORIZON - - # Maximum number of records to get from Kinesis per call to GetRecords maxRecords = 1000 - - # Region where the Kinesis stream is located region = "" - - # "appName" is used for a DynamoDB table to maintain stream state. - # You can set it automatically using: "SnowplowElasticsearchSink-${sink.kinesis.in.stream-name}" appName = "" } -# Common configuration section for all stream sources streams { inStreamName = EnrichedEvents - - # Stream for enriched events which are rejected by Elasticsearch outStreamName = BadElasticsearchEvents - - # Events are accumulated in a buffer before being sent to Elasticsearch. - # Note: Buffering is not supported by NSQ; will be ignored - # The buffer is emptied whenever: - # - the combined size of the stored records exceeds byteLimit or - # - the number of stored records exceeds recordLimit or - # - the time in milliseconds since it was last emptied exceeds timeLimit buffer { byteLimit: 5242880 recordLimit: 1 @@ -110,31 +57,18 @@ streams { elasticsearch { - # Events are indexed using an Elasticsearch Client - # - endpoint: the cluster endpoint - # - port: the port the cluster can be accessed on - # - for http this is usually 9200 - # - for transport this is usually 9300 - # - max-timeout: the maximum attempt time before a client restart - # - ssl: if using the http client, whether to use ssl or not client { - endpoint = "localhost" + endpoint = elasticsearch port = 9200 maxTimeout = 10000 ssl = false } - # When using the AWS ES service - # - signing: if using the http client and the AWS ES service you can sign your requests - # http://docs.aws.amazon.com/general/latest/gr/signing_aws_api_requests.html - # - region where the AWS ES service is located aws { signing = false region = "" } - # index: the Elasticsearch index name - # type: the Elasticsearch index type cluster { name = "elasticsearch" index = "good" diff --git a/provisioning/resources/configs/snowplow-stream-collector.hocon b/provisioning/resources/configs/snowplow-stream-collector.hocon index fa5e3f0a..6d712b09 100644 --- a/provisioning/resources/configs/snowplow-stream-collector.hocon +++ b/provisioning/resources/configs/snowplow-stream-collector.hocon @@ -1,4 +1,4 @@ -# Copyright (c) 2013-2017 Snowplow Analytics Ltd. All rights reserved. +# Copyright (c) 2013-2018 Snowplow Analytics Ltd. All rights reserved. # # This program is licensed to you under the Apache License Version 2.0, and # you may not use this file except in compliance with the Apache License @@ -18,69 +18,49 @@ # 'collector' contains configuration options for the main Scala collector. collector { - # The collector runs as a web service specified on the following - # interface and port. interface = "0.0.0.0" port = 8080 - # Configure the P3P policy header. 
p3p { policyRef = "/w3c/p3p.xml" CP = "NOI DSP COR NID PSA OUR IND COM NAV STA" } - # The collector returns a cookie to clients for user identification - # with the following domain and expiration. + crossDomain { + enabled = false + domain = "*" + secure = true + } + cookie { enabled = true expiration = "365 days" # e.g. "365 days" - # Network cookie name name = sp - # The domain is optional and will make the cookie accessible to other - # applications on the domain. Comment out this line to tie cookies to - # the collector's full domain domain = "" } - # When enabled and the cookie specified above is missing, performs a redirect to itself to check - # if third-party cookies are blocked using the specified name. If they are indeed blocked, - # fallbackNetworkId is used instead of generating a new random one. cookieBounce { enabled = false - # The name of the request parameter which will be used on redirects checking that third-party - # cookies work. name = "n3pc" - # Network user id to fallback to when third-party cookies are blocked. fallbackNetworkUserId = "00000000-0000-4000-A000-000000000000" } + redirectMacro { + enabled = false + placeholder = "[TOKEN]" + } + streams { - # Events which have successfully been collected will be stored in the good stream/topic good = RawEvents - - # Events that are too big (w.r.t Kinesis 1MB limit) will be stored in the bad stream/topic bad = BadRawEvents - - # Whether to use the incoming event's ip as the partition key for the good stream/topic useIpAddressAsPartitionKey = false - # config for NSQ sink sink { enabled = nsq - - # Host name for NSQ tools - host = "127.0.0.1" - - # TCP port for nsqd + host = nsqd port = 4150 } - # Incoming events are stored in a buffer before being sent to Kinesis/Kafka. - # Note: Buffering is not supported by NSQ. - # The buffer is emptied whenever: - # - the number of stored records reaches record-limit or - # - the combined size of the stored records reaches byte-limit or - # - the time in milliseconds since the buffer was last emptied reaches time-limit buffer { byteLimit = 4000000 recordLimit = 500 # Not supported by Kafka; will be ignored @@ -89,23 +69,13 @@ collector { } } -# Akka has a variety of possible configuration options defined at -# http://doc.akka.io/docs/akka/current/scala/general/configuration.html akka { loglevel = DEBUG # 'OFF' for no logging, 'DEBUG' for all logging. loggers = ["akka.event.slf4j.Slf4jLogger"] - # akka-http is the server the Stream collector uses and has configurable options defined at - # http://doc.akka.io/docs/akka-http/current/scala/http/configuration.html http.server { - # To obtain the hostname in the collector, the 'remote-address' header - # should be set. By default, this is disabled, and enabling it - # adds the 'Remote-Address' header to every request automatically. 
remote-address-header = on - raw-request-uri-header = on - - # Define the maximum request length (the default is 2048) parsing { max-uri-length = 32768 uri-parsing-mode = relaxed diff --git a/provisioning/resources/configs/snowplow-stream-enrich.hocon b/provisioning/resources/configs/snowplow-stream-enrich.hocon index 353095bb..d9b6b5a8 100644 --- a/provisioning/resources/configs/snowplow-stream-enrich.hocon +++ b/provisioning/resources/configs/snowplow-stream-enrich.hocon @@ -19,38 +19,22 @@ enrich { streams { in { - # Stream/topic where the raw events to be enriched are located raw = RawEvents } out { - # Stream/topic where the events that were successfully enriched will end up enriched = EnrichedEvents - # Stream/topic where the event that failed enrichment will be stored bad = BadEnrichedEvents - - # How the output stream/topic will be partitioned. - # Possible partition keys are: event_id, event_fingerprint, domain_userid, network_userid, - # user_ipaddress, domain_sessionid, user_fingerprint. - # Refer to https://github.com/snowplow/snowplow/wiki/canonical-event-model to know what the - # possible partition keys correspond to. - # Otherwise, the partition key will be a random UUID. - # Note: Nsq does not make use of partition key. + pii = PiiEvents partitionKey = "" } sourceSink { enabled = nsq - - # Channel name for nsq source rawChannel = StreamEnrichChannel - # Host name for nsqd - host = "127.0.0.1" - # TCP port for nsqd, 4150 by default + host = nsqd port = 4150 - # Host name for lookupd - lookupHost = "127.0.0.1" - # HTTP port for nsqlookupd, 4161 by default + lookupHost = nsqlookupd lookupPort = 4161 } @@ -60,6 +44,6 @@ enrich { timeLimit = 5000 } - appName = "" + appName = "snowplow-stream-enrich" } } diff --git a/provisioning/resources/control-plane/change_credentials.go b/provisioning/resources/control-plane/change_credentials.go index b7a551ef..833782c7 100644 --- a/provisioning/resources/control-plane/change_credentials.go +++ b/provisioning/resources/control-plane/change_credentials.go @@ -1,5 +1,5 @@ /** - * Copyright (c) 2016-2017 Snowplow Analytics Ltd. + * Copyright (c) 2016-2018 Snowplow Analytics Ltd. * All rights reserved. * * This program is licensed to you under the Apache License Version 2.0, diff --git a/provisioning/resources/control-plane/change_credentials_test.go b/provisioning/resources/control-plane/change_credentials_test.go index d26e4448..b71f1466 100644 --- a/provisioning/resources/control-plane/change_credentials_test.go +++ b/provisioning/resources/control-plane/change_credentials_test.go @@ -1,5 +1,5 @@ /** - * Copyright (c) 2016-2017 Snowplow Analytics Ltd. + * Copyright (c) 2016-2018 Snowplow Analytics Ltd. * All rights reserved. * * This program is licensed to you under the Apache License Version 2.0, @@ -38,7 +38,6 @@ func TestChangeCredentials(t *testing.T) { /kibana /elasticsearch /control-plane - /_plugin } ` expectedCaddyConfigHeadAfter := @@ -49,7 +48,6 @@ func TestChangeCredentials(t *testing.T) { /kibana /elasticsearch /control-plane - /_plugin } ` dir, err := ioutil.TempDir("", "testDir") diff --git a/provisioning/resources/control-plane/change_domain_name.go b/provisioning/resources/control-plane/change_domain_name.go index bd9c1102..0ab48ccf 100644 --- a/provisioning/resources/control-plane/change_domain_name.go +++ b/provisioning/resources/control-plane/change_domain_name.go @@ -1,5 +1,5 @@ /** - * Copyright (c) 2016-2017 Snowplow Analytics Ltd. + * Copyright (c) 2016-2018 Snowplow Analytics Ltd. * All rights reserved. 
* * This program is licensed to you under the Apache License Version 2.0, diff --git a/provisioning/resources/control-plane/change_domain_name_test.go b/provisioning/resources/control-plane/change_domain_name_test.go index d3c735a5..c71a7df8 100644 --- a/provisioning/resources/control-plane/change_domain_name_test.go +++ b/provisioning/resources/control-plane/change_domain_name_test.go @@ -1,5 +1,5 @@ /** - * Copyright (c) 2016-2017 Snowplow Analytics Ltd. + * Copyright (c) 2016-2018 Snowplow Analytics Ltd. * All rights reserved. * * This program is licensed to you under the Apache License Version 2.0, @@ -38,7 +38,6 @@ func TestChangeDomainName(t *testing.T) { /kibana /elasticsearch /control-plane - /_plugin } ` expectedCaddyConfigHeadAfter := @@ -49,7 +48,6 @@ func TestChangeDomainName(t *testing.T) { /kibana /elasticsearch /control-plane - /_plugin } ` dir, err := ioutil.TempDir("", "testDir") diff --git a/provisioning/resources/control-plane/external_iglu.go b/provisioning/resources/control-plane/external_iglu.go index 8f868868..74b34a18 100644 --- a/provisioning/resources/control-plane/external_iglu.go +++ b/provisioning/resources/control-plane/external_iglu.go @@ -1,5 +1,5 @@ /** - * Copyright (c) 2016-2017 Snowplow Analytics Ltd. + * Copyright (c) 2016-2018 Snowplow Analytics Ltd. * All rights reserved. * * This program is licensed to you under the Apache License Version 2.0, diff --git a/provisioning/resources/control-plane/external_iglu_test.go b/provisioning/resources/control-plane/external_iglu_test.go index 6cee70a7..63074877 100644 --- a/provisioning/resources/control-plane/external_iglu_test.go +++ b/provisioning/resources/control-plane/external_iglu_test.go @@ -1,5 +1,5 @@ /** - * Copyright (c) 2016-2017 Snowplow Analytics Ltd. + * Copyright (c) 2016-2018 Snowplow Analytics Ltd. * All rights reserved. * * This program is licensed to you under the Apache License Version 2.0, diff --git a/provisioning/resources/control-plane/iglu_resolver_model.go b/provisioning/resources/control-plane/iglu_resolver_model.go index 738a2939..c64d9eb7 100644 --- a/provisioning/resources/control-plane/iglu_resolver_model.go +++ b/provisioning/resources/control-plane/iglu_resolver_model.go @@ -1,5 +1,5 @@ /** - * Copyright (c) 2016-2017 Snowplow Analytics Ltd. + * Copyright (c) 2016-2018 Snowplow Analytics Ltd. * All rights reserved. * * This program is licensed to you under the Apache License Version 2.0, diff --git a/provisioning/resources/control-plane/local_iglu.go b/provisioning/resources/control-plane/local_iglu.go index 8bd0e2bf..0cae6c6f 100644 --- a/provisioning/resources/control-plane/local_iglu.go +++ b/provisioning/resources/control-plane/local_iglu.go @@ -1,5 +1,5 @@ /** - * Copyright (c) 2016-2017 Snowplow Analytics Ltd. + * Copyright (c) 2016-2018 Snowplow Analytics Ltd. * All rights reserved. 
* * This program is licensed to you under the Apache License Version 2.0, @@ -51,9 +51,7 @@ func (li LocalIglu) addApiKeyToConfig() error { for i, repo := range igluConf.Data.Repos { igluUri := repo.Conn.Http["uri"] - if strings.Contains(igluUri, "localhost") || - strings.Contains(igluUri, "127.0.0.1") { - + if strings.Contains(igluUri, "iglu-server") { igluConf.Data.Repos[i].Conn.Http["apikey"] = li.IgluApikey } } @@ -67,6 +65,7 @@ func (li LocalIglu) addApiKeyToConfig() error { } func (li LocalIglu) insertApiKeyToDb() error { + db := pg.Connect(&pg.Options{ User: li.Psql.User, Password: li.Psql.Password, diff --git a/provisioning/resources/control-plane/local_iglu_test.go b/provisioning/resources/control-plane/local_iglu_test.go index 8c300b2a..9ff6b403 100644 --- a/provisioning/resources/control-plane/local_iglu_test.go +++ b/provisioning/resources/control-plane/local_iglu_test.go @@ -1,5 +1,5 @@ /** - * Copyright (c) 2016-2017 Snowplow Analytics Ltd. + * Copyright (c) 2016-2018 Snowplow Analytics Ltd. * All rights reserved. * * This program is licensed to you under the Apache License Version 2.0, diff --git a/provisioning/resources/control-plane/main.go b/provisioning/resources/control-plane/main.go index a99fdfe8..6ca7aaf5 100644 --- a/provisioning/resources/control-plane/main.go +++ b/provisioning/resources/control-plane/main.go @@ -1,5 +1,5 @@ /** - * Copyright (c) 2016-2017 Snowplow Analytics Ltd. + * Copyright (c) 2016-2018 Snowplow Analytics Ltd. * All rights reserved. * * This program is licensed to you under the Apache License Version 2.0, @@ -35,7 +35,8 @@ var configPath string var config ControlPlaneConfig func main() { - configFlag := flag.String("config", "", "Control Plane API config file") + configFlag := flag.String("config", "/home/ubuntu/snowplow/configs/control-plane-api.toml", + "Control Plane API config file") flag.Parse() configPath = *configFlag @@ -45,6 +46,7 @@ func main() { http.HandleFunc("/restart-services", restartServices) http.HandleFunc("/enrichments", uploadEnrichments) + http.HandleFunc("/iglu-config", uploadIgluConfig) http.HandleFunc("/external-iglu", addExternalIgluServer) http.HandleFunc("/local-iglu-apikey", addLocalIgluApikey) http.HandleFunc("/credentials", changeUsernameAndPassword) @@ -115,6 +117,56 @@ func uploadEnrichments(resp http.ResponseWriter, req *http.Request) { } } +func uploadIgluConfig(resp http.ResponseWriter, req *http.Request) { + if req.Method == "POST" { + // maxMemory bytes of body's file parts are stored in memory, + // with the remainder stored on disk in temporary files + var maxMemory int64 = 32 << 20 + err := req.ParseMultipartForm(maxMemory) + + if err != nil { + http.Error(resp, err.Error(), 500) + return + } + + file, _, err := req.FormFile("igluserverhocon") + if err != nil { + http.Error(resp, err.Error(), 500) + return + } + defer file.Close() + + fileContentBytes, err := ioutil.ReadAll(file) + fileContent := string(fileContentBytes) + f, err := os.OpenFile(config.Dirs.Config+"/"+config.ConfigNames.IgluServer, os.O_WRONLY|os.O_CREATE, 0666) + if err != nil { + http.Error(resp, err.Error(), 500) + return + } + defer f.Close() + + // Uploaded Iglu Server configuration can be shorter than existing one + // Which would make iglu server configuration invalid + // Truncating to 0 bytes and seeking I/O offset to the beginning + // Prevents that possibility + f.Truncate(0) + f.Seek(0, 0) + // Now we can write to config file in peace + io.WriteString(f, fileContent) + + err = restartService("iglu") + if err != nil { + 
http.Error(resp, err.Error(), 500) + return + } + + resp.WriteHeader(http.StatusOK) + io.WriteString(resp, "uploaded successfully") + } else { + http.Error(resp, "", 404) + } +} + func addExternalIgluServer(resp http.ResponseWriter, req *http.Request) { if req.Method == "POST" { req.ParseForm() @@ -209,6 +261,7 @@ func addLocalIgluApikey(resp http.ResponseWriter, req *http.Request) { IgluApikey: igluApikey, Psql: psqlInfos, } + err := localIglu.addApiKey() if err != nil { http.Error(resp, err.Error(), 500) diff --git a/provisioning/resources/control-plane/model.go b/provisioning/resources/control-plane/model.go index c7e537c0..00e41bed 100644 --- a/provisioning/resources/control-plane/model.go +++ b/provisioning/resources/control-plane/model.go @@ -1,5 +1,5 @@ /** - * Copyright (c) 2016-2017 Snowplow Analytics Ltd. + * Copyright (c) 2016-2018 Snowplow Analytics Ltd. * All rights reserved. * * This program is licensed to you under the Apache License Version 2.0, @@ -27,6 +27,7 @@ type directories struct { type configFileNames struct { Caddy string IgluResolver string `toml:"iglu_resolver"` + IgluServer string `toml:"iglu_server"` } type initScripts struct { diff --git a/provisioning/resources/control-plane/restart_services.go b/provisioning/resources/control-plane/restart_services.go index b87c4190..57c9f57d 100644 --- a/provisioning/resources/control-plane/restart_services.go +++ b/provisioning/resources/control-plane/restart_services.go @@ -1,5 +1,5 @@ /** - * Copyright (c) 2016-2017 Snowplow Analytics Ltd. + * Copyright (c) 2016-2018 Snowplow Analytics Ltd. * All rights reserved. * * This program is licensed to you under the Apache License Version 2.0, @@ -35,38 +35,34 @@ func restartService(service string) error { } if val, ok := initMap[service]; ok { - restartCommand := []string{"service", val, "restart"} - - cmd := exec.Command("/bin/bash", restartCommand...) - err := cmd.Run() - if err != nil { - return err + if service == "caddy" { + restartCommand := []string{"service", val, "restart"} + cmd := exec.Command("/bin/bash", restartCommand...) + err := cmd.Run() + if err != nil { + return err + } + return nil + } else { + restartCommandArgs := []string{"-f", "/home/ubuntu/snowplow/docker-compose.yml", + "restart", val} + cmd := exec.Command("/usr/local/bin/docker-compose", restartCommandArgs...) + err := cmd.Run() + if err != nil { + return err + } + return nil } - return nil } - return errors.New("unrecognized service") + return errors.New("unrecognized service: " + service) } func restartSPServices() error { - err := restartService("streamCollector") - if err != nil { - return err - } - - err = restartService("streamEnrich") - if err != nil { - return err - } - - err = restartService("esLoaderGood") + restartCommandArgs := []string{"-f", "/home/ubuntu/snowplow/docker-compose.yml", "restart"} + cmd := exec.Command("/usr/local/bin/docker-compose", restartCommandArgs...) + err := cmd.Run() if err != nil { return err } - - err = restartService("esLoaderBad") - if err != nil { - return err - } - return nil } diff --git a/provisioning/resources/control-plane/utils.go b/provisioning/resources/control-plane/utils.go index f2af946b..b2a23fe7 100644 --- a/provisioning/resources/control-plane/utils.go +++ b/provisioning/resources/control-plane/utils.go @@ -1,5 +1,5 @@ /** - * Copyright (c) 2016-2017 Snowplow Analytics Ltd. + * Copyright (c) 2016-2018 Snowplow Analytics Ltd. * All rights reserved. 
* * This program is licensed to you under the Apache License Version 2.0, diff --git a/provisioning/resources/control-plane/utils_test.go b/provisioning/resources/control-plane/utils_test.go index 28976918..2589a20a 100644 --- a/provisioning/resources/control-plane/utils_test.go +++ b/provisioning/resources/control-plane/utils_test.go @@ -1,5 +1,5 @@ /** - * Copyright (c) 2016-2017 Snowplow Analytics Ltd. + * Copyright (c) 2016-2018 Snowplow Analytics Ltd. * All rights reserved. * * This program is licensed to you under the Apache License Version 2.0, diff --git a/provisioning/resources/elasticsearch/config/elasticsearch.yml b/provisioning/resources/elasticsearch/config/elasticsearch.yml new file mode 100644 index 00000000..1f8f4c63 --- /dev/null +++ b/provisioning/resources/elasticsearch/config/elasticsearch.yml @@ -0,0 +1,33 @@ +# ======================== Elasticsearch Configuration ========================= +# +# NOTE: Elasticsearch comes with reasonable defaults for most settings. +# Before you set out to tweak and tune the configuration, make sure you +# understand what are you trying to accomplish and the consequences. +# +# The primary way of configuring a node is via this file. This template lists +# the most important settings you may want to configure for a production cluster. +# +# Please consult the documentation for further information on configuration options: +# https://www.elastic.co/guide/en/elasticsearch/reference/index.html +# +# ---------------------------------- Cluster ----------------------------------- +# +# Use a descriptive name for your cluster: +# +cluster.name: "sp-mini-es-cluster" +# +# ------------------------------------ Node ------------------------------------ +# +# Use a descriptive name for the node: +# +node.name: "sp-mini-es-node" +# ---------------------------------- Network ----------------------------------- +# +# Set the bind address to a specific IP (IPv4 or IPv6): +# +network.host: 0.0.0.0 +# --------------------------------- Discovery ---------------------------------- +# +# Prevent the "split brain" by configuring the majority of nodes (total number of master-eligible nodes / 2 + 1): +# +discovery.zen.minimum_master_nodes: 1 diff --git a/provisioning/resources/elasticsearch/config/jvm.options b/provisioning/resources/elasticsearch/config/jvm.options new file mode 100644 index 00000000..5a8c88ce --- /dev/null +++ b/provisioning/resources/elasticsearch/config/jvm.options @@ -0,0 +1,102 @@ +## JVM configuration + +################################################################ +## IMPORTANT: JVM heap size +################################################################ +## +## You should always set the min and max JVM heap +## size to the same value. For example, to set +## the heap to 4 GB, set: +## +## -Xms4g +## -Xmx4g +## +## See https://www.elastic.co/guide/en/elasticsearch/reference/current/heap-size.html +## for more information +## +################################################################ + +# Xms represents the initial size of total heap space +# Xmx represents the maximum size of total heap space + +-Xms4g +-Xmx4g + +################################################################ +## Expert settings +################################################################ +## +## All settings below this section are considered +## expert settings. 
Don't tamper with them unless +## you understand what you are doing +## +################################################################ + +## GC configuration +-XX:+UseConcMarkSweepGC +-XX:CMSInitiatingOccupancyFraction=75 +-XX:+UseCMSInitiatingOccupancyOnly + +## optimizations + +# pre-touch memory pages used by the JVM during initialization +-XX:+AlwaysPreTouch + +## basic + +# explicitly set the stack size +-Xss1m + +# set to headless, just in case +-Djava.awt.headless=true + +# ensure UTF-8 encoding by default (e.g. filenames) +-Dfile.encoding=UTF-8 + +# use our provided JNA always versus the system one +-Djna.nosys=true + +# turn off a JDK optimization that throws away stack traces for common +# exceptions because stack traces are important for debugging +-XX:-OmitStackTraceInFastThrow + +# flags to configure Netty +-Dio.netty.noUnsafe=true +-Dio.netty.noKeySetOptimization=true +-Dio.netty.recycler.maxCapacityPerThread=0 + +# log4j 2 +-Dlog4j.shutdownHookEnabled=false +-Dlog4j2.disable.jmx=true + +-Djava.io.tmpdir=${ES_TMPDIR} + +## heap dumps + +# generate a heap dump when an allocation from the Java heap fails +# heap dumps are created in the working directory of the JVM +-XX:+HeapDumpOnOutOfMemoryError + +# specify an alternative path for heap dumps; ensure the directory exists and +# has sufficient space +-XX:HeapDumpPath=data + +# specify an alternative path for JVM fatal error logs +-XX:ErrorFile=logs/hs_err_pid%p.log + +## JDK 8 GC logging + +8:-XX:+PrintGCDetails +8:-XX:+PrintGCDateStamps +8:-XX:+PrintTenuringDistribution +8:-XX:+PrintGCApplicationStoppedTime +8:-Xloggc:logs/gc.log +8:-XX:+UseGCLogFileRotation +8:-XX:NumberOfGCLogFiles=32 +8:-XX:GCLogFileSize=64m + +# JDK 9+ GC logging +9-:-Xlog:gc*,gc+age=trace,safepoint:file=logs/gc.log:utctime,pid,tags:filecount=32,filesize=64m +# due to internationalization enhancements in JDK 9 Elasticsearch need to set the provider to COMPAT otherwise +# time/date parsing will break in an incompatible way for some date patterns and locals +9-:-Djava.locale.providers=COMPAT \ No newline at end of file diff --git a/provisioning/resources/elasticsearch/config/kibana.yml b/provisioning/resources/elasticsearch/config/kibana.yml new file mode 100644 index 00000000..c7fbf815 --- /dev/null +++ b/provisioning/resources/elasticsearch/config/kibana.yml @@ -0,0 +1,114 @@ +# Kibana is served by a back end server. This setting specifies the port to use. +server.port: 5601 + +# Specifies the address to which the Kibana server will bind. IP addresses and host names are both valid values. +# The default is 'localhost', which usually means remote machines will not be able to connect. +# To allow connections from remote users, set this parameter to a non-loopback address. +server.host: "0" + +# Enables you to specify a path to mount Kibana at if you are running behind a proxy. +# Use the `server.rewriteBasePath` setting to tell Kibana if it should remove the basePath +# from requests it receives, and to prevent a deprecation warning at startup. +# This setting cannot end in a slash. +server.basePath: "kibana" + +# Specifies whether Kibana should rewrite requests that are prefixed with +# `server.basePath` or require that they are rewritten by your reverse proxy. +# This setting was effectively always `false` before Kibana 6.3 and will +# default to `true` starting in Kibana 7.0. +server.rewriteBasePath: true + +# The maximum payload size in bytes for incoming server requests. +#server.maxPayloadBytes: 1048576 + +# The Kibana server's name. 
This is used for display purposes. +server.name: "kibana" + +# The URL of the Elasticsearch instance to use for all your queries. +elasticsearch.url: http://elasticsearch:9200 + +# When this setting's value is true Kibana uses the hostname specified in the server.host +# setting. When the value of this setting is false, Kibana uses the hostname of the host +# that connects to this Kibana instance. +#elasticsearch.preserveHost: true + +# Kibana uses an index in Elasticsearch to store saved searches, visualizations and +# dashboards. Kibana creates a new index if the index doesn't already exist. +kibana.index: ".kibana" + +# The default application to load. +kibana.defaultAppId: "discover" + +# If your Elasticsearch is protected with basic authentication, these settings provide +# the username and password that the Kibana server uses to perform maintenance on the Kibana +# index at startup. Your Kibana users still need to authenticate with Elasticsearch, which +# is proxied through the Kibana server. +#elasticsearch.username: "user" +#elasticsearch.password: "pass" + +# Enables SSL and paths to the PEM-format SSL certificate and SSL key files, respectively. +# These settings enable SSL for outgoing requests from the Kibana server to the browser. +#server.ssl.enabled: false +#server.ssl.certificate: /path/to/your/server.crt +#server.ssl.key: /path/to/your/server.key + +# Optional settings that provide the paths to the PEM-format SSL certificate and key files. +# These files validate that your Elasticsearch backend uses the same key files. +#elasticsearch.ssl.certificate: /path/to/your/client.crt +#elasticsearch.ssl.key: /path/to/your/client.key + +# Optional setting that enables you to specify a path to the PEM file for the certificate +# authority for your Elasticsearch instance. +#elasticsearch.ssl.certificateAuthorities: [ "/path/to/your/CA.pem" ] + +# To disregard the validity of SSL certificates, change this setting's value to 'none'. +#elasticsearch.ssl.verificationMode: full + +# Time in milliseconds to wait for Elasticsearch to respond to pings. Defaults to the value of +# the elasticsearch.requestTimeout setting. +#elasticsearch.pingTimeout: 1500 + +# Time in milliseconds to wait for responses from the back end or Elasticsearch. This value +# must be a positive integer. +#elasticsearch.requestTimeout: 30000 + +# List of Kibana client-side headers to send to Elasticsearch. To send *no* client-side +# headers, set this value to [] (an empty list). +#elasticsearch.requestHeadersWhitelist: [ authorization ] + +# Header names and values that are sent to Elasticsearch. Any custom headers cannot be overwritten +# by client-side headers, regardless of the elasticsearch.requestHeadersWhitelist configuration. +#elasticsearch.customHeaders: {} + +# Time in milliseconds for Elasticsearch to wait for responses from shards. Set to 0 to disable. +#elasticsearch.shardTimeout: 30000 + +# Time in milliseconds to wait for Elasticsearch at Kibana startup before retrying. +#elasticsearch.startupTimeout: 5000 + +# Logs queries sent to Elasticsearch. Requires logging.verbose set to true. +#elasticsearch.logQueries: false + +# Specifies the path where Kibana creates the process ID file. +#pid.file: /var/run/kibana.pid + +# Enables you specify a file where Kibana stores log output. +#logging.dest: stdout + +# Set the value of this setting to true to suppress all logging output. +#logging.silent: false + +# Set the value of this setting to true to suppress all logging output other than error messages. 
+#logging.quiet: false + +# Set the value of this setting to true to log all events, including system usage information +# and all requests. +#logging.verbose: false + +# Set the interval in milliseconds to sample system and process performance +# metrics. Minimum is 100ms. Defaults to 5000. +#ops.interval: 5000 + +# The default locale. This locale can be used in certain circumstances to substitute any missing +# translations. +#i18n.defaultLocale: "en" \ No newline at end of file diff --git a/provisioning/resources/elasticsearch/config/log4j2.properties b/provisioning/resources/elasticsearch/config/log4j2.properties new file mode 100644 index 00000000..d0f8ef04 --- /dev/null +++ b/provisioning/resources/elasticsearch/config/log4j2.properties @@ -0,0 +1,28 @@ +status = error + +appender.console.type = Console +appender.console.name = console +appender.console.layout.type = PatternLayout +appender.console.layout.pattern = [%d{ISO8601}][%-5p][%-25c{1.}] %marker%m%n + +appender.rolling.type = RollingFile +appender.rolling.name = rolling +appender.rolling.fileName = ${sys:es.logs.base_path}${sys:file.separator}${sys:es.logs.cluster_name}.log +appender.rolling.layout.type = PatternLayout +appender.rolling.layout.pattern = [%d{ISO8601}][%-5p][%-25c] %.10000m%n +appender.rolling.filePattern = ${sys:es.logs.base_path}${sys:file.separator}${sys:es.logs.cluster_name}-%d{yyyy-MM-dd}.log.zip +appender.rolling.policies.type = Policies +appender.rolling.policies.time.type = TimeBasedTriggeringPolicy +appender.rolling.policies.time.interval = 1 +appender.rolling.policies.time.modulate = true + +appender.rolling.strategy.type = DefaultRolloverStrategy +appender.rolling.strategy.action.type = Delete +appender.rolling.strategy.action.basepath = ${sys:es.logs.base_path} +appender.rolling.strategy.action.condition.type = IfLastModified +appender.rolling.strategy.action.condition.age = 3D +appender.rolling.strategy.action.PathConditions.type = IfFileName +appender.rolling.strategy.action.PathConditions.glob = ${sys:es.logs.cluster_name}-* + +rootLogger.level = info +rootLogger.appenderRef.console.ref = console diff --git a/provisioning/resources/elasticsearch/bad-mapping.json b/provisioning/resources/elasticsearch/mapping/bad-mapping.json similarity index 72% rename from provisioning/resources/elasticsearch/bad-mapping.json rename to provisioning/resources/elasticsearch/mapping/bad-mapping.json index ee8740d0..7b96de74 100644 --- a/provisioning/resources/elasticsearch/bad-mapping.json +++ b/provisioning/resources/elasticsearch/mapping/bad-mapping.json @@ -14,23 +14,15 @@ }, "mappings": { "bad": { - "_timestamp" : { - "enabled" : "yes", - "path" : "failure_tstamp" - }, - "_ttl": { - "enabled": true, - "default": "604800000" - }, "properties": { "errors": { "properties": { "message" : { - "type": "string", + "type": "text", "analyzer": "standard" }, "level" : { - "type": "string", + "type": "text", "analyzer": "standard" } } @@ -40,7 +32,7 @@ "format": "dateOptionalTime" }, "line": { - "type": "string", + "type": "text", "analyzer": "standard" } } diff --git a/provisioning/resources/elasticsearch/good-mapping.json b/provisioning/resources/elasticsearch/mapping/good-mapping.json similarity index 57% rename from provisioning/resources/elasticsearch/good-mapping.json rename to provisioning/resources/elasticsearch/mapping/good-mapping.json index 1102d531..4437a784 100644 --- a/provisioning/resources/elasticsearch/good-mapping.json +++ b/provisioning/resources/elasticsearch/mapping/good-mapping.json @@ -14,29 +14,21 
@@ }, "mappings": { "good": { - "_timestamp" : { - "enabled" : "yes", - "path" : "collector_tstamp" - }, - "_ttl": { - "enabled": true, - "default": "604800000" - }, "properties": { "app_id": { - "type": "string", - "index": "not_analyzed" + "type": "keyword", + "index": true }, "br_colordepth": { - "type": "string", - "index": "not_analyzed" + "type": "keyword", + "index": true }, "br_cookies": { "type": "boolean" }, "br_family": { - "type": "string", - "index": "not_analyzed" + "type": "keyword", + "index": true }, "br_features_director": { "type": "boolean" @@ -66,24 +58,24 @@ "type": "boolean" }, "br_lang": { - "type": "string", - "index": "not_analyzed" + "type": "keyword", + "index": true }, "br_name": { - "type": "string", - "index": "not_analyzed" + "type": "keyword", + "index": true }, "br_renderengine": { - "type": "string", - "index": "not_analyzed" + "type": "keyword", + "index": true }, "br_type": { - "type": "string", - "index": "not_analyzed" + "type": "keyword", + "index": true }, "br_version": { - "type": "string", - "index": "not_analyzed" + "type": "keyword", + "index": true }, "br_viewheight": { "type": "long" @@ -96,8 +88,8 @@ "format": "dateOptionalTime" }, "doc_charset": { - "type": "string", - "index": "not_analyzed" + "type": "keyword", + "index": true }, "doc_height": { "type": "long" @@ -106,15 +98,15 @@ "type": "long" }, "domain_sessionid": { - "type": "string", - "index": "not_analyzed" + "type": "keyword", + "index": true }, "domain_sessionidx": { "type": "long" }, "domain_userid": { - "type": "string", - "index": "not_analyzed" + "type": "keyword", + "index": true }, "dvce_ismobile": { "type": "boolean" @@ -134,106 +126,106 @@ "format": "dateOptionalTime" }, "dvce_type": { - "type": "string", - "index": "not_analyzed" + "type": "keyword", + "index": true }, "etl_tstamp": { "type": "date", "format": "dateOptionalTime" }, "event": { - "type": "string", - "index": "not_analyzed" + "type": "keyword", + "index": true }, "event_id": { - "type": "string", - "index": "not_analyzed" + "type": "keyword", + "index": true }, "geo_location": { "type": "geo_point" }, "mkt_campaign": { - "type": "string", - "index": "not_analyzed" + "type": "keyword", + "index": true }, "mkt_content": { - "type": "string", - "index": "not_analyzed" + "type": "keyword", + "index": true }, "mkt_medium": { - "type": "string", - "index": "not_analyzed" + "type": "keyword", + "index": true }, "mkt_source": { - "type": "string", - "index": "not_analyzed" + "type": "keyword", + "index": true }, "mkt_term": { - "type": "string", - "index": "not_analyzed" + "type": "keyword", + "index": true }, "name_tracker": { - "type": "string", - "index": "not_analyzed" + "type": "keyword", + "index": true }, "network_userid": { - "type": "string", - "index": "not_analyzed" + "type": "keyword", + "index": true }, "os_family": { - "type": "string", - "index": "not_analyzed" + "type": "keyword", + "index": true }, "os_manufacturer": { - "type": "string", - "index": "not_analyzed" + "type": "keyword", + "index": true }, "os_name": { - "type": "string", - "index": "not_analyzed" + "type": "keyword", + "index": true }, "os_timezone": { - "type": "string", - "index": "not_analyzed" + "type": "keyword", + "index": true }, "page_referrer": { - "type": "string", - "index": "not_analyzed" + "type": "keyword", + "index": true }, "page_title": { - "type": "string", - "index": "not_analyzed" + "type": "keyword", + "index": true }, "page_url": { - "type": "string", - "index": "not_analyzed" + "type": "keyword", + "index": 
true }, "page_urlfragment": { - "type": "string", - "index": "not_analyzed" + "type": "keyword", + "index": true }, "page_urlhost": { - "type": "string", - "index": "not_analyzed" + "type": "keyword", + "index": true }, "page_urlpath": { - "type": "string", - "index": "not_analyzed" + "type": "keyword", + "index": true }, "page_urlport": { "type": "long" }, "page_urlquery": { - "type": "string", - "index": "not_analyzed" + "type": "keyword", + "index": true }, "page_urlscheme": { - "type": "string", - "index": "not_analyzed" + "type": "keyword", + "index": true }, "platform": { - "type": "string", - "index": "not_analyzed" + "type": "keyword", + "index": true }, "pp_xoffset_max": { "type": "long" @@ -248,79 +240,79 @@ "type": "long" }, "refr_medium": { - "type": "string", - "index": "not_analyzed" + "type": "keyword", + "index": true }, "refr_source": { - "type": "string", - "index": "not_analyzed" + "type": "keyword", + "index": true }, "refr_term": { - "type": "string", - "index": "not_analyzed" + "type": "keyword", + "index": true }, "refr_urlfragment": { - "type": "string", - "index": "not_analyzed" + "type": "keyword", + "index": true }, "refr_urlhost": { - "type": "string", - "index": "not_analyzed" + "type": "keyword", + "index": true }, "refr_urlpath": { - "type": "string", - "index": "not_analyzed" + "type": "keyword", + "index": true }, "refr_urlport": { "type": "long" }, "refr_urlquery": { - "type": "string", - "index": "not_analyzed" + "type": "keyword", + "index": true }, "refr_urlscheme": { - "type": "string", - "index": "not_analyzed" + "type": "keyword", + "index": true }, "se_action": { - "type": "string", - "index": "not_analyzed" + "type": "keyword", + "index": true }, "se_category": { - "type": "string", - "index": "not_analyzed" + "type": "keyword", + "index": true }, "se_label": { - "type": "string", - "index": "not_analyzed" + "type": "keyword", + "index": true }, "user_fingerprint": { - "type": "string", - "index": "not_analyzed" + "type": "keyword", + "index": true }, "user_id": { - "type": "string", - "index": "not_analyzed" + "type": "keyword", + "index": true }, "user_ipaddress": { - "type": "string", - "index": "not_analyzed" + "type": "keyword", + "index": true }, "useragent": { - "type": "string", - "index": "not_analyzed" + "type": "keyword", + "index": true }, "v_collector": { - "type": "string", - "index": "not_analyzed" + "type": "keyword", + "index": true }, "v_etl": { - "type": "string", - "index": "not_analyzed" + "type": "keyword", + "index": true }, "v_tracker": { - "type": "string", - "index": "not_analyzed" + "type": "keyword", + "index": true } } } diff --git a/provisioning/resources/init/create.sh b/provisioning/resources/init/create.sh new file mode 100755 index 00000000..54d56fbf --- /dev/null +++ b/provisioning/resources/init/create.sh @@ -0,0 +1,44 @@ +#!/bin/sh + +# Create Elasticsearch indices +curl -H 'Content-Type: application/json' -X PUT localhost:9200/good -d @/home/ubuntu/snowplow/elasticsearch/mapping/good-mapping.json && \ +curl -H 'Content-Type: application/json' -X PUT localhost:9200/bad -d @/home/ubuntu/snowplow/elasticsearch/mapping/bad-mapping.json && \ + +# Create Kibana index patterns +curl -X POST \ + http://localhost:5601/api/saved_objects/index-pattern/good \ + -H 'Content-Type: application/json' \ + -H 'kbn-xsrf: true' \ + -d '{ + "attributes": { + "title": "good", + "timeFieldName": "collector_tstamp" + } +}' + +curl -X POST \ + http://localhost:5601/api/saved_objects/index-pattern/bad \ + -H 'Content-Type: 
application/json' \ + -H 'kbn-xsrf: true' \ + -d '{ + "attributes": { + "title": "bad", + "timeFieldName": "failure_tstamp" + } +}' + +# Set `good` as default index pattern +curl -X POST \ + http://localhost:5601/api/kibana/settings/defaultIndex \ + -H "Content-Type: application/json" \ + -H "kbn-xsrf: true" \ + -d '{ + "value": "good" +}' + +# Create NSQ topics +curl -X POST localhost:4151/topic/create?topic=RawEvents && \ +curl -X POST localhost:4151/topic/create?topic=BadEvents && \ +curl -X POST localhost:4151/topic/create?topic=EnrichedEvents && \ +curl -X POST localhost:4151/topic/create?topic=BadEnrichedEvents && \ +curl -X POST localhost:4151/topic/create?topic=PiiEvents diff --git a/provisioning/resources/init/iglu-server-init.sql b/provisioning/resources/init/iglu-server-init.sql new file mode 100644 index 00000000..7e572822 --- /dev/null +++ b/provisioning/resources/init/iglu-server-init.sql @@ -0,0 +1,2 @@ +CREATE USER snowplow WITH PASSWORD 'snowplow'; +CREATE DATABASE iglu OWNER snowplow; diff --git a/provisioning/resources/init/iglu_server_0.2.0 b/provisioning/resources/init/iglu_server_0.2.0 deleted file mode 100755 index 019d11d3..00000000 --- a/provisioning/resources/init/iglu_server_0.2.0 +++ /dev/null @@ -1,97 +0,0 @@ -#!/bin/sh -### BEGIN INIT INFO -# Provides: -# Required-Start: $remote_fs $syslog -# Required-Stop: $remote_fs $syslog -# Default-Start: 2 3 4 5 -# Default-Stop: 0 1 6 -# Short-Description: Start daemon at boot time -# Description: Enable service provided by daemon. -### END INIT INFO - -dir="/home/ubuntu/snowplow/bin/" -cmd="java -Dconfig.file=/home/ubuntu/snowplow/configs/iglu-server.conf -jar $dir/iglu-server-0.2.0.jar com.snowplowanalytics.iglu.server.Boot" -user="ubuntu" - -name="iglu_server_0.2.0" -pid_file="/var/run/$name.pid" -stdout_log="/var/log/$name.log" -stderr_log="/var/log/$name.err" - -get_pid() { - cat "$pid_file" -} - -is_running() { - [ -f "$pid_file" ] && ps `get_pid` > /dev/null 2>&1 -} - -case "$1" in - start) - if is_running; then - echo "Already started" - else - echo "Starting $name" - if [ -z "$user" ]; then - sudo $cmd >> "$stdout_log" 2>> "$stderr_log" & - else - sudo -u "$user" $cmd >> "$stdout_log" 2>> "$stderr_log" & - fi - echo $! > "$pid_file" - if ! is_running; then - echo "Unable to start, see $stdout_log and $stderr_log" - exit 1 - fi - fi - ;; - stop) - if is_running; then - echo -n "Stopping $name.." - kill `get_pid` - for i in {1..10} - do - if ! is_running; then - break - fi - - echo -n "." 
- sleep 1 - done - echo - - if is_running; then - echo "Not stopped; may still be shutting down or shutdown may have failed" - exit 1 - else - echo "Stopped" - if [ -f "$pid_file" ]; then - rm "$pid_file" - fi - fi - else - echo "Not running" - fi - ;; - restart) - $0 stop - if is_running; then - echo "Unable to stop, will not attempt to start" - exit 1 - fi - $0 start - ;; - status) - if is_running; then - echo "Running" - else - echo "Stopped" - exit 1 - fi - ;; - *) - echo "Usage: $0 {start|stop|restart|status}" - exit 1 - ;; -esac - -exit 0 diff --git a/provisioning/resources/init/kibana4_init b/provisioning/resources/init/kibana4_init deleted file mode 100755 index da5b12da..00000000 --- a/provisioning/resources/init/kibana4_init +++ /dev/null @@ -1,87 +0,0 @@ -#!/bin/sh -# -# /etc/init.d/kibana4_init -- startup script for kibana4 -# bsmith@the408.com 2015-02-20; used elasticsearch init script as template -# https://github.com/akabdog/scripts/edit/master/kibana4_init -# -### BEGIN INIT INFO -# Provides: kibana4_init -# Required-Start: $network $remote_fs $named -# Required-Stop: $network $remote_fs $named -# Default-Start: 2 3 4 5 -# Default-Stop: 0 1 6 -# Short-Description: Starts kibana4_init -# Description: Starts kibana4_init using start-stop-daemon -### END INIT INFO - -#configure this with wherever you unpacked kibana: -KIBANA_BIN=/opt/kibana/bin - -NAME=kibana4 -PID_FILE=/var/run/$NAME.pid -PATH=/bin:/usr/bin:/sbin:/usr/sbin:$KIBANA_BIN -DAEMON=$KIBANA_BIN/kibana -DESC="Kibana4" - -if [ `id -u` -ne 0 ]; then - echo "You need root privileges to run this script" - exit 1 -fi - -. /lib/lsb/init-functions - -if [ -r /etc/default/rcS ]; then - . /etc/default/rcS -fi - -case "$1" in - start) - log_daemon_msg "Starting $DESC" - - pid=`pidofproc -p $PID_FILE kibana` - if [ -n "$pid" ] ; then - log_begin_msg "Already running." - log_end_msg 0 - exit 0 - fi - - # Start Daemon - start-stop-daemon --start --pidfile "$PID_FILE" --make-pidfile --background --exec $DAEMON - log_end_msg $? - ;; - stop) - log_daemon_msg "Stopping $DESC" - - if [ -f "$PID_FILE" ]; then - start-stop-daemon --stop --pidfile "$PID_FILE" \ - --retry=TERM/20/KILL/5 >/dev/null - if [ $? -eq 1 ]; then - log_progress_msg "$DESC is not running but pid file exists, cleaning up" - elif [ $? -eq 3 ]; then - PID="`cat $PID_FILE`" - log_failure_msg "Failed to stop $DESC (pid $PID)" - exit 1 - fi - rm -f "$PID_FILE" - else - log_progress_msg "(not running)" - fi - log_end_msg 0 - ;; - status) - status_of_proc -p $PID_FILE kibana kibana && exit 0 || exit $? - ;; - restart|force-reload) - if [ -f "$PID_FILE" ]; then - $0 stop - sleep 1 - fi - $0 start - ;; - *) - log_success_msg "Usage: $0 {start|stop|restart|force-reload|status}" - exit 1 - ;; -esac - -exit 0 diff --git a/provisioning/resources/init/nsqd_init b/provisioning/resources/init/nsqd_init deleted file mode 100755 index 1d69dcb6..00000000 --- a/provisioning/resources/init/nsqd_init +++ /dev/null @@ -1,99 +0,0 @@ -#!/bin/bash -### BEGIN INIT INFO -# Provides: -# Required-Start: $remote_fs $syslog -# Required-Stop: $remote_fs $syslog -# Default-Start: 2 3 4 5 -# Default-Stop: 0 1 6 -# Short-Description: Start daemon at boot time -# Description: Enable service provided by daemon. 
-### END INIT INFO - -dir="/home/ubuntu/snowplow/bin" -cmd="$dir/nsqd --lookupd-tcp-address=127.0.0.1:4160 -data-path /home/ubuntu/snowplow/nsq-data" -user="" - -name="nsqd" - -pid_file="/var/run/$name.pid" -stdout_log="/var/log/$name.log" -stderr_log="/var/log/$name.err" - -get_pid() { - cat "$pid_file" -} - -is_running() { - [ -f "$pid_file" ] && ps `get_pid` > /dev/null 2>&1 -} - -case "$1" in - start) - if is_running; then - echo "Already started" - else - echo "Starting $name" - if [ -z "$user" ]; then - sudo $cmd >> "$stdout_log" 2>> "$stderr_log" & - else - sudo -u "$user" $cmd >> "$stdout_log" 2>> "$stderr_log" & - fi - echo $! > "$pid_file" - if ! is_running; then - echo "Unable to start, see $stdout_log and $stderr_log" - exit 1 - fi - fi - ;; - stop) - if is_running; then - echo -n "Stopping $name.." - kill `get_pid` - for i in {1..10} - do - if ! is_running; then - break - fi - - echo -n "." - sleep 1 - done - echo - - if is_running; then - echo "Not stopped; may still be shutting down or shutdown may have failed" - exit 1 - else - echo "Stopped" - if [ -f "$pid_file" ]; then - rm "$pid_file" - fi - fi - else - echo "Not running" - fi - ;; - restart) - $0 stop - if is_running; then - echo "Unable to stop, will not attempt to start" - exit 1 - fi - $0 start - ;; - status) - if is_running; then - echo "Running" - else - echo "Stopped" - exit 1 - fi - ;; - *) - echo "Usage: $0 {start|stop|restart|status}" - exit 1 - ;; -esac - -exit 0 - diff --git a/provisioning/resources/init/snowplow_elasticsearch_loader_bad b/provisioning/resources/init/snowplow_elasticsearch_loader_bad deleted file mode 100755 index 939fd4e4..00000000 --- a/provisioning/resources/init/snowplow_elasticsearch_loader_bad +++ /dev/null @@ -1,97 +0,0 @@ -#!/bin/sh -### BEGIN INIT INFO -# Provides: -# Required-Start: $remote_fs $syslog -# Required-Stop: $remote_fs $syslog -# Default-Start: 2 3 4 5 -# Default-Stop: 0 1 6 -# Short-Description: Start daemon at boot time -# Description: Enable service provided by daemon. -### END INIT INFO - -dir="/home/ubuntu/snowplow/bin/" -cmd="java -jar $dir/snowplow-elasticsearch-loader-http-0.10.1.jar --config /home/ubuntu/snowplow/configs/snowplow-es-loader-bad.hocon" -user="ubuntu" - -name="snowplow_elasticsearch_loader_bad" -pid_file="/var/run/$name.pid" -stdout_log="/var/log/$name.log" -stderr_log="/var/log/$name.err" - -get_pid() { - cat "$pid_file" -} - -is_running() { - [ -f "$pid_file" ] && ps `get_pid` > /dev/null 2>&1 -} - -case "$1" in - start) - if is_running; then - echo "Already started" - else - echo "Starting $name" - if [ -z "$user" ]; then - sudo $cmd >> "$stdout_log" 2>> "$stderr_log" & - else - sudo -u "$user" $cmd >> "$stdout_log" 2>> "$stderr_log" & - fi - echo $! > "$pid_file" - if ! is_running; then - echo "Unable to start, see $stdout_log and $stderr_log" - exit 1 - fi - fi - ;; - stop) - if is_running; then - echo -n "Stopping $name.." - kill `get_pid` - for i in {1..10} - do - if ! is_running; then - break - fi - - echo -n "." 
- sleep 1 - done - echo - - if is_running; then - echo "Not stopped; may still be shutting down or shutdown may have failed" - exit 1 - else - echo "Stopped" - if [ -f "$pid_file" ]; then - rm "$pid_file" - fi - fi - else - echo "Not running" - fi - ;; - restart) - $0 stop - if is_running; then - echo "Unable to stop, will not attempt to start" - exit 1 - fi - $0 start - ;; - status) - if is_running; then - echo "Running" - else - echo "Stopped" - exit 1 - fi - ;; - *) - echo "Usage: $0 {start|stop|restart|status}" - exit 1 - ;; -esac - -exit 0 diff --git a/provisioning/resources/init/snowplow_elasticsearch_loader_good b/provisioning/resources/init/snowplow_elasticsearch_loader_good deleted file mode 100755 index 597e9002..00000000 --- a/provisioning/resources/init/snowplow_elasticsearch_loader_good +++ /dev/null @@ -1,97 +0,0 @@ -#!/bin/sh -### BEGIN INIT INFO -# Provides: -# Required-Start: $remote_fs $syslog -# Required-Stop: $remote_fs $syslog -# Default-Start: 2 3 4 5 -# Default-Stop: 0 1 6 -# Short-Description: Start daemon at boot time -# Description: Enable service provided by daemon. -### END INIT INFO - -dir="/home/ubuntu/snowplow/bin/" -cmd="java -jar $dir/snowplow-elasticsearch-loader-http-0.10.1.jar --config /home/ubuntu/snowplow/configs/snowplow-es-loader-good.hocon" -user="ubuntu" - -name="snowplow_elasticsearch_loader_good" -pid_file="/var/run/$name.pid" -stdout_log="/var/log/$name.log" -stderr_log="/var/log/$name.err" - -get_pid() { - cat "$pid_file" -} - -is_running() { - [ -f "$pid_file" ] && ps `get_pid` > /dev/null 2>&1 -} - -case "$1" in - start) - if is_running; then - echo "Already started" - else - echo "Starting $name" - if [ -z "$user" ]; then - sudo $cmd >> "$stdout_log" 2> "$stderr_log" & - else - sudo -u "$user" $cmd >> "$stdout_log" 2> "$stderr_log" & - fi - echo $! > "$pid_file" - if ! is_running; then - echo "Unable to start, see $stdout_log and $stderr_log" - exit 1 - fi - fi - ;; - stop) - if is_running; then - echo -n "Stopping $name.." - kill `get_pid` - for i in {1..10} - do - if ! is_running; then - break - fi - - echo -n "." - sleep 1 - done - echo - - if is_running; then - echo "Not stopped; may still be shutting down or shutdown may have failed" - exit 1 - else - echo "Stopped" - if [ -f "$pid_file" ]; then - rm "$pid_file" - fi - fi - else - echo "Not running" - fi - ;; - restart) - $0 stop - if is_running; then - echo "Unable to stop, will not attempt to start" - exit 1 - fi - $0 start - ;; - status) - if is_running; then - echo "Running" - else - echo "Stopped" - exit 1 - fi - ;; - *) - echo "Usage: $0 {start|stop|restart|status}" - exit 1 - ;; -esac - -exit 0 diff --git a/provisioning/resources/init/nsqlookupd_init b/provisioning/resources/init/snowplow_mini old mode 100755 new mode 100644 similarity index 93% rename from provisioning/resources/init/nsqlookupd_init rename to provisioning/resources/init/snowplow_mini index 0c8e8b35..68703ce3 --- a/provisioning/resources/init/nsqlookupd_init +++ b/provisioning/resources/init/snowplow_mini @@ -1,4 +1,4 @@ -#!/bin/bash +#!/bin/sh ### BEGIN INIT INFO # Provides: # Required-Start: $remote_fs $syslog @@ -9,12 +9,10 @@ # Description: Enable service provided by daemon. 
### END INIT INFO -dir="/home/ubuntu/snowplow/bin" -cmd="$dir/nsqlookupd" -user="" - -name="nsqlookupd" +cmd="docker-compose -f /home/ubuntu/snowplow/docker-compose.yml up -d && sleep 20" +user="ubuntu" +name="snowplow_mini" pid_file="/var/run/$name.pid" stdout_log="/var/log/$name.log" stderr_log="/var/log/$name.err" @@ -95,4 +93,4 @@ case "$1" in ;; esac -exit 0 +exit 0 \ No newline at end of file diff --git a/provisioning/resources/init/snowplow_stream_collector b/provisioning/resources/init/snowplow_stream_collector deleted file mode 100755 index 4b8507be..00000000 --- a/provisioning/resources/init/snowplow_stream_collector +++ /dev/null @@ -1,97 +0,0 @@ -#!/bin/sh -### BEGIN INIT INFO -# Provides: -# Required-Start: $remote_fs $syslog -# Required-Stop: $remote_fs $syslog -# Default-Start: 2 3 4 5 -# Default-Stop: 0 1 6 -# Short-Description: Start daemon at boot time -# Description: Enable service provided by daemon. -### END INIT INFO - -dir="/home/ubuntu/snowplow/bin/" -cmd="java -jar $dir/snowplow-stream-collector-0.11.0.jar --config /home/ubuntu/snowplow/configs/snowplow-stream-collector.hocon" -user="ubuntu" - -name="snowplow_stream_collector" -pid_file="/var/run/$name.pid" -stdout_log="/var/log/$name.log" -stderr_log="/var/log/$name.err" - -get_pid() { - cat "$pid_file" -} - -is_running() { - [ -f "$pid_file" ] && ps `get_pid` > /dev/null 2>&1 -} - -case "$1" in - start) - if is_running; then - echo "Already started" - else - echo "Starting $name" - if [ -z "$user" ]; then - sudo $cmd >> "$stdout_log" 2>> "$stderr_log" & - else - sudo -u "$user" $cmd >> "$stdout_log" 2>> "$stderr_log" & - fi - echo $! > "$pid_file" - if ! is_running; then - echo "Unable to start, see $stdout_log and $stderr_log" - exit 1 - fi - fi - ;; - stop) - if is_running; then - echo -n "Stopping $name.." - kill `get_pid` - for i in {1..10} - do - if ! is_running; then - break - fi - - echo -n "." - sleep 1 - done - echo - - if is_running; then - echo "Not stopped; may still be shutting down or shutdown may have failed" - exit 1 - else - echo "Stopped" - if [ -f "$pid_file" ]; then - rm "$pid_file" - fi - fi - else - echo "Not running" - fi - ;; - restart) - $0 stop - if is_running; then - echo "Unable to stop, will not attempt to start" - exit 1 - fi - $0 start - ;; - status) - if is_running; then - echo "Running" - else - echo "Stopped" - exit 1 - fi - ;; - *) - echo "Usage: $0 {start|stop|restart|status}" - exit 1 - ;; -esac - -exit 0 diff --git a/provisioning/resources/init/snowplow_stream_enrich b/provisioning/resources/init/snowplow_stream_enrich deleted file mode 100755 index f64cfad6..00000000 --- a/provisioning/resources/init/snowplow_stream_enrich +++ /dev/null @@ -1,98 +0,0 @@ -#!/bin/sh -### BEGIN INIT INFO -# Provides: -# Required-Start: $remote_fs $syslog -# Required-Stop: $remote_fs $syslog -# Default-Start: 2 3 4 5 -# Default-Stop: 0 1 6 -# Short-Description: Start daemon at boot time -# Description: Enable service provided by daemon. 
-### END INIT INFO - -dir="/home/ubuntu/snowplow/bin/" -cmd="java -jar snowplow-stream-enrich-nsq-0.16.1.jar --config /home/ubuntu/snowplow/configs/snowplow-stream-enrich.hocon --resolver file:/home/ubuntu/snowplow/configs/iglu-resolver.json --enrichments file:/home/ubuntu/snowplow/configs/enrichments" -user="ubuntu" - -name="snowplow_stream_enrich" -pid_file="/var/run/$name.pid" -stdout_log="/var/log/$name.log" -stderr_log="/var/log/$name.err" - -get_pid() { - cat "$pid_file" -} - -is_running() { - [ -f "$pid_file" ] && ps `get_pid` > /dev/null 2>&1 -} - -case "$1" in - start) - if is_running; then - echo "Already started" - else - echo "Starting $name" - cd $dir - if [ -z "$user" ]; then - sudo $cmd >> "$stdout_log" 2>> "$stderr_log" & - else - sudo -u "$user" $cmd >> "$stdout_log" 2>> "$stderr_log" & - fi - echo $! > "$pid_file" - if ! is_running; then - echo "Unable to start, see $stdout_log and $stderr_log" - exit 1 - fi - fi - ;; - stop) - if is_running; then - echo -n "Stopping $name.." - kill `get_pid` - for i in {1..10} - do - if ! is_running; then - break - fi - - echo -n "." - sleep 1 - done - echo - - if is_running; then - echo "Not stopped; may still be shutting down or shutdown may have failed" - exit 1 - else - echo "Stopped" - if [ -f "$pid_file" ]; then - rm "$pid_file" - fi - fi - else - echo "Not running" - fi - ;; - restart) - $0 stop - if is_running; then - echo "Unable to stop, will not attempt to start" - exit 1 - fi - $0 start - ;; - status) - if is_running; then - echo "Running" - else - echo "Stopped" - exit 1 - fi - ;; - *) - echo "Usage: $0 {start|stop|restart|status}" - exit 1 - ;; -esac - -exit 0 diff --git a/provisioning/resources/init/wait-for-postgres.sh b/provisioning/resources/init/wait-for-postgres.sh new file mode 100755 index 00000000..479a5d64 --- /dev/null +++ b/provisioning/resources/init/wait-for-postgres.sh @@ -0,0 +1,16 @@ +#!/bin/sh +# wait-for-postgres.sh + +set -e + +host="$1" +shift +cmd="$@" + +until PGPASSWORD=snowplow psql -h "$host" -d "iglu" -U "snowplow" -c '\q'; do + >&2 echo "Postgres is unavailable - sleeping" + sleep 2 +done + +>&2 echo "Postgres is up - executing command" +exec docker-entrypoint.sh $cmd \ No newline at end of file diff --git a/provisioning/resources/ui/index.html b/provisioning/resources/ui/index.html index ed52360a..ca4439af 100644 --- a/provisioning/resources/ui/index.html +++ b/provisioning/resources/ui/index.html @@ -14,7 +14,7 @@

SNOWPLOW MINI

- COPYRIGHT © 2016-2017 SNOWPLOW ANALYTICS LTD. ALL RIGHTS RESERVED. PRIVACY POLICY.
+ COPYRIGHT © 2016-2018 SNOWPLOW ANALYTICS LTD. ALL RIGHTS RESERVED. PRIVACY POLICY.

diff --git a/provisioning/resources/ui/js/components/ControlPlane.tsx b/provisioning/resources/ui/js/components/ControlPlane.tsx index 744cbccb..d7b2a542 100644 --- a/provisioning/resources/ui/js/components/ControlPlane.tsx +++ b/provisioning/resources/ui/js/components/ControlPlane.tsx @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2017 Snowplow Analytics Ltd. All rights reserved. + * Copyright (c) 2016-2018 Snowplow Analytics Ltd. All rights reserved. * * This program is licensed to you under the Apache License Version 2.0, * and you may not use this file except in compliance with the Apache License Version 2.0. @@ -25,6 +25,7 @@ import AddExternalIgluServerForm from "./ControlPlaneComponents/AddExternalIgluS import AddLocalIgluApikeyForm from "./ControlPlaneComponents/AddLocalIgluApikey"; import ChangeUsernamePasswordForm from "./ControlPlaneComponents/ChangeUsernamePassword"; import AddDomainNameForm from "./ControlPlaneComponents/AddDomainName"; +import IgluServerConfigForm from "./ControlPlaneComponents/IgluServerConfig"; export class ControlPlane extends React.Component<{}, {}> { @@ -34,6 +35,7 @@ export class ControlPlane extends React.Component<{}, {}> {

The buttons below can be used to interact with the internal systems of Snowplow Mini:

+ diff --git a/provisioning/resources/ui/js/components/ControlPlaneComponents/AddDomainName.tsx b/provisioning/resources/ui/js/components/ControlPlaneComponents/AddDomainName.tsx index c7195a67..b526ee14 100644 --- a/provisioning/resources/ui/js/components/ControlPlaneComponents/AddDomainName.tsx +++ b/provisioning/resources/ui/js/components/ControlPlaneComponents/AddDomainName.tsx @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2017 Snowplow Analytics Ltd. All rights reserved. + * Copyright (c) 2016-2018 Snowplow Analytics Ltd. All rights reserved. * * This program is licensed to you under the Apache License Version 2.0, * and you may not use this file except in compliance with the Apache License Version 2.0. diff --git a/provisioning/resources/ui/js/components/ControlPlaneComponents/AddExternalIgluServer.tsx b/provisioning/resources/ui/js/components/ControlPlaneComponents/AddExternalIgluServer.tsx index 4f35017d..a2b6de3f 100644 --- a/provisioning/resources/ui/js/components/ControlPlaneComponents/AddExternalIgluServer.tsx +++ b/provisioning/resources/ui/js/components/ControlPlaneComponents/AddExternalIgluServer.tsx @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2017 Snowplow Analytics Ltd. All rights reserved. + * Copyright (c) 2016-2018 Snowplow Analytics Ltd. All rights reserved. * * This program is licensed to you under the Apache License Version 2.0, * and you may not use this file except in compliance with the Apache License Version 2.0. diff --git a/provisioning/resources/ui/js/components/ControlPlaneComponents/AddLocalIgluApikey.tsx b/provisioning/resources/ui/js/components/ControlPlaneComponents/AddLocalIgluApikey.tsx index 7820a6a9..ccdf11a5 100644 --- a/provisioning/resources/ui/js/components/ControlPlaneComponents/AddLocalIgluApikey.tsx +++ b/provisioning/resources/ui/js/components/ControlPlaneComponents/AddLocalIgluApikey.tsx @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2017 Snowplow Analytics Ltd. All rights reserved. + * Copyright (c) 2016-2018 Snowplow Analytics Ltd. All rights reserved. * * This program is licensed to you under the Apache License Version 2.0, * and you may not use this file except in compliance with the Apache License Version 2.0. diff --git a/provisioning/resources/ui/js/components/ControlPlaneComponents/AlertOptions.tsx b/provisioning/resources/ui/js/components/ControlPlaneComponents/AlertOptions.tsx index 5a4b2f12..205abdd6 100644 --- a/provisioning/resources/ui/js/components/ControlPlaneComponents/AlertOptions.tsx +++ b/provisioning/resources/ui/js/components/ControlPlaneComponents/AlertOptions.tsx @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2017 Snowplow Analytics Ltd. All rights reserved. + * Copyright (c) 2016-2018 Snowplow Analytics Ltd. All rights reserved. * * This program is licensed to you under the Apache License Version 2.0, * and you may not use this file except in compliance with the Apache License Version 2.0. diff --git a/provisioning/resources/ui/js/components/ControlPlaneComponents/ChangeUsernamePassword.tsx b/provisioning/resources/ui/js/components/ControlPlaneComponents/ChangeUsernamePassword.tsx index 9e005a70..526de40d 100644 --- a/provisioning/resources/ui/js/components/ControlPlaneComponents/ChangeUsernamePassword.tsx +++ b/provisioning/resources/ui/js/components/ControlPlaneComponents/ChangeUsernamePassword.tsx @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2017 Snowplow Analytics Ltd. All rights reserved. + * Copyright (c) 2016-2018 Snowplow Analytics Ltd. All rights reserved. 
* * This program is licensed to you under the Apache License Version 2.0, * and you may not use this file except in compliance with the Apache License Version 2.0. diff --git a/provisioning/resources/ui/js/components/ControlPlaneComponents/IgluServerConfig.tsx b/provisioning/resources/ui/js/components/ControlPlaneComponents/IgluServerConfig.tsx new file mode 100644 index 00000000..3b1f050b --- /dev/null +++ b/provisioning/resources/ui/js/components/ControlPlaneComponents/IgluServerConfig.tsx @@ -0,0 +1,99 @@ +/* + * Copyright (c) 2016-2018 Snowplow Analytics Ltd. All rights reserved. + * + * This program is licensed to you under the Apache License Version 2.0, + * and you may not use this file except in compliance with the Apache License Version 2.0. + * You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0. + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the Apache License Version 2.0 is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the Apache License Version 2.0 for the specific language governing permissions and limitations there under. + */ + +/// +/// +/// +/// + +import React = require('react'); +import ReactDOM = require("react-dom"); +import AlertContainer from 'react-alert'; +import alertOptions from './AlertOptions' +import axios from 'axios'; + +var alertContainer = new AlertContainer(); + +export default React.createClass({ + getInitialState () { + return { + data: new FormData(), + disabled: false + }; + }, + + uploadNewFile(evt) { + this.state.data.append('igluserverhocon', evt.target.files[0]) + }, + + sendFormData() { + var alertShow = alertContainer.show + var _this = this + + _this.setState({ + disabled: true + }); + + axios.defaults.headers.post['Content-Type'] = 'multipart/form-data'; + axios.post('/control-plane/iglu-config', this.state.data, {}) + .then(function (response) { + setInitState() + alertShow('Uploaded successfully', { + time: 2000, + type: 'success' + }); + }) + .catch(function (error) { + setInitState() + alertShow('Error: ' + error.response.data, { + time: 2000, + type: 'error' + }); + }); + + function setInitState() { + _this.setState({ + iglu_server_uri: "", + iglu_server_apikey: "", + disabled: false + }); + } + }, + + handleSubmit(event) { + var alertShow = alertContainer.show + alertShow('Please wait...', { + time: 2000, + type: 'info' + }); + event.preventDefault(); + this.sendFormData(); + }, + + render() { + return ( +
+

Upload Iglu Server config file:

+
+
+ +
+
+ +
+
+ alertContainer = a} {...alertOptions} /> +
+ ); + } +}); diff --git a/provisioning/resources/ui/js/components/ControlPlaneComponents/RestartServices.tsx b/provisioning/resources/ui/js/components/ControlPlaneComponents/RestartServices.tsx index ad2ef205..7623fb44 100644 --- a/provisioning/resources/ui/js/components/ControlPlaneComponents/RestartServices.tsx +++ b/provisioning/resources/ui/js/components/ControlPlaneComponents/RestartServices.tsx @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2017 Snowplow Analytics Ltd. All rights reserved. + * Copyright (c) 2016-2018 Snowplow Analytics Ltd. All rights reserved. * * This program is licensed to you under the Apache License Version 2.0, * and you may not use this file except in compliance with the Apache License Version 2.0. diff --git a/provisioning/resources/ui/js/components/ControlPlaneComponents/UploadEnrichments.tsx b/provisioning/resources/ui/js/components/ControlPlaneComponents/UploadEnrichments.tsx index 816b6264..ec5400ba 100644 --- a/provisioning/resources/ui/js/components/ControlPlaneComponents/UploadEnrichments.tsx +++ b/provisioning/resources/ui/js/components/ControlPlaneComponents/UploadEnrichments.tsx @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2017 Snowplow Analytics Ltd. All rights reserved. + * Copyright (c) 2016-2018 Snowplow Analytics Ltd. All rights reserved. * * This program is licensed to you under the Apache License Version 2.0, * and you may not use this file except in compliance with the Apache License Version 2.0. diff --git a/provisioning/resources/ui/js/components/Elasticsearch.tsx b/provisioning/resources/ui/js/components/Elasticsearch.tsx index 0a7618f7..7a878d83 100644 --- a/provisioning/resources/ui/js/components/Elasticsearch.tsx +++ b/provisioning/resources/ui/js/components/Elasticsearch.tsx @@ -43,7 +43,6 @@ export class Elasticsearch extends React.Component<{}, {}> {

Quicklinks:

); diff --git a/provisioning/resources/ui/js/components/Overview.tsx b/provisioning/resources/ui/js/components/Overview.tsx index 7e3ee58d..df2e2043 100644 --- a/provisioning/resources/ui/js/components/Overview.tsx +++ b/provisioning/resources/ui/js/components/Overview.tsx @@ -25,7 +25,6 @@ export class Overview extends React.Component<{}, {}> { var collector: string = location.protocol + '//' + window.location.host; var kibana: string = location.protocol + '//' + window.location.host + '/kibana/'; - var head_plugin: string = location.protocol + '//' + window.location.host + '/elasticsearch/_plugin/head/'; var elasticsearch: string = location.protocol + '//' + window.location.host + '/elasticsearch'; return ( @@ -38,7 +37,7 @@ export class Overview extends React.Component<{}, {}> {

You can send events into Snowplow Mini automatically from the Example events page. Simply go to that page and click the sample event buttons.

Alternatively, you can set up any of the Snowplow trackers to send data to this endpoint: {collector}

2. Viewing the events

-

You can view the events that have been sent to Elasticsearch in the Kibana Dashboard or the Head Plugin.

+

You can view the events that have been sent to Elasticsearch in the Kibana Dashboard or the Head Plugin.

You can also submit queries directly to the Elasticsearch endpoint.

3. Understanding how Snowplow Mini works

Quicklinks:

@@ -49,13 +48,14 @@ export class Overview extends React.Component<{}, {}> {

The software stack installed:

- • Snowplow Stream Collector 0.11.0
- • Snowplow Stream Enrich NSQ 0.16.1
- • Snowplow Elasticsearch Sink 0.10.1
- • Snowplow Iglu Server 0.2.0
- • NSQ 1.0.0
- • Elasticsearch 1.7.5
- • Kibana 4.0.1
+ • Snowplow Stream Collector NSQ 0.13.0
+ • Snowplow Stream Enrich NSQ 0.18.0
+ • Snowplow Elasticsearch Loader 0.10.1
+ • Snowplow Iglu Server 0.3.0
+ • Postgres 9.5
+ • NSQ v1.0.0-compat
+ • Elasticsearch-OSS 6.3.1
+ • Kibana-OSS 6.3.1

Stack topology:

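The Overview page above points trackers at the {collector} endpoint, which in this release is the Scala Stream Collector container published on port 8080 in docker-compose.yml. As a quick smoke test against a running instance, a single page view can be sent with curl; this is only a sketch, assuming the collector's usual GET pixel path /i and a hypothetical host name (neither is defined in this diff):

    # MINI_HOST is a placeholder; use the IP or domain of your Snowplow Mini instance
    MINI_HOST=mini.example.com
    # minimal page-view payload: e=pv (event type), p=web (platform), tv (tracker version), url (page URL)
    curl "http://${MINI_HOST}:8080/i?e=pv&p=web&tv=curl&url=http%3A%2F%2Fexample.com%2Ftest"

If the event passes Stream Enrich it should appear in the good index in Kibana shortly afterwards; anything that fails validation lands in the bad index instead.
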
diff --git a/provisioning/roles/base/tasks/main.yml b/provisioning/roles/base/tasks/main.yml index b561b6f4..b6189dc9 100644 --- a/provisioning/roles/base/tasks/main.yml +++ b/provisioning/roles/base/tasks/main.yml @@ -1,10 +1,10 @@ --- - name: Update apt cache apt: update-cache=yes cache_valid_time=86400 - sudo: yes + become: yes - name: install basic packages - sudo: yes + become: yes apt: pkg={{ item }} state=latest tags: [packages] with_items: @@ -15,8 +15,8 @@ lineinfile: dest="/etc/resolvconf/resolv.conf.d/base" line="nameserver 8.8.8.8" - sudo: yes + become: yes - name: Update nameserver list command: "resolvconf -u" - sudo: yes + become: yes diff --git a/provisioning/roles/docker/files/.env b/provisioning/roles/docker/files/.env new file mode 100755 index 00000000..268f9274 --- /dev/null +++ b/provisioning/roles/docker/files/.env @@ -0,0 +1,2 @@ +ES_JVM_SIZE=4g +SP_JVM_SIZE=512m diff --git a/provisioning/roles/docker/files/docker-compose.yml b/provisioning/roles/docker/files/docker-compose.yml new file mode 100644 index 00000000..a86b4b71 --- /dev/null +++ b/provisioning/roles/docker/files/docker-compose.yml @@ -0,0 +1,177 @@ +version: "3" + +services: + elasticsearch: + image: docker.elastic.co/elasticsearch/elasticsearch-oss:6.3.1 + container_name: elasticsearch + restart: always + environment: + # Swapping needs to be disabled for performance and node stability + - "bootstrap.memory_lock=true" + - ES_JAVA_OPTS=-Xms${ES_JVM_SIZE} -Xmx${ES_JVM_SIZE} + volumes: + - /home/ubuntu/snowplow/elasticsearch/data:/usr/share/elasticsearch/data + - /home/ubuntu/snowplow/elasticsearch/config/elasticsearch.yml:/usr/share/elasticsearch/config/elasticsearch.yml + - /home/ubuntu/snowplow/elasticsearch/config/log4j2.properties:/usr/share/elasticsearch/config/log4j2.properties + ulimits: + memlock: + soft: -1 + hard: -1 + nofile: + soft: 65536 + hard: 65536 + logging: + options: + max-size: "50M" + max-file: "10" + ports: + - "9200:9200" + - "9300:9300" + + kibana: + image: docker.elastic.co/kibana/kibana-oss:6.3.1 + container_name: kibana + restart: always + environment: + - /Users/oguzhanunlu/work/snowplow-mini/provisioning/resources/elasticsearch/config/kibana.yml:/usr/share/kibana/config/kibana.yml + ports: + - "5601:5601" + depends_on: + - elasticsearch + + elasticsearch-loader-good: + image: snowplow-docker-registry.bintray.io/snowplow/elasticsearch-loader:0.10.2 + container_name: elasticsearch-loader-good + command: [ "--config", "/snowplow/config/snowplow-es-loader-good.hocon" ] + restart: always + depends_on: + - elasticsearch + volumes: + - /home/ubuntu/snowplow/configs:/snowplow/config + logging: + options: + max-size: "1M" + max-file: "10" + environment: + - "SP_JAVA_OPTS=-Xmx${SP_JVM_SIZE}" + + elasticsearch-loader-bad: + image: snowplow-docker-registry.bintray.io/snowplow/elasticsearch-loader:0.10.2 + container_name: elasticsearch-loader-bad + command: [ "--config", "/snowplow/config/snowplow-es-loader-bad.hocon" ] + restart: always + depends_on: + - elasticsearch + volumes: + - /home/ubuntu/snowplow/configs:/snowplow/config + logging: + options: + max-size: "1M" + max-file: "10" + environment: + - "SP_JAVA_OPTS=-Xmx${SP_JVM_SIZE}" + + nsqlookupd: + image: nsqio/nsq:v1.0.0-compat + container_name: nsqlookupd + command: /nsqlookupd --broadcast-address=nsqlookupd + restart: always + logging: + options: + max-size: "1M" + max-file: "10" + ports: + - "4160:4160" + - "4161:4161" + + nsqd: + image: nsqio/nsq:v1.0.0-compat + container_name: nsqd + command: /nsqd --broadcast-address=nsqd 
--lookupd-tcp-address=nsqlookupd:4160 --data-path=/home/ubuntu/snowplow/nsq-data + restart: always + volumes: + - /home/ubuntu/snowplow/nsq-data:/home/ubuntu/snowplow/nsq-data + depends_on: + - nsqlookupd + ports: + - "4150:4150" + - "4151:4151" + + nsqadmin: + image: nsqio/nsq:v1.0.0-compat + container_name: nsqadmin + command: /nsqadmin --lookupd-http-address=nsqlookupd:4161 + restart: always + depends_on: + - nsqlookupd + ports: + - "4171:4171" + + scala-stream-collector: + image: snowplow-docker-registry.bintray.io/snowplow/scala-stream-collector-nsq:0.13.0 + container_name: scala-stream-collector-nsq + command: [ "--config", "/snowplow/config/snowplow-stream-collector.hocon" ] + restart: always + depends_on: + - nsqd + ports: + - "8080:8080" + volumes: + - /home/ubuntu/snowplow/configs:/snowplow/config + logging: + options: + max-size: "1M" + max-file: "10" + environment: + - "SP_JAVA_OPTS=-Xmx${SP_JVM_SIZE}" + + stream-enrich: + image: snowplow-docker-registry.bintray.io/snowplow/stream-enrich-nsq:0.18.0 + container_name: stream-enrich-nsq + command: [ + "--config", "/snowplow/config/snowplow-stream-enrich.hocon", + "--resolver", "file:/snowplow/config/iglu-resolver.json", + "--enrichments", "file:/snowplow/config/enrichments", + "--force-cached-files-download" + ] + restart: always + depends_on: + - scala-stream-collector + volumes: + - /home/ubuntu/snowplow/configs:/snowplow/config + logging: + options: + max-size: "1M" + max-file: "10" + environment: + - "SP_JAVA_OPTS=-Xmx${SP_JVM_SIZE}" + + postgres: + image: postgres:9.5 + container_name: postgres + restart: always + volumes: + - /home/ubuntu/snowplow/init/iglu-server-init.sql:/docker-entrypoint-initdb.d/init.sql + ports: + - "5433:5432" + logging: + options: + max-size: "1M" + max-file: "10" + + iglu-server: + image: snowplow-docker-registry.bintray.io/snowplow/iglu-server:0.3.0 + container_name: iglu-server + entrypoint: /snowplow/bin/wait-for-postgres.sh postgres --config /snowplow/config/iglu-server.conf + restart: always + depends_on: + - postgres + ports: + - "8081:8081" + volumes: + - /home/ubuntu/snowplow/init/wait-for-postgres.sh:/snowplow/bin/wait-for-postgres.sh + - /home/ubuntu/snowplow/configs:/snowplow/config + logging: + options: + max-size: "1M" + max-file: "10" diff --git a/provisioning/roles/docker/tasks/main.yml b/provisioning/roles/docker/tasks/main.yml new file mode 100644 index 00000000..0e512ece --- /dev/null +++ b/provisioning/roles/docker/tasks/main.yml @@ -0,0 +1,44 @@ +--- +- include_vars: ../../common_vars.yml + +- name: Setup the docker repository and install docker + become: yes + shell: | + apt-get update + apt-get install apt-transport-https ca-certificates curl software-properties-common --yes + curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo apt-key add - + add-apt-repository "deb [arch=amd64] https://download.docker.com/linux/ubuntu $(lsb_release -cs) stable" + apt-get update && apt-get install docker-ce --yes + +- name: Download docker-compose + become: yes + shell: curl -L https://github.com/docker/compose/releases/download/1.21.2/docker-compose-$(uname -s)-$(uname -m) -o /usr/local/bin/docker-compose + +- name: Apply executable permissions to the docker-compose binary + become: yes + shell: chmod +x /usr/local/bin/docker-compose + +- name: Copy docker-compose.yml and its environment file + copy: src={{ item.src }} dest={{ item.dest }} owner=ubuntu group=ubuntu mode=0644 + with_items: + - { src: '../files/docker-compose.yml', dest: '/home/ubuntu/snowplow/' } + - { src: 
'../files/.env', dest: '/home/ubuntu/snowplow/' } + +- name: Create & set permissions of elasticsearch data directory + become: yes + shell: mkdir {{main_dir}}/elasticsearch/data && chown -R 1000:1000 {{main_dir}}/elasticsearch/data + +- name: Deploy snowplow mini + become: yes + shell: cd {{main_dir}} && docker-compose up -d && sleep 20 + +- name: Wait for Elasticsearch port 9200 to become open on the host, don't start checking for 10 seconds + wait_for: + port: 9200 + delay: 10 + sleep: 5 + connect_timeout: 60 + +- name: Create ES indexes & Kibana index patterns & NSQ topics + become: yes + shell: sh {{init_dir}}/create.sh diff --git a/provisioning/roles/packer/tasks/main.yml b/provisioning/roles/packer/tasks/main.yml index 62dceaa2..fc516a35 100644 --- a/provisioning/roles/packer/tasks/main.yml +++ b/provisioning/roles/packer/tasks/main.yml @@ -1,12 +1,12 @@ --- - name: Download Packer - sudo: yes + become: yes shell: curl -o {{ packer_install_dir }}/{{ packer_archive }}.zip {{ packer_url }} - name: Extract Packer - sudo: yes + become: yes unarchive: copy=no src={{ packer_install_dir }}/{{ packer_archive }}.zip dest={{ packer_install_dir }} - name: Remove downloaded packer_archive - sudo: yes + become: yes shell: "rm -f {{ packer_install_dir }}/{{ packer_archive }}.zip" diff --git a/provisioning/roles/sp_mini_1_create_dirs/tasks/main.yml b/provisioning/roles/sp_mini_1_create_dirs/tasks/main.yml index e8c81c4b..b583f489 100644 --- a/provisioning/roles/sp_mini_1_create_dirs/tasks/main.yml +++ b/provisioning/roles/sp_mini_1_create_dirs/tasks/main.yml @@ -10,26 +10,8 @@ shell: 'adduser ubuntu --disabled-password --gecos "" ; passwd -d ubuntu' -- name: Insert logrotate configuration for Snowplow Services - become: yes - copy: - dest: "/etc/logrotate.d/snowplow-apps" - content: | - /var/log/snowplow*.log /var/log/snowplow*.err /var/log/nsq*.log /var/log/nsq*.err /var/log/iglu_server*.log /var/log/iglu_server*.err { - hourly - rotate 3 - missingok - notifempty - copytruncate - } - mode: 0644 - -- name: Change logrotate cron to hourly - become: yes - shell: 'mv /etc/cron.daily/logrotate /etc/cron.hourly && service cron restart' - - name: creating directories - file: path={{item}} state=directory + file: path={{item}} state=directory mode=0755 with_items: - "{{configs_dir}}" - "{{staging_dir}}" @@ -48,13 +30,20 @@ src: "{{playbook_dir}}/resources/elasticsearch" dest: "{{main_dir}}" recursive: yes + archive: no - synchronize: src: "{{playbook_dir}}/resources/configs" dest: "{{main_dir}}" recursive: yes + archive: no - synchronize: src: "{{playbook_dir}}/resources/init" dest: "{{main_dir}}" recursive: yes + archive: no + +- name: Install NTP to prevent clock drifts + become: yes + shell: apt-get update && apt-get install ntp -y diff --git a/provisioning/roles/sp_mini_2_install_postgresl/tasks/main.yml b/provisioning/roles/sp_mini_2_install_postgresl/tasks/main.yml deleted file mode 100644 index ad7c423b..00000000 --- a/provisioning/roles/sp_mini_2_install_postgresl/tasks/main.yml +++ /dev/null @@ -1,53 +0,0 @@ ---- -- include_vars: ../../common_vars.yml - -- name: Adding APT repository key - become: yes - apt_key: - id: ACCC4CF8 - url: https://www.postgresql.org/media/keys/ACCC4CF8.asc - tags: - - postgresql - - db - - repo - -- name: Add PostgreSQL official APT repository - become: yes - apt_repository: - repo: "deb http://apt.postgresql.org/pub/repos/apt/ {{ansible_distribution_release}}-pgdg main" - tags: - - postgresql - - db - - repo - -- name: Install acl for creating Postgresql user - 
become: yes - apt: - name: "acl" - state: present - update_cache: yes - cache_valid_time: 3600 - -- name: Install PostgreSQL - become: yes - apt: - name: "postgresql-9.5" - state: present - update_cache: yes - cache_valid_time: 3600 - tags: - - postgresql - - db - - deps - -- name: Install dependencies for the Ansible module - become: yes - apt: - name: "{{item}}" - state: latest - with_items: - - python-psycopg2 - tags: - - postgresql - - db - - deps diff --git a/provisioning/roles/sp_mini_4_setup_apps/tasks/main.yml b/provisioning/roles/sp_mini_4_setup_apps/tasks/main.yml index 47313c89..ca78bfd4 100644 --- a/provisioning/roles/sp_mini_4_setup_apps/tasks/main.yml +++ b/provisioning/roles/sp_mini_4_setup_apps/tasks/main.yml @@ -3,43 +3,8 @@ - name: Set variables set_fact: - stream_collector_package: 'snowplow_scala_stream_collector_0.11.0.zip' - stream_enrich_package: 'snowplow_stream_enrich_nsq_0.16.1.zip' - es_loader_package: 'snowplow_elasticsearch_loader_http_0.10.1.zip' - iglu_server_package: 'iglu_server_0.2.0.zip' - kibana_v: '4.0.1' - nsq_package: 'nsq-1.0.0-compat.linux-amd64.go1.8.tar.gz' - nsq_bin_dir: 'nsq-1.0.0-compat.linux-amd64.go1.8/bin' control_plane_dir: '{{playbook_dir}}/resources/control-plane' -- name: Install unzip - become: yes - apt: - name: "unzip" - state: present - update_cache: yes - cache_valid_time: 3600 - -- name: Add Java 8 repository - become: yes - apt_repository: - repo: 'ppa:webupd8team/java' - state: present - -- name: Signed Oracle License - become: yes - shell: "echo oracle-java8-installer shared/accepted-oracle-license-v1-1 select true | sudo /usr/bin/debconf-set-selections" - register: oracle_license_signed - -- name: Install Java 8 - become: yes - apt: - name: oracle-java8-installer - state: present - update_cache: yes - cache_valid_time: 3600 - when: oracle_license_signed|changed - - name: Copy Control API to executables dir become: yes synchronize: @@ -52,134 +17,6 @@ src: "{{playbook_dir}}/../VERSION" dest: "{{main_dir}}" -- name: Check Stream Collector - stat: - path: "{{staging_dir}}/{{stream_collector_package}}" - register: check_stream_collector_result - -- name: Download Stream Collector - get_url: - url: "http://dl.bintray.com/snowplow/snowplow-generic/{{stream_collector_package}}" - dest: "{{staging_dir}}" - when: check_stream_collector_result.stat.exists == False - register: stream_collector_downloaded - -- name: Unzip downloaded Stream Collector - shell: "unzip {{staging_dir}}/{{stream_collector_package}} -d {{executables_dir}}" - when: stream_collector_downloaded|changed - -- name: Check Stream Enrich - stat: - path: "{{staging_dir}}/{{stream_enrich_package}}" - register: check_stream_enrich_result - -- name: Download Stream Enrich - get_url: - url: "http://dl.bintray.com/snowplow/snowplow-generic/{{stream_enrich_package}}" - dest: "{{staging_dir}}" - when: check_stream_enrich_result.stat.exists == False - register: stream_enrich_downloaded - -- name: Unzip downloaded Stream Enrich - shell: "unzip {{staging_dir}}/{{stream_enrich_package}} -d {{executables_dir}}" - when: stream_enrich_downloaded|changed - -- name: Check Elasticsearch Loader - stat: - path: "{{staging_dir}}/{{es_loader_package}}" - register: check_es_loader_result - -- name: Download Elasticsearch Loader - get_url: - url: "http://bintray.com/artifact/download/snowplow/snowplow-generic/{{es_loader_package}}" - dest: "{{staging_dir}}" - when: check_es_loader_result.stat.exists == False - register: es_loader_downloaded - -- name: Unzip downloaded Elasticsearch Loader - 
shell: "unzip {{staging_dir}}/{{es_loader_package}} -d {{executables_dir}}" - when: es_loader_downloaded|changed - -- name: Check Iglu Server - stat: - path: "{{staging_dir}}/{{iglu_server_package}}" - register: check_iglu_server_result - -- name: Download Iglu Server - get_url: - url: "http://bintray.com/artifact/download/snowplow/snowplow-generic/{{iglu_server_package}}" - dest: "{{staging_dir}}" - when: check_iglu_server_result.stat.exists == False - register: iglu_server_downloaded - -- name: Unzip downloaded Iglu Server - shell: "unzip {{staging_dir}}/{{iglu_server_package}} -d {{executables_dir}}" - when: iglu_server_downloaded|changed - register: iglu_server_extracted - -- name: Download NSQ - get_url: - url: "https://s3.amazonaws.com/bitly-downloads/nsq/{{nsq_package}}" - dest: "{{staging_dir}}" - -- name: Unzip downloaded NSQ - shell: "tar xvfz {{staging_dir}}/{{nsq_package}} --directory {{staging_dir}}" - -- name: Copy NSQ binaries to executables_dir - shell: "cp {{staging_dir}}/{{nsq_bin_dir}}/nsqd {{staging_dir}}/{{nsq_bin_dir}}/nsqlookupd {{executables_dir}}" - -- name: Create snowplow user on Postgresql - become: true - become_user: postgres - postgresql_user: - name: snowplow - password: snowplow - -- name: Create iglu db on Postgresql - become: true - become_user: postgres - postgresql_db: - name: iglu - owner: snowplow - -- name: Download Elasticsearch - become: yes - shell: "wget https://download.elastic.co/elasticsearch/elasticsearch/elasticsearch-1.7.5.deb -P {{staging_dir}}" - -- name: Install Elasticsearch - become: yes - shell: "dpkg -i {{staging_dir}}/elasticsearch-1.7.5.deb" - -- name: Install Elasticsearch Head Plugin - become: yes - shell: "/usr/share/elasticsearch/bin/plugin --install mobz/elasticsearch-head" - -- name: Check Kibana - stat: - path: "{{staging_dir}}/kibana-{{kibana_v}}-linux-x64.zip" - register: check_kibana_result - -- name: Download Kibana - get_url: - url: "https://download.elasticsearch.org/kibana/kibana/kibana-{{kibana_v}}-linux-x64.zip" - dest: "{{staging_dir}}" - when: check_kibana_result.stat.exists == False - register: kibana_downloaded - -- name: Unzip downloaded Kibana package - become: yes - shell: "unzip {{staging_dir}}/kibana-{{kibana_v}}-linux-x64.zip -d /opt/" - when: kibana_downloaded|changed - register: kibana_unzipped - -- name: Symlink for kibana - become: yes - file: - src: "/opt/kibana-{{kibana_v}}-linux-x64" - dest: "/opt/kibana" - state: link - when: kibana_unzipped|changed - - name: Copy Caddy executable to executables dir become: yes environment: @@ -191,4 +28,4 @@ - name: Set owner of the main directory become: yes - shell: "chown -R ubuntu:ubuntu {{main_dir}}" + shell: "chown -R ubuntu:ubuntu {{main_dir}} && chmod 755 -R {{main_dir}}" diff --git a/provisioning/roles/sp_mini_7_setup_init/tasks/main.yml b/provisioning/roles/sp_mini_7_setup_init/tasks/main.yml index 403b8739..8d8a4728 100644 --- a/provisioning/roles/sp_mini_7_setup_init/tasks/main.yml +++ b/provisioning/roles/sp_mini_7_setup_init/tasks/main.yml @@ -9,29 +9,32 @@ dest: "/etc/init.d" mode: 0755 with_items: - - kibana4_init + - snowplow_mini - snowplow_mini_control_plane_api - - snowplow_stream_collector - - snowplow_stream_enrich - - snowplow_elasticsearch_loader_good - - snowplow_elasticsearch_loader_bad - - iglu_server_0.2.0 - caddy_init - - nsqd_init - - nsqlookupd_init - name: Configure for inits for calling at boot time become: yes shell: "update-rc.d {{item}} defaults" with_items: - - kibana4_init + - snowplow_mini - snowplow_mini_control_plane_api - 
- snowplow_stream_collector - - snowplow_stream_enrich - - snowplow_elasticsearch_loader_good - - snowplow_elasticsearch_loader_bad - - iglu_server_0.2.0 - - elasticsearch - caddy_init - - nsqd_init - - nsqlookupd_init + +- name: Start UI and Control Plane + become: yes + shell: service snowplow_mini_control_plane_api start && service caddy_init start + +- name: Increase mmap count to recommended 262144 for Elasticsearch + become: yes + shell: echo "vm.max_map_count=262144" >> /etc/sysctl.conf && service procps start + +- cron: + name: "Add cronjob to crop ES good index's docs older than a week" + special_time: weekly + job: /usr/bin/curl -s -X POST http://localhost:9200/good/_delete_by_query -H 'Content-Type:application/json' -d '{ "query" :{ "range" :{ "collector_tstamp" :{ "lt" :"now-1w/d" } } } }' > /dev/null 2>&1 + +- cron: + name: "Add cronjob to crop ES bad index's docs older than a week" + special_time: weekly + job: /usr/bin/curl -s -X POST http://localhost:9200/bad/_delete_by_query -H 'Content-Type:application/json' -d '{ "query" :{ "range" :{ "failure_tstamp" :{ "lt" :"now-1w/d" } } } }' > /dev/null 2>&1 diff --git a/provisioning/roles/sp_mini_8_configure/tasks/main.yml b/provisioning/roles/sp_mini_8_configure/tasks/main.yml deleted file mode 100644 index ade8210a..00000000 --- a/provisioning/roles/sp_mini_8_configure/tasks/main.yml +++ /dev/null @@ -1,72 +0,0 @@ ---- -- include_vars: ../../common_vars.yml - -- name: Starting Elasticsearch - become: yes - service: - name: elasticsearch - state: started - register: ElasticsearchStarted - -- name: Wait for Elasticsearch port 9200 to become open on the host, don't start checking for 10 seconds - wait_for: - port: 9200 - delay: 10 - -- name: curl put good-mapping.json - shell: "curl -XPUT 'http://localhost:9200/good' -d @{{es_dir}}/good-mapping.json" - -- name: curl put bad-mapping.json - shell: "curl -XPUT 'http://localhost:9200/bad' -d @{{es_dir}}/bad-mapping.json" - -- name: Starting nsqd - become: yes - service: - name: nsqd_init - state: started - register: NsqdStarted - -- name: Starting nsqlookupd - become: yes - service: - name: nsqlookupd_init - state: started - register: NsqlookupdStarted - -- name: Wait for the NSQ services to start - wait_for: - port: "{{item}}" - delay: 1 - with_items: - - 4150 - - 4161 - -- name: Starting Kibana - become: yes - service: - name: kibana4_init - state: started - -- name: add "good" index pattern to Kibana - shell: > - curl -XPUT http://localhost:9200/.kibana/index-pattern/good -d '{"title" : "good", "timeFieldName" : "collector_tstamp"}' - -- name: add "bad" index pattern to Kibana - shell: > - curl -XPUT http://localhost:9200/.kibana/index-pattern/bad -d '{"title" : "bad", "timeFieldName" : "failure_tstamp"}' - -- name: make "good" index pattern default - shell: > - curl -XPUT http://localhost:9200/.kibana/config/4.0.1 -d '{"defaultIndex" : "good"}' - -- name: Create new topic for RawEvents - shell: "curl -X POST http://127.0.0.1:4151/topic/create?topic=RawEvents" - -- name: Create new topic for BadEvents - shell: "curl -X POST http://127.0.0.1:4151/topic/create?topic=BadEvents" - -- name: Create new topic for EnrichedEvents - shell: "curl -X POST http://127.0.0.1:4151/topic/create?topic=EnrichedEvents" - -- name: Create new topic for BadEnrichedEvents - shell: "curl -X POST http://127.0.0.1:4151/topic/create?topic=BadEnrichedEvents" diff --git a/provisioning/roles/typescript/tasks/main.yml b/provisioning/roles/typescript/tasks/main.yml index 6e4895b5..290c4799 100644 --- 
a/provisioning/roles/typescript/tasks/main.yml +++ b/provisioning/roles/typescript/tasks/main.yml @@ -1,7 +1,7 @@ --- - name: Install Typescript, Browserify and Uglify command: "{{ item }}" - sudo: yes + become: yes with_items: - "npm install -g typescript@2.3.2" - "npm install -g tsd@0.6.5" diff --git a/provisioning/with_building_ui_and_go_projects.yml b/provisioning/with_building_ui_and_go_projects.yml index 8bf08842..15899f46 100644 --- a/provisioning/with_building_ui_and_go_projects.yml +++ b/provisioning/with_building_ui_and_go_projects.yml @@ -12,10 +12,9 @@ - typescript - packer - sp_mini_1_create_dirs - - sp_mini_2_install_postgresl - sp_mini_3_build_go_projects - sp_mini_4_setup_apps - sp_mini_5_build_ui - sp_mini_6_copy_ui_folders - sp_mini_7_setup_init - - sp_mini_8_configure + - docker diff --git a/provisioning/without_building_ui_and_go_projects.yml b/provisioning/without_building_ui_and_go_projects.yml index b7483aca..99402670 100644 --- a/provisioning/without_building_ui_and_go_projects.yml +++ b/provisioning/without_building_ui_and_go_projects.yml @@ -7,8 +7,7 @@ roles: - sp_mini_1_create_dirs - - sp_mini_2_install_postgresl - sp_mini_4_setup_apps - sp_mini_6_copy_ui_folders - sp_mini_7_setup_init - - sp_mini_8_configure + - docker diff --git a/utils/scripts/user_data.sh b/utils/scripts/user_data.sh index 25486991..49412c51 100755 --- a/utils/scripts/user_data.sh +++ b/utils/scripts/user_data.sh @@ -1,22 +1,23 @@ #!/bin/bash -# NOTE: Use `uuidgen` to create new `uid` -iglu_server_super_uid="980ae3ab-3aba-4ffe-a3c2-3b2e24e2ffce" +set -e -x domain_name=example.com - -username=USERNAME_PLACEHOLDER -password=PASSWORD_PLACEHOLDER +username='username' +password='password' +iglu_server_super_uid='deadbeef-dead-beef-dead-beefdeadbeef' # DO NOT ALTER BELOW # -sudo service snowplow_mini_control_plane_api start -sleep 2 +sudo /usr/local/bin/docker-compose -f /home/ubuntu/snowplow/docker-compose.yml restart iglu-server +sudo service snowplow_mini_control_plane_api restart -#add apiKey to iglu-resolver.json for auth in the iglu server -curl -XPOST -d "iglu_server_super_uuid=$iglu_server_super_uid" localhost:10000/local-iglu +sleep 10 -#add domain name to Caddyfile +# Add domain name to Caddyfile curl -XPOST -d "domain_name=$domain_name" localhost:10000/domain-name -#add username and password to Caddyfile for basic auth +# Add username and password to Caddyfile for basic auth curl -XPOST -d "new_username=$username&new_password=$password" localhost:10000/credentials + +# Add apiKey to iglu-resolver.json for auth in the iglu server +curl -XPOST -d "local_iglu_apikey=$iglu_server_super_uid" localhost:10000/local-iglu-apikey diff --git a/vagrant/push.bash b/vagrant/push.bash index da762e8b..39266ee7 100755 --- a/vagrant/push.bash +++ b/vagrant/push.bash @@ -1,6 +1,7 @@ #!/bin/bash set -e +########## FUNCTIONS ################# # Similar to Perl die function die() { echo "$@" 1>&2 ; exit 1; @@ -38,6 +39,7 @@ function cd_root() { dir="$( cd -P "$( dirname "${source}" )/.." && pwd )" cd ${dir} } +###################################### cd_root @@ -45,15 +47,105 @@ cd_root running=0 && is_running "running" [ ${running} -eq 1 ] || die "Vagrant guest must be running to push" -# Can't pass args thru vagrant push so have to prompt -read -e -p "Please enter your AWS_ACCESS_KEY_ID: " aws_access_key_id -read -e -p "Please enter your AWS_SECRET_ACCESS_KEY: " aws_secret_access_key +###### input validation ###### +if [ "$#" -ne 2 ] +then + echo "Inappropriate number of arguments! Aborted." 
+ echo "Usage: push.bash " + exit 1 +else + SP_MINI_PLATFORM="$1" + SP_MINI_SIZE="$2" -# Build AMI -cmd="export AWS_ACCESS_KEY_ID=$aws_access_key_id && \ - export AWS_SECRET_ACCESS_KEY=$aws_secret_access_key && \ - cd /vagrant && \ - packer build Packerfile.json" -vagrant ssh -c "${cmd}" + if [ "$SP_MINI_PLATFORM" == "aws" -o "$SP_MINI_PLATFORM" == "gcp" ] + then + echo "Platform recognized!" + else + echo "Unrecognized platform! Aborted." + echo "Supported platforms; aws, gcp." + exit 1 + fi -exit 0 + if [ "$SP_MINI_SIZE" == "large" ] + then + # prepare env vars for docker-compose + # to be used as -Xmx jvm option for Elasticsearch & Snowplow apps + echo -n > provisioning/roles/docker/files/.env + echo "ES_JVM_SIZE=4g" >> provisioning/roles/docker/files/.env + echo "SP_JVM_SIZE=512m" >> provisioning/roles/docker/files/.env + # prepare env var for packer + # to be used to determine which instance type to use + if [ "$SP_MINI_PLATFORM" == "aws" ] + then + platform_cmd="export AWS_INSTANCE_TYPE=t2.large" + elif [ "$SP_MINI_PLATFORM" == "gcp" ] + then + platform_cmd="export GCP_MACHINE_TYPE=n1-standard-2" + fi + elif [ "$SP_MINI_SIZE" == "xlarge" ] + then + # prepare env vars for docker-compose + # to be used as -Xmx jvm option for Elasticsearch & Snowplow apps + echo -n > provisioning/roles/docker/files/.env + echo "ES_JVM_SIZE=8g" >> provisioning/roles/docker/files/.env + echo "SP_JVM_SIZE=1536m" >> provisioning/roles/docker/files/.env + # prepare env var for packer + # to be used to determine which instance type to use + if [ "$SP_MINI_PLATFORM" == "aws" ] + then + platform_cmd="export AWS_INSTANCE_TYPE=t2.xlarge" + elif [ "$SP_MINI_PLATFORM" == "gcp" ] + then + platform_cmd="export GCP_MACHINE_TYPE=n1-standard-4" + fi + elif [ "$SP_MINI_SIZE" == "xxlarge" ] + then + # prepare env vars for docker-compose + # to be used as -Xmx jvm option for Elasticsearch & Snowplow apps + echo -n > provisioning/roles/docker/files/.env + echo "ES_JVM_SIZE=16g" >> provisioning/roles/docker/files/.env + echo "SP_JVM_SIZE=3g" >> provisioning/roles/docker/files/.env + # prepare env var for packer + # to be used to determine which instance type to use + if [ "$SP_MINI_PLATFORM" == "aws" ] + then + platform_cmd="export AWS_INSTANCE_TYPE=t2.2xlarge" + elif [ "$SP_MINI_PLATFORM" == "gcp" ] + then + platform_cmd="export GCP_MACHINE_TYPE=n1-standard-8" + fi + else + echo "Unrecognized size! Aborted." + echo "Available sizes; large, xlarge and xxlarge." + exit 1 + fi +fi +############################################### + +if [ "$SP_MINI_PLATFORM" == "aws" ] +then + # Can't pass args through vagrant push so have to prompt + read -e -p "Please enter your AWS_ACCESS_KEY_ID: " aws_access_key_id + read -e -p "Please enter your AWS_SECRET_ACCESS_KEY: " aws_secret_access_key + + # Build AMI + cmd="$platform_cmd && \ + export SP_MINI_SIZE=$SP_MINI_SIZE && \ + export AWS_ACCESS_KEY_ID=$aws_access_key_id && \ + export AWS_SECRET_ACCESS_KEY=$aws_secret_access_key && \ + cd /vagrant && \ + packer build -only=amazon-ebs Packerfile.json" +elif [ "$SP_MINI_PLATFORM" == "gcp" ] +then + echo "GCP uses account.json file to authenticate." + echo "Make sure account.json and Packerfile.json are in same directory!" + cmd="$platform_cmd && \ + export SP_MINI_SIZE=$SP_MINI_SIZE && \ + cd /vagrant && \ + packer build -only=googlecompute Packerfile.json" +else + echo "Unrecognized platform. Aborted." + exit 1 +fi + +vagrant ssh -c "${cmd}"
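
As rewritten above, push.bash now takes a platform (aws or gcp) and a size (large, xlarge or xxlarge): it writes the matching ES_JVM_SIZE/SP_JVM_SIZE values into provisioning/roles/docker/files/.env, exports the corresponding Packer instance or machine type, and builds only the selected builder from Packerfile.json. A sketch of typical invocations, run from the repository root while the Vagrant guest is up:

    # AWS build on a t2.xlarge (8g Elasticsearch heap, 1536m per Snowplow app);
    # the script prompts for AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY
    vagrant/push.bash aws xlarge

    # GCE build on an n1-standard-2; expects account.json next to Packerfile.json
    vagrant/push.bash gcp large

Unrecognised platform or size arguments abort the script before any Packer build starts.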