Skip to content

Commit

Permalink
Merge branch 'release/r29'
Browse files: browse the repository at this point in the history
  • Loading branch information
chuwy committed Jun 11, 2018
2 parents 2343b1d + b3be807 commit 9106f61
Show file tree
Hide file tree
Showing 5 changed files with 140 additions and 135 deletions.
6 changes: 6 additions & 0 deletions CHANGELOG
Original file line number Diff line number Diff line change
@@ -1,3 +1,9 @@
Release 29 (2018-06-11)
-----------------------
RDB Shredder: bump to 0.13.1 (#105)
RDB Shredder: bump scala-common-enrich to 0.32.0 (#99)
RDB Shredder: align PostgresConstraints with atomic.events 0.10.0 (#103)

Release 28 (2017-11-13)
-----------------------
Common: add CI/CD (#55)
Expand Down
2 changes: 1 addition & 1 deletion build.sbt
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ lazy val loader = project.in(file("."))
lazy val shredder = project.in(file("shredder"))
.settings(
name := "snowplow-rdb-shredder",
version := "0.13.0",
version := "0.13.1",
description := "Spark job to shred event and context JSONs from Snowplow enriched events",
BuildSettings.oneJvmPerTestSetting // ensures that only CrossBatchDeduplicationSpec has a DuplicateStorage
)
Expand Down
2 changes: 1 addition & 1 deletion project/Dependencies.scala
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ object Dependencies {

// Scala (Shredder)
val spark = "2.2.0"
val commonEnrich = "0.27.0"
val commonEnrich = "0.32.0"

// Java (Loader)
val postgres = "42.0.0"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,137 +17,137 @@ package com.snowplowanalytics.snowplow.storage.spark.utils
/** Limits on the size of fields for Postgres; "null" indicates a type other than char or varchar. */
object PostgresConstraints {
val maxFieldLengths = List(
255,
255,
null,
null,
null,
128,
36,
null,
128,
100,
100,
100,
255,
45,
50,
36,
null,
38,
2,
2,
75,
15,
null,
null,
100,
100,
100,
100,
100,
null,
2000,
null,
16,
255,
null,
3000,
6000,
3000,
16,
255,
null,
6000,
6000,
3000,
25,
50,
255,
255,
255,
255,
500,
255,
null,
1000,
1000,
1000,
1000,
null,
null,
255,
255,
null,
null,
null,
255,
255,
255,
255,
255,
255,
255,
null,
null,
null,
null,
null,
null,
1000,
50,
50,
50,
50,
50,
255,
null,
null,
null,
null,
null,
null,
null,
null,
null,
null,
12,
null,
null,
50,
50,
50,
50,
50,
null,
null,
null,
128,
null,
null,
3,
null,
null,
null,
3,
null,
3,
64,
128,
64,
500,
null,
36,
null,
null,
36,
null,
1000,
1000,
128,
128,
128,
null
255, // app_id
255, // platform
null, // etl_tstamp
null, // collector_tstamp
null, // dvce_created_tstamp
128, // event
36, // event_id
null, // txn_id
128, // name_tracker
100, // v_tracker
100, // v_collector
100, // v_etl
255, // user_id
128, // user_ipaddress
128, // user_fingerprint
128, // domain_userid
null, // domain_sessionidx
128, // network_userid
2, // geo_country
2, // geo_region
75, // geo_city
15, // geo_zipcode
null, // geo_latitude
null, // geo_longitude
100, // geo_region_name
100, // ip_isp
128, // ip_organization
128, // ip_domain
100, // ip_netspeed
4096, // page_url
2000, // page_title
4096, // page_referrer
16, // page_urlscheme
255, // page_urlhost
null, // page_urlport
3000, // page_urlpath
6000, // page_urlquery
3000, // page_urlfragment
16, // refr_urlscheme
255, // refr_urlhost
null, // refr_urlport
6000, // refr_urlpath
6000, // refr_urlquery
3000, // refr_urlfragment
25, // refr_medium
50, // refr_source
255, // refr_term
255, // mkt_medium
255, // mkt_source
255, // mkt_term
500, // mkt_content
255, // mkt_campaign
null, // contexts (deleted)
1000, // se_category
1000, // se_action
4096, // se_label
1000, // se_property
null, // se_value
null, // unstruct_event (deleted)
255, // tr_orderid
255, // tr_affiliation
null, // tr_total
null, // tr_tax
null, // tr_shipping
255, // tr_city
255, // tr_state
255, // tr_country
255, // ti_orderid
255, // ti_sku
255, // ti_name
255, // ti_category
null, // ti_price
null, // ti_quantity
null, // pp_xoffset_min
null, // pp_xoffset_max
null, // pp_yoffset_min
null, // pp_yoffset_max
1000, // useragent
50, // br_name
50, // br_family
50, // br_version
50, // br_type
50, // br_renderengine
255, // br_lang
null, // br_features_pdf
null, // br_features_flash
null, // br_features_java
null, // br_features_director
null, // br_features_quicktime
null, // br_features_realplayer
null, // br_features_windowsmedia
null, // br_features_gears
null, // br_features_silverlight
null, // br_cookies
12, // br_colordepth
null, // br_viewwidth
null, // br_viewheight
50, // os_name
50, // os_family
50, // os_manufacturer
255, // os_timezone
50, // dvce_type
null, // dvce_ismobile
null, // dvce_screenwidth
null, // dvce_screenheight
128, // doc_charset
null, // doc_width
null, // doc_height
3, // tr_currency
null, // tr_total_base
null, // tr_tax_base
null, // tr_shipping_base
3, // ti_currency
null, // ti_price_base
3, // base_currency
64, // geo_timezone
128, // mkt_clickid
64, // mkt_network
500, // etl_tags
null, // dvce_sent_tstamp
128, // refr_domain_userid
null, // refr_dvce_tstamp
null, // derived_contexts (deleted)
128, // domain_sessionid
null, // derived_tstamp
1000, // event_vendor
1000, // event_name
128, // event_format
128, // event_version
128, // event_fingerprint
null // true_tstamp
) map {
case i: Int => Some(i)
case _ => None
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,7 @@ object InvalidJsonsSpec {
"""snowplowweb web 2014-06-01 14:04:11.639 2014-05-29 18:04:12.000 2014-05-29 18:04:11.639 page_view a4583919-4df8-496a-917b-d40fa1c8ca7f 836413 clojure js-2.0.0-M2 clj-0.6.0-tom-0.0.4 hadoop-0.5.0-common-0.4.0 216.207.42.134 3499345421 3b1d1a375044eede 3 2bad2a4e-aae4-4bea-8acd-399e7fe0366a US CA South San Francisco 37.654694 -122.4077 http://snowplowanalytics.com/blog/2013/02/08/writing-hive-udfs-and-serdes/ Writing Hive UDFs - a tutorial http snowplowanalytics.com 80 /blog/2013/02/08/writing-hive-udfs-and-serdes/ &&& |%| Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_3) AppleWebKit/537.75.14 (KHTML, like Gecko) Version/7.0.3 Safari/537.75.14 Safari Safari Browser WEBKIT en-us 0 0 0 0 0 0 0 0 0 1 24 1440 1845 Mac OS Mac OS Apple Inc. America/Los_Angeles Computer 0 1440 900 UTF-8 1440 6015"""
)
val expected = (line: String) =>
"""{"line":"%s","errors":[{"level":"error","message":"Field [ue_properties]: invalid JSON [|%%|] with parsing error: Unexpected character ('|' (code 124)): expected a valid value (number, String, array, object, 'true', 'false' or 'null') at [Source: |%%|; line: 1, column: 2]"},{"level":"error","message":"Field [context]: invalid JSON [&&&] with parsing error: Unexpected character ('&' (code 38)): expected a valid value (number, String, array, object, 'true', 'false' or 'null') at [Source: &&&; line: 1, column: 2]"}]}"""
.format(line)
"""{"line":"snowplowweb\tweb\t2014-06-01 14:04:11.639\t2014-05-29 18:04:12.000\t2014-05-29 18:04:11.639\tpage_view\ta4583919-4df8-496a-917b-d40fa1c8ca7f\t836413\tclojure\tjs-2.0.0-M2\tclj-0.6.0-tom-0.0.4\thadoop-0.5.0-common-0.4.0\t\t216.207.42.134\t3499345421\t3b1d1a375044eede\t3\t2bad2a4e-aae4-4bea-8acd-399e7fe0366a\tUS\tCA\tSouth San Francisco\t\t37.654694\t-122.4077\t\t\t\t\t\thttp://snowplowanalytics.com/blog/2013/02/08/writing-hive-udfs-and-serdes/\tWriting Hive UDFs - a tutorial\t\thttp\tsnowplowanalytics.com\t80\t/blog/2013/02/08/writing-hive-udfs-and-serdes/\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t&&&\t\t\t\t\t\t|%|\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\tMozilla/5.0 (Macintosh; Intel Mac OS X 10_9_3) AppleWebKit/537.75.14 (KHTML, like Gecko) Version/7.0.3 Safari/537.75.14\tSafari\tSafari\t\tBrowser\tWEBKIT\ten-us\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t24\t1440\t1845\tMac OS\tMac OS\tApple Inc.\tAmerica/Los_Angeles\tComputer\t0\t1440\t900\tUTF-8\t1440\t6015","errors":[{"level":"error","message":"Field [ue_properties]: invalid JSON [|%|] with parsing error: Unexpected character ('|' (code 124)): expected a valid value (number, String, array, object, 'true', 'false' or 'null') at [Source: (String)\"|%|\"; line: 1, column: 2]"},{"level":"error","message":"Field [context]: invalid JSON [&&&] with parsing error: Unexpected character ('&' (code 38)): expected a valid value (number, String, array, object, 'true', 'false' or 'null') at [Source: (String)\"&&&\"; line: 1, column: 2]"}]}"""
.replaceAll("[\t]","\\\\t")
}

Expand Down

0 comments on commit 9106f61

Please sign in to comment.