diff --git a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/EtlPipeline.scala b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/EtlPipeline.scala index 7bb4b4794..736cd661e 100644 --- a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/EtlPipeline.scala +++ b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/EtlPipeline.scala @@ -10,9 +10,8 @@ */ package com.snowplowanalytics.snowplow.enrich.common -import cats.Monad import cats.data.{Validated, ValidatedNel} -import cats.effect.Clock +import cats.effect.kernel.Sync import cats.implicits._ import org.joda.time.DateTime @@ -56,7 +55,7 @@ object EtlPipeline { * @return the ValidatedMaybeCanonicalOutput. Thanks to flatMap, will include any validation * errors contained within the ValidatedMaybeCanonicalInput */ - def processEvents[F[_]: Clock: Monad]( + def processEvents[F[_]: Sync]( adapterRegistry: AdapterRegistry[F], enrichmentRegistry: EnrichmentRegistry[F], client: IgluCirceClient[F], @@ -90,11 +89,11 @@ object EtlPipeline { .toValidated } case Validated.Invalid(badRow) => - Monad[F].pure(List(badRow.invalid[EnrichedEvent])) + Sync[F].pure(List(badRow.invalid[EnrichedEvent])) } case Validated.Invalid(badRows) => - Monad[F].pure(badRows.map(_.invalid[EnrichedEvent])).map(_.toList) + Sync[F].pure(badRows.map(_.invalid[EnrichedEvent])).map(_.toList) case Validated.Valid(None) => - Monad[F].pure(List.empty[Validated[BadRow, EnrichedEvent]]) + Sync[F].pure(List.empty[Validated[BadRow, EnrichedEvent]]) } } diff --git a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/AtomicFields.scala b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/AtomicFields.scala index 27d97a5a4..26e513760 100644 --- a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/AtomicFields.scala +++ b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/AtomicFields.scala @@ -10,6 +10,15 @@ */ package com.snowplowanalytics.snowplow.enrich.common.enrichments +import cats.data.NonEmptyList + +import com.snowplowanalytics.snowplow.badrows.FailureDetails + +import com.snowplowanalytics.iglu.client.ClientError.ValidationError +import com.snowplowanalytics.iglu.client.validator.{ValidatorError, ValidatorReport} + +import com.snowplowanalytics.iglu.core.{SchemaKey, SchemaVer} + import com.snowplowanalytics.snowplow.enrich.common.enrichments.AtomicFields.LimitedAtomicField import com.snowplowanalytics.snowplow.enrich.common.outputs.EnrichedEvent @@ -17,6 +26,8 @@ final case class AtomicFields(value: List[LimitedAtomicField]) object AtomicFields { + val atomicSchema = SchemaKey("com.snowplowanalytics.snowplow", "atomic", "jsonschema", SchemaVer.Full(1, 0, 0)) + final case class AtomicField( name: String, enrichedValueExtractor: EnrichedEvent => String @@ -121,4 +132,13 @@ object AtomicFields { AtomicFields(withLimits) } + + def errorsToSchemaViolation(errors: NonEmptyList[ValidatorReport]): FailureDetails.SchemaViolation = { + val clientError = ValidationError(ValidatorError.InvalidData(errors), None) + + FailureDetails.SchemaViolation.IgluError( + AtomicFields.atomicSchema, + clientError + ) + } } diff --git a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/AtomicFieldsLengthValidator.scala b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/AtomicFieldsLengthValidator.scala index 5f2f98582..4eb514a49 100644 --- a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/AtomicFieldsLengthValidator.scala +++ b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/AtomicFieldsLengthValidator.scala @@ -14,14 +14,14 @@ import org.slf4j.LoggerFactory import cats.Monad import cats.data.Validated.{Invalid, Valid} -import cats.data.{NonEmptyList, ValidatedNel} +import cats.data.NonEmptyList import cats.implicits._ -import com.snowplowanalytics.snowplow.badrows.FailureDetails.EnrichmentFailure -import com.snowplowanalytics.snowplow.badrows.{BadRow, FailureDetails, Processor} +import com.snowplowanalytics.iglu.client.validator.ValidatorReport + +import com.snowplowanalytics.snowplow.badrows.FailureDetails -import com.snowplowanalytics.snowplow.enrich.common.adapters.RawEvent import com.snowplowanalytics.snowplow.enrich.common.enrichments.AtomicFields.LimitedAtomicField import com.snowplowanalytics.snowplow.enrich.common.outputs.EnrichedEvent @@ -35,66 +35,47 @@ object AtomicFieldsLengthValidator { def validate[F[_]: Monad]( event: EnrichedEvent, - rawEvent: RawEvent, - processor: Processor, acceptInvalid: Boolean, invalidCount: F[Unit], atomicFields: AtomicFields - ): F[Either[BadRow, Unit]] = + ): F[Either[FailureDetails.SchemaViolation, Unit]] = atomicFields.value - .map(validateField(event)) + .map(field => validateField(event, field).toValidatedNel) .combineAll match { case Invalid(errors) if acceptInvalid => - handleAcceptableBadRow(invalidCount, event, errors) *> Monad[F].pure(Right(())) + handleAcceptableErrors(invalidCount, event, errors) *> Monad[F].pure(Right(())) case Invalid(errors) => - Monad[F].pure(buildBadRow(event, rawEvent, processor, errors).asLeft) + Monad[F].pure(AtomicFields.errorsToSchemaViolation(errors).asLeft) case Valid(()) => Monad[F].pure(Right(())) } private def validateField( - event: EnrichedEvent - )( + event: EnrichedEvent, atomicField: LimitedAtomicField - ): ValidatedNel[String, Unit] = { + ): Either[ValidatorReport, Unit] = { val actualValue = atomicField.value.enrichedValueExtractor(event) if (actualValue != null && actualValue.length > atomicField.limit) - s"Field ${atomicField.value.name} longer than maximum allowed size ${atomicField.limit}".invalidNel + ValidatorReport( + s"Field is longer than maximum allowed size ${atomicField.limit}", + Some(atomicField.value.name), + Nil, + Some(actualValue) + ).asLeft else - Valid(()) + Right(()) } - private def buildBadRow( - event: EnrichedEvent, - rawEvent: RawEvent, - processor: Processor, - errors: NonEmptyList[String] - ): BadRow.EnrichmentFailures = - EnrichmentManager.buildEnrichmentFailuresBadRow( - NonEmptyList( - asEnrichmentFailure("Enriched event does not conform to atomic schema field's length restrictions"), - errors.toList.map(asEnrichmentFailure) - ), - EnrichedEvent.toPartiallyEnrichedEvent(event), - RawEvent.toRawEvent(rawEvent), - processor - ) - - private def handleAcceptableBadRow[F[_]: Monad]( + private def handleAcceptableErrors[F[_]: Monad]( invalidCount: F[Unit], event: EnrichedEvent, - errors: NonEmptyList[String] + errors: NonEmptyList[ValidatorReport] ): F[Unit] = invalidCount *> Monad[F].pure( logger.debug( - s"Enriched event not valid against atomic schema. Event id: ${event.event_id}. Invalid fields: ${errors.toList.mkString(",")}" + s"Enriched event not valid against atomic schema. Event id: ${event.event_id}. Invalid fields: ${errors.map(_.path).toList.flatten.mkString(", ")}" ) ) - private def asEnrichmentFailure(errorMessage: String): EnrichmentFailure = - EnrichmentFailure( - enrichment = None, - FailureDetails.EnrichmentFailureMessage.Simple(errorMessage) - ) } diff --git a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/ClientEnrichments.scala b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/ClientEnrichments.scala index 506b4560a..3a5de76f8 100644 --- a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/ClientEnrichments.scala +++ b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/ClientEnrichments.scala @@ -14,7 +14,7 @@ import java.lang.{Integer => JInteger} import cats.syntax.either._ -import com.snowplowanalytics.snowplow.badrows._ +import com.snowplowanalytics.iglu.client.validator.ValidatorReport /** * Contains enrichments related to the client - where the client is the software which is using the @@ -36,21 +36,16 @@ object ClientEnrichments { * @param res The packed string holding the screen dimensions * @return the ResolutionTuple or an error message, boxed in a Scalaz Validation */ - val extractViewDimensions: (String, String) => Either[FailureDetails.EnrichmentFailure, (JInteger, JInteger)] = + val extractViewDimensions: (String, String) => Either[ValidatorReport, (JInteger, JInteger)] = (field, res) => (res match { case ResRegex(width, height) => Either .catchNonFatal((width.toInt: JInteger, height.toInt: JInteger)) - .leftMap(_ => "could not be converted to java.lang.Integer s") - case _ => s"does not conform to regex ${ResRegex.toString}".asLeft + .leftMap(_ => "Could not be converted to java.lang.Integer s") + case _ => s"Does not conform to regex ${ResRegex.toString}".asLeft }).leftMap { msg => - val f = FailureDetails.EnrichmentFailureMessage.InputData( - field, - Option(res), - msg - ) - FailureDetails.EnrichmentFailure(None, f) + ValidatorReport(msg, Some(field), Nil, Option(res)) } } diff --git a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/EnrichmentManager.scala b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/EnrichmentManager.scala index 1d238fdc2..d8b0bd4fb 100644 --- a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/EnrichmentManager.scala +++ b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/EnrichmentManager.scala @@ -17,13 +17,14 @@ import org.joda.time.DateTime import io.circe.Json import cats.{Applicative, Monad} import cats.data.{EitherT, NonEmptyList, OptionT, StateT} -import cats.effect.Clock +import cats.effect.kernel.{Clock, Sync} import cats.implicits._ import com.snowplowanalytics.refererparser._ import com.snowplowanalytics.iglu.client.IgluCirceClient import com.snowplowanalytics.iglu.client.resolver.registries.RegistryLookup +import com.snowplowanalytics.iglu.client.validator.ValidatorReport import com.snowplowanalytics.iglu.core.SelfDescribingData import com.snowplowanalytics.iglu.core.circe.implicits._ @@ -42,6 +43,8 @@ import com.snowplowanalytics.snowplow.enrich.common.enrichments.registry.sqlquer import com.snowplowanalytics.snowplow.enrich.common.enrichments.web.{PageEnrichments => WPE} import com.snowplowanalytics.snowplow.enrich.common.outputs.EnrichedEvent import com.snowplowanalytics.snowplow.enrich.common.utils.{IgluUtils, ConversionUtils => CU} +import _root_.com.snowplowanalytics.iglu.core.SchemaKey +import com.snowplowanalytics.iglu.core.SchemaVer object EnrichmentManager { @@ -56,7 +59,7 @@ object EnrichmentManager { * @param invalidCount Function to increment the count of invalid events * @return Enriched event or bad row if a problem occured */ - def enrichEvent[F[_]: Monad: Clock]( + def enrichEvent[F[_]: Sync]( registry: EnrichmentRegistry[F], client: IgluCirceClient[F], processor: Processor, @@ -68,8 +71,15 @@ object EnrichmentManager { atomicFields: AtomicFields ): EitherT[F, BadRow, EnrichedEvent] = for { - enriched <- EitherT.fromEither[F](setupEnrichedEvent(raw, etlTstamp, processor)) - extractResult <- IgluUtils.extractAndValidateInputJsons(enriched, client, raw, processor, registryLookup) + enriched <- EitherT.rightT[F, BadRow](new EnrichedEvent) + extractResult <- mapAndValidateInput( + raw, + enriched, + etlTstamp, + processor, + client, + registryLookup + ) _ = { ME.formatUnstructEvent(extractResult.unstructEvent).foreach(e => enriched.unstruct_event = e) ME.formatContexts(extractResult.contexts).foreach(c => enriched.contexts = c) @@ -84,19 +94,41 @@ object EnrichmentManager { featureFlags.legacyEnrichmentOrder ) _ = ME.formatContexts(enrichmentsContexts ::: extractResult.validationInfoContexts).foreach(c => enriched.derived_contexts = c) - _ <- IgluUtils - .validateEnrichmentsContexts[F](client, enrichmentsContexts, raw, processor, enriched, registryLookup) - _ <- EitherT.rightT[F, BadRow]( - anonIp(enriched, registry.anonIp).foreach(enriched.user_ipaddress = _) + _ <- validateEnriched( + enriched, + raw, + enrichmentsContexts, + client, + processor, + registryLookup, + featureFlags.acceptInvalid, + invalidCount, + atomicFields ) - _ <- EitherT.rightT[F, BadRow] { - piiTransform(enriched, registry.piiPseudonymizer).foreach { pii => - enriched.pii = pii.asString - } - } - _ <- validateEnriched(enriched, raw, processor, featureFlags.acceptInvalid, invalidCount, atomicFields) } yield enriched + private def mapAndValidateInput[F[_]: Sync]( + raw: RawEvent, + enrichedEvent: EnrichedEvent, + etlTstamp: DateTime, + processor: Processor, + client: IgluCirceClient[F], + registryLookup: RegistryLookup[F] + ): EitherT[F, BadRow, IgluUtils.EventExtractResult] = + EitherT { + for { + setup <- setupEnrichedEvent[F](raw, enrichedEvent, etlTstamp, processor).map(_.toValidatedNel) + iglu <- IgluUtils.extractAndValidateInputJsons(enrichedEvent, client, registryLookup) + } yield (iglu <* setup).leftMap { violations => + buildSchemaViolationsBadRow( + violations, + EnrichedEvent.toPartiallyEnrichedEvent(enrichedEvent), + RawEvent.toRawEvent(raw), + processor + ) + }.toEither + } + /** * Run all the enrichments and aggregate the errors if any * @param enriched /!\ MUTABLE enriched event, mutated IN-PLACE /!\ @@ -112,7 +144,7 @@ object EnrichmentManager { inputContexts: List[SelfDescribingData[Json]], unstructEvent: Option[SelfDescribingData[Json]], legacyOrder: Boolean - ): EitherT[F, BadRow.EnrichmentFailures, List[SelfDescribingData[Json]]] = + ): EitherT[F, BadRow, List[SelfDescribingData[Json]]] = EitherT { accState(registry, raw, inputContexts, unstructEvent, legacyOrder) .runS(Accumulation(enriched, Nil, Nil)) @@ -132,6 +164,31 @@ object EnrichmentManager { } } + private def validateEnriched[F[_]: Clock: Monad]( + enriched: EnrichedEvent, + raw: RawEvent, + enrichmentsContexts: List[SelfDescribingData[Json]], + client: IgluCirceClient[F], + processor: Processor, + registryLookup: RegistryLookup[F], + acceptInvalid: Boolean, + invalidCount: F[Unit], + atomicFields: AtomicFields + ): EitherT[F, BadRow, Unit] = + EitherT { + for { + atomic <- AtomicFieldsLengthValidator.validate[F](enriched, acceptInvalid, invalidCount, atomicFields).map(_.toValidatedNel) + contexts <- IgluUtils.validateEnrichmentsContexts[F](client, enrichmentsContexts, registryLookup) + } yield (atomic |+| contexts).void.leftMap { violations => + buildSchemaViolationsBadRow( + violations, + EnrichedEvent.toPartiallyEnrichedEvent(enriched), + RawEvent.toRawEvent(raw), + processor + ) + }.toEither + } + private[enrichments] case class Accumulation( event: EnrichedEvent, errors: List[FailureDetails.EnrichmentFailure], @@ -217,6 +274,8 @@ object EnrichmentManager { _ <- geoLocation[F](registry.ipLookups) // Execute IP lookup enrichment _ <- sqlContexts // Derive some contexts with custom SQL Query enrichment _ <- apiContexts // Derive some contexts with custom API Request enrichment + _ <- anonIp[F](registry.anonIp) // Anonymize the IP + _ <- piiTransform[F](registry.piiPseudonymizer) // Run PII pseudonymization // format: on } yield () else @@ -243,48 +302,43 @@ object EnrichmentManager { _ <- registry.javascriptScript.traverse(getJsScript[F](_)) // Execute the JavaScript scripting enrichment _ <- sqlContexts // Derive some contexts with custom SQL Query enrichment _ <- apiContexts // Derive some contexts with custom API Request enrichment + _ <- anonIp[F](registry.anonIp) // Anonymize the IP + _ <- piiTransform[F](registry.piiPseudonymizer) // Run PII pseudonymization // format: on } yield () } - /** Create the mutable [[EnrichedEvent]] and initialize it. */ - private def setupEnrichedEvent( + /** Initialize the mutable [[EnrichedEvent]]. */ + private def setupEnrichedEvent[F[_]: Sync]( raw: RawEvent, + e: EnrichedEvent, etlTstamp: DateTime, processor: Processor - ): Either[BadRow.EnrichmentFailures, EnrichedEvent] = { - val e = new EnrichedEvent() - e.event_id = EE.generateEventId() // May be updated later if we have an `eid` parameter - e.v_collector = raw.source.name // May be updated later if we have a `cv` parameter - e.v_etl = ME.etlVersion(processor) - e.etl_tstamp = EE.toTimestamp(etlTstamp) - e.network_userid = raw.context.userId.map(_.toString).orNull // May be updated later by 'nuid' - e.user_ipaddress = ME - .extractIp("user_ipaddress", raw.context.ipAddress.orNull) - .toOption - .orNull // May be updated later by 'ip' - // May be updated later if we have a `ua` parameter - setUseragent(e, raw.context.useragent) - // Validate that the collectorTstamp exists and is Redshift-compatible - val collectorTstamp = setCollectorTstamp(e, raw.context.timestamp).toValidatedNel - // Map/validate/transform input fields to enriched event fields - val transformed = Transform.transform(raw, e) - - (collectorTstamp |+| transformed) - .leftMap { enrichmentFailures => - EnrichmentManager.buildEnrichmentFailuresBadRow( - enrichmentFailures, - EnrichedEvent.toPartiallyEnrichedEvent(e), - RawEvent.toRawEvent(raw), - processor - ) - } - .as(e) - .toEither - } + ): F[Either[FailureDetails.SchemaViolation, Unit]] = + Sync[F].delay { + e.event_id = EE.generateEventId() // May be updated later if we have an `eid` parameter + e.v_collector = raw.source.name // May be updated later if we have a `cv` parameter + e.v_etl = ME.etlVersion(processor) + e.etl_tstamp = EE.toTimestamp(etlTstamp) + e.network_userid = raw.context.userId.map(_.toString).orNull // May be updated later by 'nuid' + e.user_ipaddress = ME + .extractIp("user_ipaddress", raw.context.ipAddress.orNull) + .toOption + .orNull // May be updated later by 'ip' + // May be updated later if we have a `ua` parameter + setUseragent(e, raw.context.useragent) + // Validate that the collectorTstamp exists and is Redshift-compatible + val collectorTstamp = setCollectorTstamp(e, raw.context.timestamp).toValidatedNel + // Map/validate/transform input fields to enriched event fields + val transformed = Transform.transform(raw, e) + + (collectorTstamp |+| transformed) + .leftMap(AtomicFields.errorsToSchemaViolation) + .toEither + } - def setCollectorTstamp(event: EnrichedEvent, timestamp: Option[DateTime]): Either[FailureDetails.EnrichmentFailure, Unit] = + def setCollectorTstamp(event: EnrichedEvent, timestamp: Option[DateTime]): Either[ValidatorReport, Unit] = EE.formatCollectorTstamp(timestamp).map { t => event.collector_tstamp = t ().asRight @@ -418,12 +472,21 @@ object EnrichmentManager { result.sequence.bimap(NonEmptyList.one(_), _.toList) } - def anonIp(event: EnrichedEvent, anonIp: Option[AnonIpEnrichment]): Option[String] = - Option(event.user_ipaddress).map { ip => - anonIp match { - case Some(anon) => anon.anonymizeIp(ip) - case None => ip - } + def anonIp[F[_]: Applicative](anonIp: Option[AnonIpEnrichment]): EStateT[F, Unit] = + EStateT.fromEither { + case (event, _) => + anonIp match { + case Some(anon) => + Option(event.user_ipaddress) match { + case Some(ip) => + Option(anon.anonymizeIp(ip)).foreach(event.user_ipaddress = _) + Nil.asRight + case None => + Nil.asRight + } + case None => + Nil.asRight + } } def getUaUtils[F[_]: Applicative](userAgentUtils: Option[UserAgentUtilsEnrichment]): EStateT[F, Unit] = @@ -481,10 +544,14 @@ object EnrichmentManager { event.base_currency = currency.baseCurrency.getCode // Note that jBigDecimalToDouble is applied to either-valid-or-null event POJO // properties, so we don't expect any of these four vals to be a Failure - val trTax = CU.jBigDecimalToDouble("tr_tx", event.tr_tax).toValidatedNel - val tiPrice = CU.jBigDecimalToDouble("ti_pr", event.ti_price).toValidatedNel - val trTotal = CU.jBigDecimalToDouble("tr_tt", event.tr_total).toValidatedNel - val trShipping = CU.jBigDecimalToDouble("tr_sh", event.tr_shipping).toValidatedNel + val enrichmentInfo = FailureDetails.EnrichmentInformation( + SchemaKey("com.snowplowanalytics.snowplow", "currency_conversion_config", "jsonschema", SchemaVer.Full(1, 0, 0)), + "currency_conversion" + ) + val trTax = CU.jBigDecimalToDouble("tr_tx", event.tr_tax, enrichmentInfo).toValidatedNel + val tiPrice = CU.jBigDecimalToDouble("ti_pr", event.ti_price, enrichmentInfo).toValidatedNel + val trTotal = CU.jBigDecimalToDouble("tr_tt", event.tr_total, enrichmentInfo).toValidatedNel + val trShipping = CU.jBigDecimalToDouble("tr_sh", event.tr_shipping, enrichmentInfo).toValidatedNel EitherT( (trTotal, trTax, trShipping, tiPrice) .mapN { @@ -745,11 +812,31 @@ object EnrichmentManager { } } - def piiTransform(event: EnrichedEvent, piiPseudonymizer: Option[PiiPseudonymizerEnrichment]): Option[SelfDescribingData[Json]] = - piiPseudonymizer.flatMap(_.transformer(event)) + def piiTransform[F[_]: Applicative](piiPseudonymizer: Option[PiiPseudonymizerEnrichment]): EStateT[F, Unit] = + EStateT.fromEither { + case (event, _) => + piiPseudonymizer match { + case Some(pseudonymizer) => + pseudonymizer.transformer(event).foreach(p => event.pii = p.asString) + Nil.asRight + case None => + Nil.asRight + } + } + + private def buildSchemaViolationsBadRow( + vs: NonEmptyList[FailureDetails.SchemaViolation], + pee: Payload.PartiallyEnrichedEvent, + re: Payload.RawEvent, + processor: Processor + ): BadRow.SchemaViolations = + BadRow.SchemaViolations( + processor, + Failure.SchemaViolations(Instant.now(), vs), + Payload.EnrichmentPayload(pee, re) + ) - /** Build `BadRow.EnrichmentFailures` from a list of `FailureDetails.EnrichmentFailure`s */ - def buildEnrichmentFailuresBadRow( + private def buildEnrichmentFailuresBadRow( fs: NonEmptyList[FailureDetails.EnrichmentFailure], pee: Payload.PartiallyEnrichedEvent, re: Payload.RawEvent, @@ -761,21 +848,4 @@ object EnrichmentManager { Payload.EnrichmentPayload(pee, re) ) - /** - * Validates enriched events against atomic schema. - * For now it's possible to accept enriched events that are not valid. - * See https://github.com/snowplow/enrich/issues/517#issuecomment-1033910690 - */ - private def validateEnriched[F[_]: Monad]( - enriched: EnrichedEvent, - raw: RawEvent, - processor: Processor, - acceptInvalid: Boolean, - invalidCount: F[Unit], - atomicFields: AtomicFields - ): EitherT[F, BadRow, Unit] = - EitherT { - //We're using static field's length validation. See more in https://github.com/snowplow/enrich/issues/608 - AtomicFieldsLengthValidator.validate[F](enriched, raw, processor, acceptInvalid, invalidCount, atomicFields) - } } diff --git a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/EventEnrichments.scala b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/EventEnrichments.scala index eaf10ae62..74d49a485 100644 --- a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/EventEnrichments.scala +++ b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/EventEnrichments.scala @@ -19,6 +19,8 @@ import cats.syntax.option._ import org.joda.time.{DateTime, DateTimeZone, Period} import org.joda.time.format.DateTimeFormat +import com.snowplowanalytics.iglu.client.validator.ValidatorReport + import com.snowplowanalytics.snowplow.badrows._ /** Holds the enrichments related to events. */ @@ -47,22 +49,17 @@ object EventEnrichments { * @param Optional collectorTstamp * @return Validation boxing the result of making the timestamp Redshift-compatible */ - def formatCollectorTstamp(collectorTstamp: Option[DateTime]): Either[FailureDetails.EnrichmentFailure, String] = - (collectorTstamp match { - case None => - FailureDetails.EnrichmentFailureMessage - .InputData("collector_tstamp", None, "should be set") - .asLeft + def formatCollectorTstamp(collectorTstamp: Option[DateTime]): Either[ValidatorReport, String] = + collectorTstamp match { + case None => ValidatorReport("Field not set", Some("collector_tstamp"), Nil, None).asLeft case Some(t) => val formattedTimestamp = toTimestamp(t) if (formattedTimestamp.startsWith("-") || t.getYear > 9999 || t.getYear < 0) { - val msg = s"formatted as $formattedTimestamp is not Redshift-compatible" - FailureDetails.EnrichmentFailureMessage - .InputData("collector_tstamp", t.toString.some, msg) - .asLeft + val msg = s"Formatted as $formattedTimestamp is not Redshift-compatible" + ValidatorReport(msg, Some("collector_tstamp"), Nil, Some(t.toString)).asLeft } else formattedTimestamp.asRight - }).leftMap(FailureDetails.EnrichmentFailure(None, _)) + } /** * Calculate the derived timestamp @@ -103,7 +100,7 @@ object EventEnrichments { .EnrichmentFailure( None, FailureDetails.EnrichmentFailureMessage.Simple( - s"exception calculating derived timestamp: ${e.getMessage}" + s"Exception calculating derived timestamp: ${e.getMessage}" ) ) .asLeft @@ -116,30 +113,28 @@ object EventEnrichments { * @param tstamp The timestamp as stored in the Tracker Protocol * @return a Tuple of two Strings (date and time), or an error message if the format was invalid */ - val extractTimestamp: (String, String) => Either[FailureDetails.EnrichmentFailure, String] = + val extractTimestamp: (String, String) => Either[ValidatorReport, String] = (field, tstamp) => try { val dt = new DateTime(tstamp.toLong) val timestampString = toTimestamp(dt) - if (timestampString.startsWith("-") || dt.getYear > 9999 || dt.getYear < 0) { - val msg = s"formatting as $timestampString is not Redshift-compatible" - val f = FailureDetails.EnrichmentFailureMessage.InputData( - field, - Option(tstamp), - msg - ) - FailureDetails.EnrichmentFailure(None, f).asLeft - } else + if (timestampString.startsWith("-") || dt.getYear > 9999 || dt.getYear < 0) + ValidatorReport( + s"Formatting as $timestampString is not Redshift-compatible", + Some(field), + Nil, + Option(tstamp) + ).asLeft + else timestampString.asRight } catch { case _: NumberFormatException => - val msg = "not in the expected format: ms since epoch" - val f = FailureDetails.EnrichmentFailureMessage.InputData( - field, - Option(tstamp), - msg - ) - FailureDetails.EnrichmentFailure(None, f).asLeft + ValidatorReport( + "Not in the expected format: ms since epoch", + Some(field), + Nil, + Option(tstamp) + ).asLeft } /** @@ -149,7 +144,7 @@ object EventEnrichments { * @param eventCode The event code * @return the event type, or an error message if not recognised, boxed in a Scalaz Validation */ - val extractEventType: (String, String) => Either[FailureDetails.EnrichmentFailure, String] = + val extractEventType: (String, String) => Either[ValidatorReport, String] = (field, code) => code match { case "se" => "struct".asRight @@ -161,13 +156,8 @@ object EventEnrichments { case "pv" => "page_view".asRight case "pp" => "page_ping".asRight case _ => - val msg = "not recognized as an event type" - val f = FailureDetails.EnrichmentFailureMessage.InputData( - field, - Option(code), - msg - ) - FailureDetails.EnrichmentFailure(None, f).asLeft + val msg = "Not a valid event type" + ValidatorReport(msg, Some(field), Nil, Option(code)).asLeft } /** diff --git a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/MiscEnrichments.scala b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/MiscEnrichments.scala index aaa476eb8..4ed0670b6 100644 --- a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/MiscEnrichments.scala +++ b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/MiscEnrichments.scala @@ -14,7 +14,9 @@ import cats.syntax.either._ import io.circe._ -import com.snowplowanalytics.snowplow.badrows.{FailureDetails, Processor} +import com.snowplowanalytics.snowplow.badrows.Processor + +import com.snowplowanalytics.iglu.client.validator.ValidatorReport import com.snowplowanalytics.iglu.core.{SchemaKey, SchemaVer, SelfDescribingData} import com.snowplowanalytics.iglu.core.circe.implicits._ @@ -44,7 +46,7 @@ object MiscEnrichments { * @param platform The code for the platform generating this event. * @return a Scalaz ValidatedString. */ - val extractPlatform: (String, String) => Either[FailureDetails.EnrichmentFailure, String] = + val extractPlatform: (String, String) => Either[ValidatorReport, String] = (field, platform) => platform match { case "web" => "web".asRight // Web, including Mobile Web @@ -57,17 +59,12 @@ object MiscEnrichments { case "srv" => "srv".asRight // Server-side App case "headset" => "headset".asRight // AR/VR Headset case _ => - val msg = "not recognized as a tracking platform" - val f = FailureDetails.EnrichmentFailureMessage.InputData( - field, - Option(platform), - msg - ) - FailureDetails.EnrichmentFailure(None, f).asLeft + val msg = "Not a valid platform" + ValidatorReport(msg, Some(field), Nil, Option(platform)).asLeft } /** Make a String TSV safe */ - val toTsvSafe: (String, String) => Either[FailureDetails.EnrichmentFailure, String] = + val toTsvSafe: (String, String) => Either[ValidatorReport, String] = (_, value) => CU.makeTsvSafe(value).asRight /** @@ -76,7 +73,7 @@ object MiscEnrichments { * Here we retrieve the first one as it is supposed to be the client one, c.f. * https://en.m.wikipedia.org/wiki/X-Forwarded-For#Format */ - val extractIp: (String, String) => Either[FailureDetails.EnrichmentFailure, String] = + val extractIp: (String, String) => Either[ValidatorReport, String] = (_, value) => { val lastIp = Option(value).map(_.split("[,|, ]").head).orNull CU.makeTsvSafe(lastIp).asRight diff --git a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/Transform.scala b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/Transform.scala index 00474b069..0c2cf90c3 100644 --- a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/Transform.scala +++ b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/Transform.scala @@ -13,7 +13,7 @@ package com.snowplowanalytics.snowplow.enrich.common.enrichments import cats.implicits._ import cats.data.ValidatedNel -import com.snowplowanalytics.snowplow.badrows._ +import com.snowplowanalytics.iglu.client.validator.ValidatorReport import com.snowplowanalytics.snowplow.enrich.common.enrichments.{EventEnrichments => EE} import com.snowplowanalytics.snowplow.enrich.common.enrichments.{MiscEnrichments => ME} @@ -31,7 +31,7 @@ object Transform { * to "user_ipaddress" in the enriched event * @param enriched /!\ MUTABLE enriched event, mutated IN-PLACE /!\ */ - private[enrichments] def transform(raw: RawEvent, enriched: EnrichedEvent): ValidatedNel[FailureDetails.EnrichmentFailure, Unit] = { + private[enrichments] def transform(raw: RawEvent, enriched: EnrichedEvent): ValidatedNel[ValidatorReport, Unit] = { val sourceMap: SourceMap = raw.parameters.collect { case (k, Some(v)) => (k, v) } val firstPassTransform = enriched.transform(sourceMap, firstPassTransformMap) val secondPassTransform = enriched.transform(sourceMap, secondPassTransformMap) diff --git a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/CrossNavigationEnrichment.scala b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/CrossNavigationEnrichment.scala index 443075822..fa619c47d 100644 --- a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/CrossNavigationEnrichment.scala +++ b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/enrichments/registry/CrossNavigationEnrichment.scala @@ -192,7 +192,17 @@ object CrossNavigationEnrichment extends ParseableEnrichment { private def extractTstamp(str: String): Either[FailureDetails.EnrichmentFailure, Option[String]] = str match { case "" => None.asRight - case s => EE.extractTimestamp("sp_dtm", s).map(_.some) + case s => + EE.extractTimestamp("sp_dtm", s) + .leftMap { error => + val f = FailureDetails.EnrichmentFailureMessage.InputData( + error.path.getOrElse(""), + error.keyword, + error.message + ) + FailureDetails.EnrichmentFailure(None, f) + } + .map(_.some) } /** diff --git a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/utils/ConversionUtils.scala b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/utils/ConversionUtils.scala index 39866edcb..9df3ca40e 100644 --- a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/utils/ConversionUtils.scala +++ b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/utils/ConversionUtils.scala @@ -42,6 +42,8 @@ import org.joda.time.format.DateTimeFormat import com.snowplowanalytics.iglu.core.circe.implicits._ import com.snowplowanalytics.iglu.core.{SchemaKey, SchemaVer, SelfDescribingData} +import com.snowplowanalytics.iglu.client.validator.ValidatorReport + import com.snowplowanalytics.snowplow.badrows.{FailureDetails, Processor} import com.snowplowanalytics.snowplow.enrich.common.outputs.EnrichedEvent @@ -138,7 +140,7 @@ object ConversionUtils { val result = new String(decodedBytes, UTF_8) // Must specify charset (EMR uses US_ASCII) result } - .leftMap(e => s"could not base64 decode: ${e.getMessage}") + .leftMap(e => s"Could not base64 decode: ${e.getMessage}") /** * Encodes a URL-safe Base64 string. @@ -158,19 +160,13 @@ object ConversionUtils { * @param str The String hopefully containing a UUID * @return either the original String, or an error String */ - val validateUuid: (String, String) => Either[FailureDetails.EnrichmentFailure, String] = + val validateUuid: (String, String) => Either[ValidatorReport, String] = (field, str) => { def check(s: String)(u: UUID): Boolean = u != null && s.toLowerCase == u.toString val uuid = Try(UUID.fromString(str)).toOption.filter(check(str)) uuid match { case Some(_) => str.toLowerCase.asRight - case None => - val f = FailureDetails.EnrichmentFailureMessage.InputData( - field, - Option(str), - "not a valid UUID" - ) - FailureDetails.EnrichmentFailure(None, f).asLeft + case None => ValidatorReport("Not a valid UUID", Some(field), Nil, Option(str)).asLeft } } @@ -179,17 +175,12 @@ object ConversionUtils { * @param str The String hopefully parseable as an integer * @return either the original String, or an error String */ - val validateInteger: (String, String) => Either[FailureDetails.EnrichmentFailure, String] = + val validateInteger: (String, String) => Either[ValidatorReport, String] = (field, str) => { Either .catchNonFatal { str.toInt; str } .leftMap { _ => - val f = FailureDetails.EnrichmentFailureMessage.InputData( - field, - Option(str), - "not a valid integer" - ) - FailureDetails.EnrichmentFailure(None, f) + ValidatorReport("Not a valid integer", Some(field), Nil, Option(str)) } } @@ -214,7 +205,7 @@ object ConversionUtils { val d = URLDecoder.decode(s, enc.toString) d.replaceAll("(\\r|\\n)", "").replaceAll("\\t", " ").asRight } catch { - case NonFatal(e) => s"exception URL-decoding (encoding $enc): ${e.getMessage}".asLeft + case NonFatal(e) => s"Exception URL-decoding (encoding $enc): ${e.getMessage}".asLeft } /** @@ -304,7 +295,7 @@ object ConversionUtils { } match { case util.Success(params) => params.toList.asRight case util.Failure(e) => - val msg = s"could not parse uri, expection was thrown: [$e]." + val msg = s"Could not parse uri. Error: [$e]." val f = FailureDetails.EnrichmentFailureMessage.InputData( "uri", Option(uri).map(_.toString()), @@ -330,19 +321,14 @@ object ConversionUtils { jint.asRight } catch { case _: NumberFormatException => - "cannot be converted to java.lang.Integer".asLeft + "Cannot be converted to java.lang.Integer".asLeft } } - val stringToJInteger2: (String, String) => Either[FailureDetails.EnrichmentFailure, JInteger] = + val stringToJInteger2: (String, String) => Either[ValidatorReport, JInteger] = (field, str) => stringToJInteger(str).leftMap { e => - val f = FailureDetails.EnrichmentFailureMessage.InputData( - field, - Option(str), - e - ) - FailureDetails.EnrichmentFailure(None, f) + ValidatorReport(e, Some(field), Nil, Option(str)) } val stringToJBigDecimal: String => Either[String, JBigDecimal] = str => @@ -361,18 +347,13 @@ object ConversionUtils { else Right(bd) } - .leftMap(e => s"cannot be converted to java.math.BigDecimal. Error : ${e.getMessage}") + .leftMap(e => s"Cannot be converted to java.math.BigDecimal. Error : ${e.getMessage}") } - val stringToJBigDecimal2: (String, String) => Either[FailureDetails.EnrichmentFailure, JBigDecimal] = + val stringToJBigDecimal2: (String, String) => Either[ValidatorReport, JBigDecimal] = (field, str) => stringToJBigDecimal(str).leftMap { e => - val f = FailureDetails.EnrichmentFailureMessage.InputData( - field, - Option(str), - e - ) - FailureDetails.EnrichmentFailure(None, f) + ValidatorReport(e, Some(field), Nil, Option(str)) } /** @@ -384,7 +365,7 @@ object ConversionUtils { * @param field The name of the field we are validating. To use in our error message * @return either a failure or a String */ - val stringToDoubleLike: (String, String) => Either[FailureDetails.EnrichmentFailure, String] = + val stringToDoubleLike: (String, String) => Either[ValidatorReport, String] = (field, str) => Either .catchNonFatal { @@ -397,12 +378,8 @@ object ConversionUtils { } } .leftMap { _ => - val msg = "cannot be converted to Double-like" - FailureDetails.EnrichmentFailure( - None, - FailureDetails.EnrichmentFailureMessage - .InputData(field, Option(str), msg) - ) + val msg = "Cannot be converted to Double-like" + ValidatorReport(msg, Some(field), Nil, Option(str)) } /** @@ -411,7 +388,7 @@ object ConversionUtils { * @param field The name of the field we are validating. To use in our error message * @return a Scalaz Validation, being either a Failure String or a Success Double */ - def stringToMaybeDouble(field: String, str: String): Either[FailureDetails.EnrichmentFailure, Option[Double]] = + def stringToMaybeDouble(field: String, str: String): Either[ValidatorReport, Option[Double]] = Either .catchNonFatal { if (Option(str).isEmpty || str == "null") @@ -422,50 +399,41 @@ object ConversionUtils { jbigdec.doubleValue().some } } - .leftMap(_ => - FailureDetails.EnrichmentFailure( - None, - FailureDetails.EnrichmentFailureMessage.InputData( - field, - Option(str), - "cannot be converted to Double" - ) - ) - ) + .leftMap(_ => ValidatorReport("Cannot be converted to Double", Some(field), Nil, Option(str))) - /** Convert a java BigDecimal a Double */ - def jBigDecimalToDouble(field: String, f: JBigDecimal): Either[FailureDetails.EnrichmentFailure, Option[Double]] = + /** Convert a java BigDecimal to a Double */ + def jBigDecimalToDouble(field: String, f: JBigDecimal): Either[ValidatorReport, Option[Double]] = Either .catchNonFatal { Option(f).map(_.doubleValue) } - .leftMap(_ => + .leftMap(_ => ValidatorReport("Cannot be converted to Double", Some(field), Nil, Option(f).map(_.toString))) + + /** Convert a java BigDecimal to a Double */ + def jBigDecimalToDouble( + field: String, + f: JBigDecimal, + enrichmentInfo: FailureDetails.EnrichmentInformation + ): Either[FailureDetails.EnrichmentFailure, Option[Double]] = + jBigDecimalToDouble(field, f) + .leftMap { error => FailureDetails.EnrichmentFailure( - None, + Some(enrichmentInfo), FailureDetails.EnrichmentFailureMessage.InputData( - field, - Option(f).map(_.toString), - "cannot be converted to Double" + error.path.getOrElse(""), + error.keyword, + error.message ) ) - ) + } /** Convert a Double to a java BigDecimal */ - def doubleToJBigDecimal(field: String, d: Option[Double]): Either[FailureDetails.EnrichmentFailure, Option[JBigDecimal]] = + def doubleToJBigDecimal(field: String, d: Option[Double]): Either[ValidatorReport, Option[JBigDecimal]] = Either .catchNonFatal { d.map(dd => new JBigDecimal(dd)) } - .leftMap(_ => - FailureDetails.EnrichmentFailure( - None, - FailureDetails.EnrichmentFailureMessage.InputData( - field, - d.map(_.toString), - "cannot be converted to java BigDecimal" - ) - ) - ) + .leftMap(_ => ValidatorReport("Cannot be converted to java BigDecimal", Some(field), Nil, d.map(_.toString))) /** * Converts a String to a Double with two decimal places. Used to honor schemas with @@ -475,7 +443,7 @@ object ConversionUtils { val stringToTwoDecimals: String => Either[String, Double] = str => try BigDecimal(str).setScale(2, BigDecimal.RoundingMode.HALF_EVEN).toDouble.asRight catch { - case _: NumberFormatException => "cannot be converted to Double".asLeft + case _: NumberFormatException => "Cannot be converted to Double".asLeft } /** @@ -485,7 +453,7 @@ object ConversionUtils { val stringToDouble: String => Either[String, Double] = str => Either .catchNonFatal(BigDecimal(str).toDouble) - .leftMap(_ => s"cannot be converted to Double") + .leftMap(_ => s"Cannot be converted to Double") /** * Extract a Java Byte representing 1 or 0 only from a String, or error. @@ -493,19 +461,14 @@ object ConversionUtils { * @param field The name of the field we are trying to process. To use in our error message * @return either a Failure String or a Success Byte */ - val stringToBooleanLikeJByte: (String, String) => Either[FailureDetails.EnrichmentFailure, JByte] = + val stringToBooleanLikeJByte: (String, String) => Either[ValidatorReport, JByte] = (field, str) => str match { case "1" => (1.toByte: JByte).asRight case "0" => (0.toByte: JByte).asRight case _ => - val msg = "cannot be converted to Boolean-like java.lang.Byte" - val f = FailureDetails.EnrichmentFailureMessage.InputData( - field, - Option(str), - msg - ) - FailureDetails.EnrichmentFailure(None, f).asLeft + val msg = "Cannot be converted to Boolean-like java.lang.Byte" + ValidatorReport(msg, Some(field), Nil, Option(str)).asLeft } /** @@ -520,7 +483,7 @@ object ConversionUtils { else if (str == "0") false.asRight else - s"cannot be converted to boolean, only 1 or 0 are supported".asLeft + s"Cannot be converted to boolean, only 1 or 0 are supported".asLeft /** * Truncates a String - useful for making sure Strings can't overflow a database field. diff --git a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/utils/IgluUtils.scala b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/utils/IgluUtils.scala index d18936818..791174120 100644 --- a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/utils/IgluUtils.scala +++ b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/utils/IgluUtils.scala @@ -19,8 +19,6 @@ import io.circe._ import io.circe.syntax._ import io.circe.generic.semiauto._ -import java.time.Instant - import com.snowplowanalytics.iglu.client.{ClientError, IgluCirceClient} import com.snowplowanalytics.iglu.client.resolver.registries.RegistryLookup @@ -30,8 +28,6 @@ import com.snowplowanalytics.iglu.core.circe.implicits._ import com.snowplowanalytics.snowplow.badrows._ import com.snowplowanalytics.snowplow.enrich.common.outputs.EnrichedEvent -import com.snowplowanalytics.snowplow.enrich.common.enrichments.EnrichmentManager -import com.snowplowanalytics.snowplow.enrich.common.adapters.RawEvent /** * Contain the functions to validate: @@ -56,36 +52,19 @@ object IgluUtils { def extractAndValidateInputJsons[F[_]: Monad: Clock]( enriched: EnrichedEvent, client: IgluCirceClient[F], - raw: RawEvent, - processor: Processor, registryLookup: RegistryLookup[F] - ): EitherT[ - F, - BadRow.SchemaViolations, - EventExtractResult - ] = - EitherT { - for { - contexts <- IgluUtils.extractAndValidateInputContexts(enriched, client, registryLookup) - unstruct <- IgluUtils - .extractAndValidateUnstructEvent(enriched, client, registryLookup) - .map(_.toValidatedNel) - } yield (contexts, unstruct) - .mapN { (c, ue) => - val validationInfoContexts = (c.flatMap(_.validationInfo) ::: ue.flatMap(_.validationInfo).toList).distinct - .map(_.toSdj) - EventExtractResult(contexts = c.map(_.sdj), unstructEvent = ue.map(_.sdj), validationInfoContexts = validationInfoContexts) - } - .leftMap { schemaViolations => - buildSchemaViolationsBadRow( - schemaViolations, - EnrichedEvent.toPartiallyEnrichedEvent(enriched), - RawEvent.toRawEvent(raw), - processor - ) - } - .toEither - } + ): F[ValidatedNel[FailureDetails.SchemaViolation, EventExtractResult]] = + for { + contexts <- IgluUtils.extractAndValidateInputContexts(enriched, client, registryLookup) + unstruct <- IgluUtils + .extractAndValidateUnstructEvent(enriched, client, registryLookup) + .map(_.toValidatedNel) + } yield (contexts, unstruct) + .mapN { (c, ue) => + val validationInfoContexts = (c.flatMap(_.validationInfo) ::: ue.flatMap(_.validationInfo).toList).distinct + .map(_.toSdj) + EventExtractResult(contexts = c.map(_.sdj), unstructEvent = ue.map(_.sdj), validationInfoContexts = validationInfoContexts) + } /** * Extract unstructured event from event and validate against its schema @@ -109,7 +88,7 @@ object IgluUtils { // Validate input Json string and extract unstructured event unstruct <- extractInputData(rawUnstructEvent, field, criterion, client, registryLookup) // Parse Json unstructured event as SelfDescribingData[Json] - unstructSDJ <- parseAndValidateSDJ_sv(unstruct, client, registryLookup) + unstructSDJ <- parseAndValidateSDJ(unstruct, client, registryLookup) } yield unstructSDJ.some case None => EitherT.rightT[F, FailureDetails.SchemaViolation](none[SdjExtractResult]) @@ -141,7 +120,7 @@ object IgluUtils { // Parse and validate each SDJ and merge the errors contextsSDJ <- EitherT( contexts - .map(parseAndValidateSDJ_sv(_, client, registryLookup).toValidatedNel) + .map(parseAndValidateSDJ(_, client, registryLookup).toValidatedNel) .sequence .map(_.sequence.toEither) ) @@ -164,31 +143,17 @@ object IgluUtils { private[common] def validateEnrichmentsContexts[F[_]: Monad: Clock]( client: IgluCirceClient[F], sdjs: List[SelfDescribingData[Json]], - raw: RawEvent, - processor: Processor, - enriched: EnrichedEvent, registryLookup: RegistryLookup[F] - ): EitherT[F, BadRow.EnrichmentFailures, Unit] = + ): F[ValidatedNel[FailureDetails.SchemaViolation, Unit]] = checkList(client, sdjs, registryLookup) .leftMap( _.map { case (schemaKey, clientError) => - val enrichmentInfo = - FailureDetails.EnrichmentInformation(schemaKey, "enrichments-contexts-validation") - FailureDetails.EnrichmentFailure( - enrichmentInfo.some, - FailureDetails.EnrichmentFailureMessage.IgluError(schemaKey, clientError) - ) + val f: FailureDetails.SchemaViolation = FailureDetails.SchemaViolation.IgluError(schemaKey, clientError) + f } ) - .leftMap { enrichmentFailures => - EnrichmentManager.buildEnrichmentFailuresBadRow( - enrichmentFailures, - EnrichedEvent.toPartiallyEnrichedEvent(enriched), - RawEvent.toRawEvent(raw), - processor - ) - } + .toValidated /** Used to extract .data for input custom contexts and input unstructured event */ private def extractInputData[F[_]: Monad: Clock]( @@ -257,7 +222,7 @@ object IgluUtils { } /** Parse a Json as a SDJ and check that it's valid */ - private def parseAndValidateSDJ_sv[F[_]: Monad: Clock]( // _sv for SchemaViolation + private def parseAndValidateSDJ[F[_]: Monad: Clock]( json: Json, client: IgluCirceClient[F], registryLookup: RegistryLookup[F] @@ -309,17 +274,4 @@ object IgluUtils { unstructEvent: Option[SelfDescribingData[Json]], validationInfoContexts: List[SelfDescribingData[Json]] ) - - /** Build `BadRow.SchemaViolations` from a list of `FailureDetails.SchemaViolation`s */ - def buildSchemaViolationsBadRow( - vs: NonEmptyList[FailureDetails.SchemaViolation], - pee: Payload.PartiallyEnrichedEvent, - re: Payload.RawEvent, - processor: Processor - ): BadRow.SchemaViolations = - BadRow.SchemaViolations( - processor, - Failure.SchemaViolations(Instant.now(), vs), - Payload.EnrichmentPayload(pee, re) - ) } diff --git a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/utils/JsonUtils.scala b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/utils/JsonUtils.scala index 680286044..cf3851ffa 100644 --- a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/utils/JsonUtils.scala +++ b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/utils/JsonUtils.scala @@ -20,7 +20,7 @@ import io.circe.Json import org.joda.time.{DateTime, DateTimeZone} import org.joda.time.format.{DateTimeFormat, DateTimeFormatter} -import com.snowplowanalytics.snowplow.badrows.FailureDetails +import com.snowplowanalytics.iglu.client.validator.ValidatorReport /** Contains general purpose extractors and other utilities for JSONs. Jackson-based. */ object JsonUtils { @@ -32,29 +32,21 @@ object JsonUtils { DateTimeFormat.forPattern("yyyy-MM-dd'T'HH:mm:ss.SSS'Z'").withZone(DateTimeZone.UTC) /** Validates a String as correct JSON. */ - val extractUnencJson: (String, String) => Either[FailureDetails.EnrichmentFailure, String] = + val extractUnencJson: (String, String) => Either[ValidatorReport, String] = (field, str) => validateAndReformatJson(str) .leftMap { e => - FailureDetails.EnrichmentFailure( - None, - FailureDetails.EnrichmentFailureMessage - .InputData(field, Option(str), e) - ) + ValidatorReport(e, Some(field), Nil, Option(str)) } /** Decodes a Base64 (URL safe)-encoded String then validates it as correct JSON. */ - val extractBase64EncJson: (String, String) => Either[FailureDetails.EnrichmentFailure, String] = + val extractBase64EncJson: (String, String) => Either[ValidatorReport, String] = (field, str) => ConversionUtils .decodeBase64Url(str) .flatMap(validateAndReformatJson) .leftMap { e => - FailureDetails.EnrichmentFailure( - None, - FailureDetails.EnrichmentFailureMessage - .InputData(field, Option(str), e) - ) + ValidatorReport(e, Some(field), Nil, Option(str)) } /** diff --git a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/utils/MapTransformer.scala b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/utils/MapTransformer.scala index 6bc76be1c..7879d9506 100644 --- a/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/utils/MapTransformer.scala +++ b/modules/common/src/main/scala/com.snowplowanalytics.snowplow.enrich/common/utils/MapTransformer.scala @@ -17,7 +17,7 @@ import cats.instances.int._ import cats.syntax.either._ import cats.syntax.validated._ -import com.snowplowanalytics.snowplow.badrows._ +import com.snowplowanalytics.iglu.client.validator.ValidatorReport /** * The problem we're trying to solve: converting maps to classes in Scala @@ -66,7 +66,7 @@ object MapTransformer { type Field = String // A transformation takes a Key and Value and returns either a failure or anything - type TransformFunc = Function2[Key, Value, Either[FailureDetails.EnrichmentFailure, _]] + type TransformFunc = Function2[Key, Value, Either[ValidatorReport, _]] // Our source map type SourceMap = Map[Key, Value] @@ -88,7 +88,7 @@ object MapTransformer { transformMap: TransformMap )( implicit m: Manifest[T] - ): ValidatedNel[FailureDetails.EnrichmentFailure, T] = { + ): ValidatedNel[ValidatorReport, T] = { val newInst = m.runtimeClass.getDeclaredConstructor().newInstance() val result = _transform(newInst, sourceMap, transformMap, getSetters(m.runtimeClass)) // On success, replace the field count with the new instance @@ -116,7 +116,7 @@ object MapTransformer { * @param transformMap Determines how the data should be transformed before storing in the obj * @return a ValidationNel containing a Nel of error Strings, or the count of updated fields */ - def transform(sourceMap: SourceMap, transformMap: TransformMap): ValidatedNel[FailureDetails.EnrichmentFailure, Int] = + def transform(sourceMap: SourceMap, transformMap: TransformMap): ValidatedNel[ValidatorReport, Int] = _transform[T](obj, sourceMap, transformMap, setters) } @@ -134,8 +134,8 @@ object MapTransformer { sourceMap: SourceMap, transformMap: TransformMap, setters: SettersMap - ): ValidatedNel[FailureDetails.EnrichmentFailure, Int] = { - val results: List[Either[FailureDetails.EnrichmentFailure, Int]] = sourceMap.map { + ): ValidatedNel[ValidatorReport, Int] = { + val results: List[Either[ValidatorReport, Int]] = sourceMap.map { case (key, in) => transformMap.get(key) match { case Some((func, field)) => @@ -172,7 +172,7 @@ object MapTransformer { } }.toList - results.foldLeft(0.validNel[FailureDetails.EnrichmentFailure]) { + results.foldLeft(0.validNel[ValidatorReport]) { case (acc, e) => acc.combine(e.toValidatedNel) } diff --git a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/EnrichmentManagerSpec.scala b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/EnrichmentManagerSpec.scala index d598ca932..7ea29cacb 100644 --- a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/EnrichmentManagerSpec.scala +++ b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/EnrichmentManagerSpec.scala @@ -29,7 +29,6 @@ import io.circe.syntax._ import org.joda.time.DateTime import com.snowplowanalytics.snowplow.badrows._ -import com.snowplowanalytics.snowplow.badrows.FailureDetails.EnrichmentFailureMessage import com.snowplowanalytics.iglu.core.{SchemaCriterion, SchemaKey, SchemaVer, SelfDescribingData} @@ -137,6 +136,62 @@ class EnrichmentManagerSpec extends Specification with EitherMatchers with CatsE }) } + "return a SchemaViolations bad row that contains 1 ValidationError for the atomic field and 1 ValidationError for the unstruct event" >> { + val parameters = Map( + "e" -> "ue", + "tv" -> "js-0.13.1", + "p" -> "web", + "tr_tt" -> "not number", + "ue_pr" -> + """ + { + "schema":"iglu:com.snowplowanalytics.snowplow/unstruct_event/jsonschema/1-0-0", + "data":{ + "schema":"iglu:com.acme/email_sent/jsonschema/1-0-0", + "data": { + "emailAddress": "hello@world.com", + "emailAddress2": "foo@bar.org", + "unallowedAdditionalField": "foo@bar.org" + } + } + }""" + ).toOpt + val rawEvent = RawEvent(api, parameters, None, source, context) + EnrichmentManager + .enrichEvent[IO]( + enrichmentReg, + client, + processor, + timestamp, + rawEvent, + AcceptInvalid.featureFlags, + IO.unit, + SpecHelpers.registryLookup, + atomicFieldLimits + ) + .value + .map(_ must beLeft.like { + case BadRow.SchemaViolations( + _, + Failure.SchemaViolations(_, + NonEmptyList(FailureDetails.SchemaViolation.IgluError(schemaKey1, clientError1), + List(FailureDetails.SchemaViolation.IgluError(schemaKey2, clientError2)) + ) + ), + _ + ) => + schemaKey1 must beEqualTo(emailSentSchema) + clientError1.toString must contain( + "unallowedAdditionalField: is not defined in the schema and the schema does not allow additional properties" + ) + schemaKey2 must beEqualTo(AtomicFields.atomicSchema) + clientError2.toString must contain("tr_tt") + clientError2.toString must contain("Cannot be converted to java.math.BigDecimal") + case other => + ko(s"[$other] is not a SchemaViolations bad row with 2 IgluError") + }) + } + "return an EnrichmentFailures bad row if one of the enrichment (JS enrichment here) fails" >> { val script = """ @@ -203,7 +258,7 @@ class EnrichmentManagerSpec extends Specification with EitherMatchers with CatsE }) } - "return an EnrichmentFailures bad row containing one IgluError if one of the contexts added by the enrichments is invalid" >> { + "return a SchemaViolations bad row containing one IgluError if one of the contexts added by the enrichments is invalid" >> { val script = """ function process(event) { @@ -251,22 +306,19 @@ class EnrichmentManagerSpec extends Specification with EitherMatchers with CatsE ) enriched.value .map(_ must beLeft.like { - case BadRow.EnrichmentFailures( + case BadRow.SchemaViolations( _, - Failure.EnrichmentFailures( + Failure.SchemaViolations( _, NonEmptyList( - FailureDetails.EnrichmentFailure( - _, - _: FailureDetails.EnrichmentFailureMessage.IgluError - ), + _: FailureDetails.SchemaViolation.IgluError, Nil ) ), payload ) if payload.enriched.derived_contexts.isDefined => ok - case br => ko(s"bad row [$br] is not an EnrichmentFailures containing one IgluError and with derived_contexts defined") + case br => ko(s"[$br] is not a SchemaViolations bad row containing one IgluError and with derived_contexts defined") }) } @@ -1380,7 +1432,7 @@ class EnrichmentManagerSpec extends Specification with EitherMatchers with CatsE FailureDetails.EnrichmentFailureMessage.InputData( "sp_dtm", "some_invalid_timestamp_value".some, - "not in the expected format: ms since epoch" + "Not in the expected format: ms since epoch" ) ) val inputState = EnrichmentManager.Accumulation(input, Nil, Nil) @@ -1414,7 +1466,7 @@ class EnrichmentManagerSpec extends Specification with EitherMatchers with CatsE FailureDetails.EnrichmentFailureMessage.InputData( "sp_dtm", "some_invalid_timestamp_value".some, - "not in the expected format: ms since epoch" + "Not in the expected format: ms since epoch" ) ) val inputState = EnrichmentManager.Accumulation(input, Nil, Nil) @@ -1577,7 +1629,7 @@ class EnrichmentManagerSpec extends Specification with EitherMatchers with CatsE } "validateEnriched" should { - "create a bad row if a field is oversized" >> { + "create a SchemaViolations bad row if an atomic field is oversized" >> { val result = EnrichmentManager .enrichEvent[IO]( enrichmentReg, @@ -1590,23 +1642,23 @@ class EnrichmentManagerSpec extends Specification with EitherMatchers with CatsE SpecHelpers.registryLookup, atomicFieldLimits ) - .value - result.map(_ must beLeft.like { - case badRow: BadRow.EnrichmentFailures => - val firstError = badRow.failure.messages.head.message - val secondError = badRow.failure.messages.last.message - - firstError must beEqualTo( - EnrichmentFailureMessage.Simple("Enriched event does not conform to atomic schema field's length restrictions") - ) - secondError must beEqualTo(EnrichmentFailureMessage.Simple("Field v_tracker longer than maximum allowed size 100")) - case br => - ko(s"bad row [$br] is not BadRow.EnrichmentFailures") - }) + result.value + .map(_ must beLeft.like { + case BadRow.SchemaViolations( + _, + Failure.SchemaViolations(_, NonEmptyList(FailureDetails.SchemaViolation.IgluError(schemaKey, clientError), Nil)), + _ + ) => + schemaKey must beEqualTo(AtomicFields.atomicSchema) + clientError.toString must contain("v_tracker") + clientError.toString must contain("Field is longer than maximum allowed size") + case other => + ko(s"[$other] is not a SchemaViolations bad row with one IgluError") + }) } - "not create a bad row if a field is oversized and acceptInvalid is set to true" >> { + "not create a bad row if an atomic field is oversized and acceptInvalid is set to true" >> { val result = EnrichmentManager .enrichEvent[IO]( enrichmentReg, @@ -1623,6 +1675,69 @@ class EnrichmentManagerSpec extends Specification with EitherMatchers with CatsE result.map(_ must beRight[EnrichedEvent]) } + + "return a SchemaViolations bad row containing both the atomic field length error and the invalid enrichment context error" >> { + val script = + """ + function process(event) { + return [ { schema: "iglu:com.acme/email_sent/jsonschema/1-0-0", + data: { + emailAddress: "hello@world.com", + foo: "bar" + } + } ]; + }""" + + val config = + json"""{ + "parameters": { + "script": ${ConversionUtils.encodeBase64Url(script)} + } + }""" + val schemaKey = SchemaKey( + "com.snowplowanalytics.snowplow", + "javascript_script_config", + "jsonschema", + SchemaVer.Full(1, 0, 0) + ) + val jsEnrichConf = + JavascriptScriptEnrichment.parse(config, schemaKey).toOption.get + val jsEnrich = JavascriptScriptEnrichment(jsEnrichConf.schemaKey, jsEnrichConf.rawFunction) + val enrichmentReg = EnrichmentRegistry[IO](javascriptScript = List(jsEnrich)) + + val rawEvent = RawEvent(api, fatBody, None, source, context) + EnrichmentManager + .enrichEvent[IO]( + enrichmentReg, + client, + processor, + timestamp, + rawEvent, + AcceptInvalid.featureFlags, + IO.unit, + SpecHelpers.registryLookup, + atomicFieldLimits + ) + .value + .map(_ must beLeft.like { + case BadRow.SchemaViolations( + _, + Failure.SchemaViolations(_, + NonEmptyList(FailureDetails.SchemaViolation.IgluError(schemaKey1, clientError1), + List(FailureDetails.SchemaViolation.IgluError(schemaKey2, clientError2)) + ) + ), + _ + ) => + schemaKey1 must beEqualTo(AtomicFields.atomicSchema) + clientError1.toString must contain("v_tracker") + clientError1.toString must contain("Field is longer than maximum allowed size") + schemaKey2 must beEqualTo(emailSentSchema) + clientError2.toString must contain("emailAddress2: is missing but it is required") + case other => + ko(s"[$other] is not a SchemaViolations bad row with 2 IgluError") + }) + } } } @@ -1691,4 +1806,11 @@ object EnrichmentManagerSpec { .getOrElse(throw new RuntimeException("IAB enrichment couldn't be initialised")) // to make sure it's not none .enrichment[IO] + val emailSentSchema = + SchemaKey( + "com.acme", + "email_sent", + "jsonschema", + SchemaVer.Full(1, 0, 0) + ) } diff --git a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/clientEnrichmentSpecs.scala b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/clientEnrichmentSpecs.scala index 91c8db02d..7e8869063 100644 --- a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/clientEnrichmentSpecs.scala +++ b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/clientEnrichmentSpecs.scala @@ -14,31 +14,16 @@ import cats.syntax.either._ import org.specs2.Specification import org.specs2.matcher.DataTables -import com.snowplowanalytics.snowplow.badrows._ +import com.snowplowanalytics.iglu.client.validator.ValidatorReport class ExtractViewDimensionsSpec extends Specification with DataTables { val FieldName = "res" - def err: String => FailureDetails.EnrichmentFailure = - input => - FailureDetails.EnrichmentFailure( - None, - FailureDetails.EnrichmentFailureMessage.InputData( - FieldName, - Option(input), - """does not conform to regex (\d+)x(\d+)""" - ) - ) - def err2: String => FailureDetails.EnrichmentFailure = - input => - FailureDetails.EnrichmentFailure( - None, - FailureDetails.EnrichmentFailureMessage.InputData( - FieldName, - Option(input), - "could not be converted to java.lang.Integer s" - ) - ) + def err: String => ValidatorReport = + input => ValidatorReport("""Does not conform to regex (\d+)x(\d+)""", Some(FieldName), Nil, Option(input)) + + def err2: String => ValidatorReport = + input => ValidatorReport("Could not be converted to java.lang.Integer s", Some(FieldName), Nil, Option(input)) def is = s2""" Extracting screen dimensions (viewports, screen resolution etc) with extractViewDimensions should work $e1""" diff --git a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/eventEnrichmentSpecs.scala b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/eventEnrichmentSpecs.scala index 92f0448b8..31fbc57b5 100644 --- a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/eventEnrichmentSpecs.scala +++ b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/eventEnrichmentSpecs.scala @@ -16,7 +16,7 @@ import org.joda.time.DateTimeZone import org.specs2.Specification import org.specs2.matcher.DataTables -import com.snowplowanalytics.snowplow.badrows._ +import com.snowplowanalytics.iglu.client.validator.ValidatorReport class ExtractEventTypeSpec extends Specification with DataTables { def is = s2""" @@ -27,16 +27,8 @@ class ExtractEventTypeSpec extends Specification with DataTables { """ val FieldName = "e" - def err: String => FailureDetails.EnrichmentFailure = - input => - FailureDetails.EnrichmentFailure( - None, - FailureDetails.EnrichmentFailureMessage.InputData( - FieldName, - Option(input), - "not recognized as an event type" - ) - ) + def err: String => ValidatorReport = + input => ValidatorReport("Not a valid event type", Some(FieldName), Nil, Option(input)) def e1 = "SPEC NAME" || "INPUT VAL" | "EXPECTED OUTPUT" | @@ -66,9 +58,9 @@ class ExtractEventTypeSpec extends Specification with DataTables { def e3 = // format: off "SPEC NAME" || "INPUT VAL" | "EXPECTED OUTPUT" | - "None" !! None ! FailureDetails.EnrichmentFailure(None, FailureDetails.EnrichmentFailureMessage.InputData("collector_tstamp", None, "should be set")).asLeft | - "Negative timestamp" !! BCTstamp ! FailureDetails.EnrichmentFailure(None, FailureDetails.EnrichmentFailureMessage.InputData("collector_tstamp", Some("-0030-01-01T00:00:00.000Z"),"formatted as -0030-01-01 00:00:00.000 is not Redshift-compatible")).asLeft | - ">10k timestamp" !! FarAwayTstamp ! FailureDetails.EnrichmentFailure(None, FailureDetails.EnrichmentFailureMessage.InputData("collector_tstamp", Some("11970-01-01T00:00:00.000Z"),"formatted as 11970-01-01 00:00:00.000 is not Redshift-compatible")).asLeft | + "None" !! None ! ValidatorReport("Field not set", Some("collector_tstamp"), Nil, None).asLeft | + "Negative timestamp" !! BCTstamp ! ValidatorReport("Formatted as -0030-01-01 00:00:00.000 is not Redshift-compatible", Some("collector_tstamp"), Nil, Some("-0030-01-01T00:00:00.000Z")).asLeft | + ">10k timestamp" !! FarAwayTstamp ! ValidatorReport("Formatted as 11970-01-01 00:00:00.000 is not Redshift-compatible", Some("collector_tstamp"), Nil, Some("11970-01-01T00:00:00.000Z")).asLeft | "Valid timestamp" !! SeventiesTstamp ! "1970-01-01 00:00:00.000".asRight |> { // format: on (_, input, expected) => @@ -77,26 +69,12 @@ class ExtractEventTypeSpec extends Specification with DataTables { def e4 = "SPEC NAME" || "INPUT VAL" | "EXPECTED OUTPUT" | - "Not long" !! (("f", "v")) ! FailureDetails - .EnrichmentFailure( - None, - FailureDetails.EnrichmentFailureMessage.InputData( - "f", - Some("v"), - "not in the expected format: ms since epoch" - ) - ) - .asLeft | - "Too long" !! (("f", "1111111111111111")) ! FailureDetails - .EnrichmentFailure( - None, - FailureDetails.EnrichmentFailureMessage.InputData( - "f", - Some("1111111111111111"), - "formatting as 37179-09-17 07:18:31.111 is not Redshift-compatible" - ) - ) - .asLeft | + "Not long" !! (("f", "v")) ! ValidatorReport("Not in the expected format: ms since epoch", Some("f"), Nil, Some("v")).asLeft | + "Too long" !! (("f", "1111111111111111")) ! ValidatorReport("Formatting as 37179-09-17 07:18:31.111 is not Redshift-compatible", + Some("f"), + Nil, + Some("1111111111111111") + ).asLeft | "Valid ts" !! (("f", "1")) ! "1970-01-01 00:00:00.001".asRight |> { (_, input, expected) => EventEnrichments.extractTimestamp(input._1, input._2) must_== expected } diff --git a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/miscEnrichmentSpecs.scala b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/miscEnrichmentSpecs.scala index e13822302..3ec95d6cb 100644 --- a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/miscEnrichmentSpecs.scala +++ b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/miscEnrichmentSpecs.scala @@ -18,7 +18,9 @@ import org.specs2.mutable.{Specification => MutSpecification} import org.specs2.Specification import org.specs2.matcher.DataTables -import com.snowplowanalytics.snowplow.badrows.{FailureDetails, Processor} +import com.snowplowanalytics.snowplow.badrows.Processor + +import com.snowplowanalytics.iglu.client.validator.ValidatorReport import com.snowplowanalytics.iglu.core.{SchemaKey, SchemaVer, SelfDescribingData} @@ -35,16 +37,8 @@ class EtlVersionSpec extends MutSpecification { /** Tests the extractPlatform function. Uses DataTables. */ class ExtractPlatformSpec extends Specification with DataTables { val FieldName = "p" - def err: String => FailureDetails.EnrichmentFailure = - input => - FailureDetails.EnrichmentFailure( - None, - FailureDetails.EnrichmentFailureMessage.InputData( - FieldName, - Option(input), - "not recognized as a tracking platform" - ) - ) + def err: String => ValidatorReport = + input => ValidatorReport("Not a valid platform", Some(FieldName), Nil, Option(input)) def is = s2""" Extracting platforms with extractPlatform should work $e1 diff --git a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/CrossNavigationEnrichmentSpec.scala b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/CrossNavigationEnrichmentSpec.scala index db830ff43..387f868f1 100644 --- a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/CrossNavigationEnrichmentSpec.scala +++ b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/registry/CrossNavigationEnrichmentSpec.scala @@ -182,7 +182,7 @@ class CrossNavigationEnrichmentSpec extends Specification with EitherMatchers { FailureDetails.EnrichmentFailureMessage.InputData( "sp_dtm", "not-timestamp".some, - "not in the expected format: ms since epoch" + "Not in the expected format: ms since epoch" ) ) val result = CrossDomainMap.makeCrossDomainMap(input).map(_.domainMap) @@ -196,7 +196,7 @@ class CrossNavigationEnrichmentSpec extends Specification with EitherMatchers { FailureDetails.EnrichmentFailureMessage.InputData( "sp_dtm", "1111111111111111".some, - "formatting as 37179-09-17 07:18:31.111 is not Redshift-compatible" + "Formatting as 37179-09-17 07:18:31.111 is not Redshift-compatible" ) ) val result = CrossDomainMap.makeCrossDomainMap(input).map(_.domainMap) diff --git a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/web/ParseCrossDomainSpec.scala b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/web/ParseCrossDomainSpec.scala index 0468e005f..154bde8d1 100644 --- a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/web/ParseCrossDomainSpec.scala +++ b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/enrichments/web/ParseCrossDomainSpec.scala @@ -46,7 +46,7 @@ class ParseCrossDomainSpec extends Specification with DataTables { FailureDetails.EnrichmentFailureMessage.InputData( "sp_dtm", "not-a-timestamp".some, - "not in the expected format: ms since epoch" + "Not in the expected format: ms since epoch" ) ) CrossNavigationEnrichment.parseCrossDomain(List(("_sp" -> Some("abc.not-a-timestamp")))).map(_.domainMap) must beLeft(expected) diff --git a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/utils/IgluUtilsSpec.scala b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/utils/IgluUtilsSpec.scala index f60d5778a..8f0596eba 100644 --- a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/utils/IgluUtilsSpec.scala +++ b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/utils/IgluUtilsSpec.scala @@ -440,106 +440,64 @@ class IgluUtilsSpec extends Specification with ValidatedMatchers with CatsEffect } "validateEnrichmentsContexts" should { - "return a BadRow.EnrichmentFailures with one expected failure for one invalid context" >> { + "return one expected SchemaViolation for one invalid context" >> { val contexts = List( SpecHelpers.jsonStringToSDJ(invalidEmailSent).right.get ) IgluUtils - .validateEnrichmentsContexts(SpecHelpers.client, contexts, raw, processor, enriched, SpecHelpers.registryLookup) - .value - .map(_ must beLeft.like { - case BadRow.EnrichmentFailures(_, failures, _) => - failures.messages match { - case NonEmptyList( - FailureDetails.EnrichmentFailure( - _, - FailureDetails.EnrichmentFailureMessage.IgluError(_, ValidationError(_, _)) - ), - _ - ) => - ok - case err => ko(s"bad row is EnrichmentFailures but [$err] is not one ValidationError") - } - case br => ko(s"bad row [$br] is not EnrichmentFailures") + .validateEnrichmentsContexts(SpecHelpers.client, contexts, SpecHelpers.registryLookup) + .map(_.toEither must beLeft.like { + case NonEmptyList(FailureDetails.SchemaViolation.IgluError(_, ValidationError(_, _)), Nil) => ok + case other => ko(s"[$other] is not one ValidationError") }) } - "return a BadRow.EnrichmentFailures 2 expected failures for 2 invalid contexts" >> { + "return two expected SchemaViolation for two invalid contexts" >> { val contexts = List( SpecHelpers.jsonStringToSDJ(invalidEmailSent).right.get, SpecHelpers.jsonStringToSDJ(noSchema).right.get ) IgluUtils - .validateEnrichmentsContexts(SpecHelpers.client, contexts, raw, processor, enriched, SpecHelpers.registryLookup) - .value - .map(_ must beLeft.like { - case BadRow.EnrichmentFailures(_, failures, _) => - failures.messages match { - case NonEmptyList( - FailureDetails.EnrichmentFailure( - _, - FailureDetails.EnrichmentFailureMessage.IgluError(_, ValidationError(_, _)) - ), - List( - FailureDetails.EnrichmentFailure( - _, - FailureDetails.EnrichmentFailureMessage.IgluError(_, ResolutionError(_)) - ) - ) - ) => - ok - case errs => - ko( - s"bad row is EnrichmentFailures but [$errs] is not one ValidationError and one ResolutionError" - ) - } - case br => ko(s"bad row [$br] is not EnrichmentFailures") + .validateEnrichmentsContexts(SpecHelpers.client, contexts, SpecHelpers.registryLookup) + .map(_.toEither must beLeft.like { + case NonEmptyList(FailureDetails.SchemaViolation.IgluError(_, ValidationError(_, _)), + List(FailureDetails.SchemaViolation.IgluError(_, ResolutionError(_))) + ) => + ok + case other => ko(s"[$other] is not one ValidationError and one ResolutionError") }) } - "return a BadRow.EnrichmentFailures with an expected failure for 1 valid context and one invalid" >> { + "return one expected SchemaViolation for one invalid context and one valid" >> { val contexts = List( SpecHelpers.jsonStringToSDJ(invalidEmailSent).right.get, SpecHelpers.jsonStringToSDJ(emailSent1).right.get ) IgluUtils - .validateEnrichmentsContexts(SpecHelpers.client, contexts, raw, processor, enriched, SpecHelpers.registryLookup) - .value - .map(_ must beLeft.like { - case BadRow.EnrichmentFailures(_, failures, _) => - failures.messages match { - case NonEmptyList( - FailureDetails.EnrichmentFailure( - _, - FailureDetails.EnrichmentFailureMessage.IgluError(_, ValidationError(_, _)) - ), - Nil - ) => - ok - case err => ko(s"bad row is EnrichmentFailures but [$err] is not one ValidationError") - } - case br => ko(s"bad row [$br] is not EnrichmentFailures") + .validateEnrichmentsContexts(SpecHelpers.client, contexts, SpecHelpers.registryLookup) + .map(_.toEither must beLeft.like { + case NonEmptyList(FailureDetails.SchemaViolation.IgluError(_, ValidationError(_, _)), Nil) => ok + case other => ko(s"[$other] is not one ValidationError") }) } - "not return any error for 2 valid contexts" >> { + "not return any error for two valid contexts" >> { val contexts = List( SpecHelpers.jsonStringToSDJ(emailSent1).right.get, SpecHelpers.jsonStringToSDJ(emailSent2).right.get ) IgluUtils - .validateEnrichmentsContexts(SpecHelpers.client, contexts, raw, processor, enriched, SpecHelpers.registryLookup) - .value - .map(_ must beRight) + .validateEnrichmentsContexts(SpecHelpers.client, contexts, SpecHelpers.registryLookup) + .map(_.toEither must beRight) } } "extractAndValidateInputJsons" should { - "return a SchemaViolations containing 1 error if the input event contains an invalid unstructured event" >> { + "return one SchemaViolation if the input event contains an invalid unstructured event" >> { val input = new EnrichedEvent input.setUnstruct_event(buildUnstruct(invalidEmailSent)) @@ -547,18 +505,15 @@ class IgluUtilsSpec extends Specification with ValidatedMatchers with CatsEffect .extractAndValidateInputJsons( input, SpecHelpers.client, - raw, - processor, SpecHelpers.registryLookup ) - .value - .map(_ must beLeft.like { - case BadRow.SchemaViolations(_, failure, _) if failure.messages.size == 1 => ok - case br => ko(s"bad row [$br] is not a SchemaViolations containing 1 error") + .map(_.toEither must beLeft.like { + case errors if errors.size == 1 => ok + case other => ko(s"[$other] is not one SchemaViolation") }) } - "return a SchemaViolations containing 1 error if the input event contains 1 invalid context" >> { + "return one SchemaViolation if the input event contains one invalid context" >> { val input = new EnrichedEvent input.setContexts(buildInputContexts(List(invalidEmailSent))) @@ -566,18 +521,15 @@ class IgluUtilsSpec extends Specification with ValidatedMatchers with CatsEffect .extractAndValidateInputJsons( input, SpecHelpers.client, - raw, - processor, SpecHelpers.registryLookup ) - .value - .map(_ must beLeft.like { - case BadRow.SchemaViolations(_, failure, _) if failure.messages.size == 1 => ok - case br => ko(s"bad row [$br] is not a SchemaViolations containing 1 error") + .map(_.toEither must beLeft.like { + case errors if errors.size == 1 => ok + case other => ko(s"[$other] is not one SchemaViolation") }) } - "return a SchemaViolations containing 2 errors if the input event contains an invalid unstructured event and 1 invalid context" >> { + "return two SchemaViolation if the input event contains an invalid unstructured event and one invalid context" >> { val input = new EnrichedEvent input.setUnstruct_event(invalidEmailSent) input.setContexts(buildInputContexts(List(invalidEmailSent))) @@ -586,14 +538,11 @@ class IgluUtilsSpec extends Specification with ValidatedMatchers with CatsEffect .extractAndValidateInputJsons( input, SpecHelpers.client, - raw, - processor, SpecHelpers.registryLookup ) - .value - .map(_ must beLeft.like { - case BadRow.SchemaViolations(_, failure, _) if failure.messages.size == 2 => ok - case br => ko(s"bad row [$br] is not a SchemaViolations containing 2 errors") + .map(_.toEither must beLeft.like { + case errors if errors.size == 2 => ok + case other => ko(s"[$other] is not two SchemaViolation") }) } @@ -606,12 +555,9 @@ class IgluUtilsSpec extends Specification with ValidatedMatchers with CatsEffect .extractAndValidateInputJsons( input, SpecHelpers.client, - raw, - processor, SpecHelpers.registryLookup ) - .value - .map(_ must beRight.like { + .map(_.toEither must beRight.like { case IgluUtils.EventExtractResult(contexts, Some(unstructEvent), validationInfos) if contexts.size == 2 && validationInfos.isEmpty @@ -619,7 +565,7 @@ class IgluUtilsSpec extends Specification with ValidatedMatchers with CatsEffect ok case res => ko( - s"[$res] is not a list with 2 extracted contexts and an option with the extracted unstructured event" + s"[$res] is not a list with two extracted contexts and an option with the extracted unstructured event" ) }) } @@ -640,12 +586,9 @@ class IgluUtilsSpec extends Specification with ValidatedMatchers with CatsEffect .extractAndValidateInputJsons( input, SpecHelpers.client, - raw, - processor, SpecHelpers.registryLookup ) - .value - .map(_ must beRight.like { + .map(_.toEither must beRight.like { case IgluUtils.EventExtractResult(contexts, Some(unstructEvent), List(validationInfo)) if contexts.size == 2 && unstructEvent.schema == supersedingExampleSchema101 @@ -655,7 +598,7 @@ class IgluUtilsSpec extends Specification with ValidatedMatchers with CatsEffect ok case res => ko( - s"[$res] is not a list with 2 extracted contexts and an option with the extracted unstructured event" + s"[$res] is not a list with two extracted contexts and an option with the extracted unstructured event" ) }) } diff --git a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/utils/MapTransformerSpec.scala b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/utils/MapTransformerSpec.scala index 5a2f5fcbf..f5d3dd7c5 100644 --- a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/utils/MapTransformerSpec.scala +++ b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/utils/MapTransformerSpec.scala @@ -18,7 +18,7 @@ import org.apache.commons.lang3.builder.HashCodeBuilder import org.specs2.matcher.ValidatedMatchers import org.specs2.mutable.Specification -import com.snowplowanalytics.snowplow.badrows._ +import com.snowplowanalytics.iglu.client.validator.ValidatorReport import com.snowplowanalytics.snowplow.enrich.common.enrichments.{ClientEnrichments, MiscEnrichments} import com.snowplowanalytics.snowplow.enrich.common.utils.MapTransformer._ @@ -52,7 +52,7 @@ final class TargetBean { class MapTransformerSpec extends Specification with ValidatedMatchers { - val identity: (String, String) => Either[FailureDetails.EnrichmentFailure, String] = + val identity: (String, String) => Either[ValidatorReport, String] = (_, value) => value.asRight val sourceMap = Map( diff --git a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/utils/conversionUtilsSpecs.scala b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/utils/conversionUtilsSpecs.scala index a6dcd1c39..05b346632 100644 --- a/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/utils/conversionUtilsSpecs.scala +++ b/modules/common/src/test/scala/com.snowplowanalytics.snowplow.enrich.common/utils/conversionUtilsSpecs.scala @@ -26,6 +26,8 @@ import org.specs2.matcher.DataTables import com.snowplowanalytics.snowplow.badrows._ +import com.snowplowanalytics.iglu.client.validator.ValidatorReport + import com.snowplowanalytics.snowplow.enrich.common.outputs.EnrichedEvent class StringToUriSpec extends MSpecification with DataTables { @@ -210,7 +212,7 @@ class DecodeBase64UrlSpec extends Specification with DataTables with ScalaCheck """ // Only way of getting a failure currently - def e1 = ConversionUtils.decodeBase64Url(null) must beLeft("could not base64 decode: null") + def e1 = ConversionUtils.decodeBase64Url(null) must beLeft("Could not base64 decode: null") // No string creates a failure def e2 = @@ -263,14 +265,7 @@ class ValidateUuidSpec extends Specification with DataTables with ScalaCheck { def e2 = prop { (str: String) => ConversionUtils.validateUuid(FieldName, str) must beLeft( - FailureDetails.EnrichmentFailure( - None, - FailureDetails.EnrichmentFailureMessage.InputData( - FieldName, - Option(str), - "not a valid UUID" - ) - ) + ValidatorReport("Not a valid UUID", Some(FieldName), Nil, Option(str)) ) } } @@ -288,14 +283,7 @@ class ValidateIntegerSpec extends Specification { def e2 = { val str = "abc" ConversionUtils.validateInteger(FieldName, str) must beLeft( - FailureDetails.EnrichmentFailure( - None, - FailureDetails.EnrichmentFailureMessage.InputData( - FieldName, - Some(str), - "not a valid integer" - ) - ) + ValidatorReport("Not a valid integer", Some(FieldName), Nil, Some(str)) ) } } @@ -326,16 +314,8 @@ class StringToDoubleLikeSpec extends Specification with DataTables { """ val FieldName = "val" - def err: String => FailureDetails.EnrichmentFailure = - input => - FailureDetails.EnrichmentFailure( - None, - FailureDetails.EnrichmentFailureMessage.InputData( - FieldName, - Option(input), - "cannot be converted to Double-like" - ) - ) + def err: String => ValidatorReport = + input => ValidatorReport("Cannot be converted to Double-like", Some(FieldName), Nil, Option(input)) def e1 = "SPEC NAME" || "INPUT STR" | "EXPECTED" | @@ -379,7 +359,7 @@ class StringToJIntegerSpec extends Specification with DataTables { stringToJInteger should convert valid Strings to Java Integers $e2 """ - val err: String = "cannot be converted to java.lang.Integer" + val err: String = "Cannot be converted to java.lang.Integer" def e1 = "SPEC NAME" || "INPUT STR" | "EXPECTED" | @@ -410,16 +390,8 @@ class StringToBooleanLikeJByteSpec extends Specification with DataTables { """ val FieldName = "val" - def err: String => FailureDetails.EnrichmentFailure = - input => - FailureDetails.EnrichmentFailure( - None, - FailureDetails.EnrichmentFailureMessage.InputData( - FieldName, - Option(input), - "cannot be converted to Boolean-like java.lang.Byte" - ) - ) + def err: String => ValidatorReport = + input => ValidatorReport("Cannot be converted to Boolean-like java.lang.Byte", Some(FieldName), Nil, Option(input)) def e1 = "SPEC NAME" || "INPUT STR" | "EXPECTED" |