From cb15899477a41c42b795f22ec2d0369fa219642d Mon Sep 17 00:00:00 2001 From: Indy Prentice Date: Tue, 6 Jun 2023 18:09:45 -0500 Subject: [PATCH 1/4] Rollover usage events at a file size rather than time-based manner --- .../main/resources/index/usage-event/aws_es_ism_policy.json | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/metadata-service/restli-servlet-impl/src/main/resources/index/usage-event/aws_es_ism_policy.json b/metadata-service/restli-servlet-impl/src/main/resources/index/usage-event/aws_es_ism_policy.json index c1ab584a1ce61..ff09a663632b8 100644 --- a/metadata-service/restli-servlet-impl/src/main/resources/index/usage-event/aws_es_ism_policy.json +++ b/metadata-service/restli-servlet-impl/src/main/resources/index/usage-event/aws_es_ism_policy.json @@ -3,13 +3,14 @@ "policy_id": "PREFIXdatahub_usage_event_policy", "description": "Datahub Usage Event Policy", "default_state": "Rollover", - "schema_version": 1, + "schema_version": 2, "states": [ { "name": "Rollover", "actions": [ { "rollover": { + "max_size": "5gb", "min_index_age": "1d" } } @@ -56,4 +57,4 @@ "priority": 100 } } -} \ No newline at end of file +} From 4aeb5ee224f6055a225ae8a63d45407aaefc72c8 Mon Sep 17 00:00:00 2001 From: Indy Prentice Date: Wed, 14 Jun 2023 16:06:04 -0500 Subject: [PATCH 2/4] change max_size to min_size --- .../src/main/resources/index/usage-event/aws_es_ism_policy.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metadata-service/restli-servlet-impl/src/main/resources/index/usage-event/aws_es_ism_policy.json b/metadata-service/restli-servlet-impl/src/main/resources/index/usage-event/aws_es_ism_policy.json index ff09a663632b8..7c5e41e9d4cf7 100644 --- a/metadata-service/restli-servlet-impl/src/main/resources/index/usage-event/aws_es_ism_policy.json +++ b/metadata-service/restli-servlet-impl/src/main/resources/index/usage-event/aws_es_ism_policy.json @@ -10,7 +10,7 @@ "actions": [ { "rollover": { - "max_size": "5gb", + "min_size": "5gb", "min_index_age": "1d" } } From 353016cda9e245df681669b99085a63585057d5f Mon Sep 17 00:00:00 2001 From: Indy Prentice Date: Thu, 15 Jun 2023 19:08:14 -0500 Subject: [PATCH 3/4] Call validator on the base urn as well as aspect components when ingesting --- .../com/linkedin/metadata/entity/EntityService.java | 10 +++++++--- .../linkedin/metadata/entity/EntityServiceTest.java | 7 ++----- 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityService.java b/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityService.java index c09acc0b39cbf..2c662ecd506bb 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityService.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityService.java @@ -10,6 +10,7 @@ import com.github.fge.jsonpatch.JsonPatch; import com.github.fge.jsonpatch.JsonPatchException; import com.github.fge.jsonpatch.Patch; +import com.google.common.annotations.VisibleForTesting; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableSet; import com.google.common.collect.Iterators; @@ -25,6 +26,7 @@ import com.linkedin.data.DataMap; import com.linkedin.data.schema.RecordDataSchema; import com.linkedin.data.schema.TyperefDataSchema; +import com.linkedin.data.schema.validation.ValidationResult; import com.linkedin.data.schema.validator.Validator; import com.linkedin.data.template.DataTemplateUtil; import com.linkedin.data.template.RecordTemplate; @@ -556,10 +558,11 @@ private void validateAspect(Urn urn, RecordTemplate aspect) { } private void validateAspect(Urn urn, RecordTemplate aspect, Validator validator) { - RecordTemplateValidator.validate(aspect, validationResult -> { + Consumer resultFunction = validationResult -> { throw new IllegalArgumentException("Invalid format for aspect: " + aspect + " for entity: " + urn + "\n Cause: " - + validationResult.getMessages()); - }, validator); + + validationResult.getMessages()); }; + RecordTemplateValidator.validate(buildKeyAspect(urn), resultFunction, validator); + RecordTemplateValidator.validate(aspect, resultFunction, validator); } /** * Checks whether there is an actual update to the aspect by applying the updateLambda @@ -690,6 +693,7 @@ protected SystemMetadata generateSystemMetadataIfEmpty(@Nullable SystemMetadata return systemMetadata; } + @VisibleForTesting static void validateUrn(@Nonnull final Urn urn) { if (urn.toString().trim().length() != urn.toString().length()) { diff --git a/metadata-io/src/test/java/com/linkedin/metadata/entity/EntityServiceTest.java b/metadata-io/src/test/java/com/linkedin/metadata/entity/EntityServiceTest.java index de5e080f750be..19a3019f78d9e 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/entity/EntityServiceTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/entity/EntityServiceTest.java @@ -1213,12 +1213,9 @@ public void testValidateUrn() throws Exception { } // Urn purely too long - StringBuilder buildStringTooLong = new StringBuilder(); - for (int i = 0; i < 510; i++) { - buildStringTooLong.append('a'); - } + String stringTooLong = "a".repeat(510); - Urn testUrnTooLong = new Urn("li", "testType", new TupleKey(buildStringTooLong.toString())); + Urn testUrnTooLong = new Urn("li", "testType", new TupleKey(stringTooLong)); try { EntityService.validateUrn(testUrnTooLong); Assert.fail("Should have raised IllegalArgumentException for URN too long"); From 7466f8a20e14467ce2f18234f738b34f69fbe611 Mon Sep 17 00:00:00 2001 From: Indy Prentice Date: Thu, 15 Jun 2023 19:11:22 -0500 Subject: [PATCH 4/4] Do not update aws_es_ism_policy.json --- .../main/resources/index/usage-event/aws_es_ism_policy.json | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/metadata-service/restli-servlet-impl/src/main/resources/index/usage-event/aws_es_ism_policy.json b/metadata-service/restli-servlet-impl/src/main/resources/index/usage-event/aws_es_ism_policy.json index 7c5e41e9d4cf7..bc1442d216918 100644 --- a/metadata-service/restli-servlet-impl/src/main/resources/index/usage-event/aws_es_ism_policy.json +++ b/metadata-service/restli-servlet-impl/src/main/resources/index/usage-event/aws_es_ism_policy.json @@ -3,14 +3,13 @@ "policy_id": "PREFIXdatahub_usage_event_policy", "description": "Datahub Usage Event Policy", "default_state": "Rollover", - "schema_version": 2, + "schema_version": 1, "states": [ { "name": "Rollover", "actions": [ { "rollover": { - "min_size": "5gb", "min_index_age": "1d" } }