From cb32164a12212e0d5ca736d6945e1619a70e09bc Mon Sep 17 00:00:00 2001 From: Sarthak Aggarwal Date: Thu, 18 Jul 2024 18:08:27 +0530 Subject: [PATCH] nits and tests Signed-off-by: Sarthak Aggarwal --- .../aggregators/CountValueAggregator.java | 2 +- .../aggregators/MaxValueAggregator.java | 2 +- .../aggregators/MinValueAggregator.java | 2 +- .../aggregators/SumValueAggregator.java | 2 +- .../startree/aggregators/ValueAggregator.java | 2 +- .../startree/builder/BaseStarTreeBuilder.java | 35 +--- .../startree/utils/StarTreeDataWriter.java | 11 +- .../startree/meta/StarTreeMetaTests.java | 196 ++++++++++++++++++ 8 files changed, 209 insertions(+), 43 deletions(-) create mode 100644 server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/meta/StarTreeMetaTests.java diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/aggregators/CountValueAggregator.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/aggregators/CountValueAggregator.java index d7485b809f469..fb87c7fc38e7e 100644 --- a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/aggregators/CountValueAggregator.java +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/aggregators/CountValueAggregator.java @@ -65,7 +65,7 @@ public Long toStarTreeNumericTypeValue(Long value) { } @Override - public long getIdempotentMetricValue() { + public long getIdentityMetricValue() { return 0L; } } diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/aggregators/MaxValueAggregator.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/aggregators/MaxValueAggregator.java index 13fd64e024e24..c5fccd1470d51 100644 --- a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/aggregators/MaxValueAggregator.java +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/aggregators/MaxValueAggregator.java @@ -74,7 +74,7 @@ public Double toStarTreeNumericTypeValue(Long value) { } @Override - public long getIdempotentMetricValue() { + public long getIdentityMetricValue() { return Long.MIN_VALUE; } } diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/aggregators/MinValueAggregator.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/aggregators/MinValueAggregator.java index 0ca9e5d5b63e9..19d4d3476f7a5 100644 --- a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/aggregators/MinValueAggregator.java +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/aggregators/MinValueAggregator.java @@ -74,7 +74,7 @@ public Double toStarTreeNumericTypeValue(Long value) { } @Override - public long getIdempotentMetricValue() { + public long getIdentityMetricValue() { return Long.MAX_VALUE; } } diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/aggregators/SumValueAggregator.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/aggregators/SumValueAggregator.java index 8ad0d1e6e5baf..13607b8729bde 100644 --- a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/aggregators/SumValueAggregator.java +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/aggregators/SumValueAggregator.java @@ -96,7 +96,7 @@ public Double toStarTreeNumericTypeValue(Long value) { } @Override - public long getIdempotentMetricValue() { + public long getIdentityMetricValue() { return 0; } } diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/aggregators/ValueAggregator.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/aggregators/ValueAggregator.java index 7e3f90e94290c..301f741b95c35 100644 --- a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/aggregators/ValueAggregator.java +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/aggregators/ValueAggregator.java @@ -65,5 +65,5 @@ public interface ValueAggregator { /** * Fetches a value that does not alter the result of aggregations */ - long getIdempotentMetricValue(); + long getIdentityMetricValue(); } diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/BaseStarTreeBuilder.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/BaseStarTreeBuilder.java index 480c647e03afa..0523f84fccd0e 100644 --- a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/BaseStarTreeBuilder.java +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/BaseStarTreeBuilder.java @@ -23,7 +23,6 @@ import org.apache.lucene.store.IndexOutput; import org.apache.lucene.util.Counter; import org.apache.lucene.util.NumericUtils; -import org.opensearch.common.time.DateUtils; import org.opensearch.index.compositeindex.datacube.Dimension; import org.opensearch.index.compositeindex.datacube.Metric; import org.opensearch.index.compositeindex.datacube.MetricStat; @@ -42,7 +41,6 @@ import org.opensearch.index.mapper.NumberFieldMapper; import java.io.IOException; -import java.time.temporal.ChronoField; import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; @@ -269,35 +267,6 @@ public void build( serializeStarTree(numSegmentStarTreeDocument); } - private long getTimeStampVal(final String fieldName, final long val) { - long roundedDate = 0; - long ratio = 0; - - switch (fieldName) { - - case "@timestamp": - ratio = ChronoField.MINUTE_OF_HOUR.getBaseUnit().getDuration().toMillis(); - roundedDate = DateUtils.roundFloor(val, ratio); - return roundedDate; - case "hour": - ratio = ChronoField.HOUR_OF_DAY.getBaseUnit().getDuration().toMillis(); - roundedDate = DateUtils.roundFloor(val, ratio); - return roundedDate; - case "day": - ratio = ChronoField.DAY_OF_MONTH.getBaseUnit().getDuration().toMillis(); - roundedDate = DateUtils.roundFloor(val, ratio); - return roundedDate; - case "month": - roundedDate = DateUtils.roundMonthOfYear(val); - return roundedDate; - case "year": - roundedDate = DateUtils.roundYear(val); - return roundedDate; - default: - return val; - } - } - private void serializeStarTree(int numSegmentStarTreeDocument) throws IOException { // serialize the star tree data long dataFilePointer = dataOut.getFilePointer(); @@ -581,7 +550,7 @@ protected StarTreeDocument reduceSegmentStarTreeDocuments( metrics[i] = metricValueAggregator.getInitialAggregatedValue(segmentDocument.metrics[i]); } else { metrics[i] = metricValueAggregator.getInitialAggregatedValueForSegmentDocValue( - getLong(segmentDocument.metrics[i], metricValueAggregator.getIdempotentMetricValue()), + getLong(segmentDocument.metrics[i], metricValueAggregator.getIdentityMetricValue()), starTreeNumericType ); } @@ -605,7 +574,7 @@ protected StarTreeDocument reduceSegmentStarTreeDocuments( } else { aggregatedSegmentDocument.metrics[i] = metricValueAggregator.mergeAggregatedValueAndSegmentValue( aggregatedSegmentDocument.metrics[i], - getLong(segmentDocument.metrics[i], metricValueAggregator.getIdempotentMetricValue()), + getLong(segmentDocument.metrics[i], metricValueAggregator.getIdentityMetricValue()), starTreeNumericType ); } diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/utils/StarTreeDataWriter.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/utils/StarTreeDataWriter.java index 5dc8d34226772..874c24696c46c 100644 --- a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/utils/StarTreeDataWriter.java +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/utils/StarTreeDataWriter.java @@ -43,14 +43,15 @@ public class StarTreeDataWriter { * @throws IOException if an I/O error occurs while writing the star-tree data */ public static long serializeStarTree(IndexOutput indexOutput, TreeNode rootNode, int numNodes) throws IOException { - int headerSizeInBytes = computeStarTreeDataHeaderByteSize(); - long totalSizeInBytes = headerSizeInBytes + (long) numNodes * SERIALIZABLE_DATA_SIZE_IN_BYTES; - - logger.debug("Star tree size in bytes : {}", totalSizeInBytes); + long totalDataSizeInBytes = (long) numNodes * SERIALIZABLE_DATA_SIZE_IN_BYTES; + if (logger.isDebugEnabled()) { + int headerSizeInBytes = computeStarTreeDataHeaderByteSize(); + logger.debug("Star tree size in bytes : {}", headerSizeInBytes + totalDataSizeInBytes); + } writeStarTreeHeader(indexOutput, numNodes); writeStarTreeNodes(indexOutput, rootNode); - return totalSizeInBytes; + return totalDataSizeInBytes; } /** diff --git a/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/meta/StarTreeMetaTests.java b/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/meta/StarTreeMetaTests.java new file mode 100644 index 0000000000000..22ad144f68aeb --- /dev/null +++ b/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/meta/StarTreeMetaTests.java @@ -0,0 +1,196 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.compositeindex.datacube.startree.meta; + +import org.apache.lucene.codecs.lucene99.Lucene99Codec; +import org.apache.lucene.index.DocValuesType; +import org.apache.lucene.index.FieldInfo; +import org.apache.lucene.index.FieldInfos; +import org.apache.lucene.index.IndexOptions; +import org.apache.lucene.index.SegmentInfo; +import org.apache.lucene.index.SegmentWriteState; +import org.apache.lucene.index.VectorEncoding; +import org.apache.lucene.index.VectorSimilarityFunction; +import org.apache.lucene.store.Directory; +import org.apache.lucene.store.IOContext; +import org.apache.lucene.store.IndexInput; +import org.apache.lucene.store.IndexOutput; +import org.apache.lucene.util.InfoStream; +import org.apache.lucene.util.Version; +import org.junit.Before; +import org.opensearch.index.compositeindex.CompositeIndexMetadata; +import org.opensearch.index.compositeindex.datacube.Dimension; +import org.opensearch.index.compositeindex.datacube.Metric; +import org.opensearch.index.compositeindex.datacube.MetricStat; +import org.opensearch.index.compositeindex.datacube.NumericDimension; +import org.opensearch.index.compositeindex.datacube.startree.StarTreeField; +import org.opensearch.index.compositeindex.datacube.startree.StarTreeFieldConfiguration; +import org.opensearch.index.compositeindex.datacube.startree.aggregators.MetricAggregatorInfo; +import org.opensearch.index.compositeindex.datacube.startree.aggregators.MetricEntry; +import org.opensearch.index.compositeindex.datacube.startree.utils.StarTreeBuilderUtils; +import org.opensearch.index.fielddata.IndexNumericFieldData; +import org.opensearch.test.OpenSearchTestCase; + +import java.io.IOException; +import java.nio.charset.StandardCharsets; +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.UUID; + +import static org.opensearch.index.compositeindex.CompositeIndexConstants.MAGIC_MARKER; +import static org.opensearch.index.mapper.CompositeMappedFieldType.CompositeFieldType.STAR_TREE; + +public class StarTreeMetaTests extends OpenSearchTestCase { + + private IndexOutput metaOut; + private IndexInput metaIn; + private StarTreeField starTreeField; + private SegmentWriteState writeState; + private Directory directory; + private FieldInfo[] fieldsInfo; + private List dimensionsOrder; + private List fields = List.of(); + private List metrics; + private List metricAggregatorInfos = new ArrayList<>(); + private int segmentDocumentCount; + private long dataFilePointer; + private long dataFileLength; + + @Before + public void setup() throws IOException { + fields = List.of("field1", "field2", "field3", "field4", "field5", "field6", "field7", "field8", "field9", "field10"); + directory = newFSDirectory(createTempDir()); + SegmentInfo segmentInfo = new SegmentInfo( + directory, + Version.LATEST, + Version.LUCENE_9_11_0, + "test_segment", + 6, + false, + false, + new Lucene99Codec(), + new HashMap<>(), + UUID.randomUUID().toString().substring(0, 16).getBytes(StandardCharsets.UTF_8), + new HashMap<>(), + null + ); + + fieldsInfo = new FieldInfo[fields.size()]; + for (int i = 0; i < fieldsInfo.length; i++) { + fieldsInfo[i] = new FieldInfo( + fields.get(i), + i, + false, + false, + true, + IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS, + DocValuesType.SORTED_NUMERIC, + -1, + Collections.emptyMap(), + 0, + 0, + 0, + 0, + VectorEncoding.FLOAT32, + VectorSimilarityFunction.EUCLIDEAN, + false, + false + ); + } + FieldInfos fieldInfos = new FieldInfos(fieldsInfo); + writeState = new SegmentWriteState(InfoStream.getDefault(), segmentInfo.dir, segmentInfo, fieldInfos, null, newIOContext(random())); + } + + + public void test_starTreeMetadata() throws IOException { + dimensionsOrder = List.of( + new NumericDimension("field1"), + new NumericDimension("field3"), + new NumericDimension("field5"), + new NumericDimension("field8") + ); + metrics = List.of( + new Metric("field2", List.of(MetricStat.SUM)), + new Metric("field4", List.of(MetricStat.SUM)), + new Metric("field6", List.of(MetricStat.COUNT)), + new Metric("field9", List.of(MetricStat.MIN)), + new Metric("field10", List.of(MetricStat.MAX)) + ); + int maxLeafDocs = randomInt(); + StarTreeFieldConfiguration starTreeFieldConfiguration = new StarTreeFieldConfiguration( + maxLeafDocs, + new HashSet<>(), + StarTreeFieldConfiguration.StarTreeBuildMode.ON_HEAP + ); + starTreeField = new StarTreeField("star_tree", dimensionsOrder, metrics, starTreeFieldConfiguration); + + for (Metric metric : metrics) { + for (MetricStat metricType : metric.getMetrics()) { + MetricAggregatorInfo metricAggregatorInfo = new MetricAggregatorInfo( + metricType, + metric.getField(), + starTreeField.getName(), + IndexNumericFieldData.NumericType.DOUBLE + ); + metricAggregatorInfos.add(metricAggregatorInfo); + } + } + + dataFileLength = randomLong(); + dataFilePointer = randomLong(); + segmentDocumentCount = randomInt(); + metaOut = directory.createOutput("star-tree-metadata", IOContext.DEFAULT); + StarTreeBuilderUtils.serializeStarTreeMetadata(metaOut, starTreeField, writeState, metricAggregatorInfos, segmentDocumentCount, dataFilePointer, dataFileLength); + metaOut.close(); + metaIn = directory.openInput("star-tree-metadata", IOContext.READONCE); + assertEquals(MAGIC_MARKER, metaIn.readLong()); + + CompositeIndexMetadata compositeIndexMetadata = new CompositeIndexMetadata(metaIn); + assertEquals(starTreeField.getName(), compositeIndexMetadata.getCompositeFieldName()); + assertEquals(STAR_TREE, compositeIndexMetadata.getCompositeFieldType()); + + StarTreeMetadata starTreeMetadata = compositeIndexMetadata.getStarTreeMetadata(); + assertNotNull(starTreeMetadata); + + for (int i = 0; i< dimensionsOrder.size(); i++){ + assertEquals(writeState.fieldInfos.fieldInfo(dimensionsOrder.get(i).getField()).getFieldNumber(), starTreeMetadata.getDimensionFieldNumbers().get(i), 0); + } + + for (int i = 0; i< metricAggregatorInfos.size(); i++){ + MetricEntry metricEntry = starTreeMetadata.getMetricEntries().get(i); + assertEquals(metricAggregatorInfos.get(i).getField(), metricEntry.getMetricName()); + assertEquals(metricAggregatorInfos.get(i).getMetricStat(), metricEntry.getMetricStat()); + } + assertEquals(segmentDocumentCount, starTreeMetadata.getSegmentAggregatedDocCount(), 0); + assertEquals(maxLeafDocs, starTreeMetadata.getMaxLeafDocs(), 0); + assertEquals(starTreeFieldConfiguration.getSkipStarNodeCreationInDims().size(), starTreeMetadata.getSkipStarNodeCreationInDims().size()); + for (String skipStarNodeCreationInDims : starTreeField.getStarTreeConfig().getSkipStarNodeCreationInDims()){ + Integer skipStarNodeCreationInDimsFieldNumber = writeState.fieldInfos.fieldInfo(skipStarNodeCreationInDims).getFieldNumber(); + assertTrue(starTreeMetadata.getSkipStarNodeCreationInDims().contains(skipStarNodeCreationInDimsFieldNumber)); + } + assertEquals(starTreeFieldConfiguration.getBuildMode(), starTreeMetadata.getStarTreeBuildMode()); + assertEquals(dataFileLength, starTreeMetadata.getDataLength()); + assertEquals(dataFilePointer, starTreeMetadata.getDataStartFilePointer()); + + metaIn.close(); + + } + + @Override + public void tearDown() throws Exception { + super.tearDown(); + metaOut.close(); + metaIn.close(); + directory.close(); + } + +}