From 1b5009e7ea004898470d23d09d69e3726ba6caba Mon Sep 17 00:00:00 2001 From: Sarthak Aggarwal Date: Thu, 18 Jul 2024 11:08:13 +0530 Subject: [PATCH] some refactors Signed-off-by: Sarthak Aggarwal --- .../composite/Composite99DocValuesFormat.java | 2 +- .../composite/Composite99DocValuesReader.java | 33 ++++++- .../datacube/startree/StarTreeValues.java | 9 +- .../CompositeIndexMetadata.java | 2 +- .../startree/aggregators/ValueAggregator.java | 2 +- .../startree/builder/BaseStarTreeBuilder.java | 92 +++---------------- .../startree/builder/StarTreeBuilder.java | 2 +- .../startree/builder/StarTreesBuilder.java | 14 ++- ...Node.java => FixedLengthStarTreeNode.java} | 22 +++-- .../startree/node/OffHeapStarTree.java | 62 ------------- .../datacube/startree/node/StarTree.java | 49 +++++++++- .../datacube/startree/node/StarTreeNode.java | 1 - .../datacube/startree/node/Tree.java | 23 +++++ .../utils/StarTreeDataSerializer.java | 4 +- .../utils/StarTreeMetaSerializer.java | 53 ++++++----- .../datacube/startree/utils/TreeNode.java | 4 +- .../builder/OnHeapStarTreeBuilderTests.java | 32 ++++--- 17 files changed, 197 insertions(+), 209 deletions(-) rename server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/node/{OffHeapStarTreeNode.java => FixedLengthStarTreeNode.java} (85%) delete mode 100644 server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/node/OffHeapStarTree.java create mode 100644 server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/node/Tree.java diff --git a/server/src/main/java/org/opensearch/index/codec/composite/Composite99DocValuesFormat.java b/server/src/main/java/org/opensearch/index/codec/composite/Composite99DocValuesFormat.java index 150dea6277fff..d4b0c0d1f4b80 100644 --- a/server/src/main/java/org/opensearch/index/codec/composite/Composite99DocValuesFormat.java +++ b/server/src/main/java/org/opensearch/index/codec/composite/Composite99DocValuesFormat.java @@ -53,7 +53,7 @@ public class Composite99DocValuesFormat extends DocValuesFormat { public static final String DATA_DOC_VALUES_CODEC = "Composite99DocValuesData"; /** Meta doc values codec name for Composite Doc Values Format */ - static final String META_DOC_VALUES_CODEC = "Composite99DocValuesMetadata"; + public static final String META_DOC_VALUES_CODEC = "Composite99DocValuesMetadata"; /** Filename extension for the composite index data doc values */ public static final String DATA_DOC_VALUES_EXTENSION = "cidvd"; diff --git a/server/src/main/java/org/opensearch/index/codec/composite/Composite99DocValuesReader.java b/server/src/main/java/org/opensearch/index/codec/composite/Composite99DocValuesReader.java index 6d0aad5be6c13..bd4fbaca3ac05 100644 --- a/server/src/main/java/org/opensearch/index/codec/composite/Composite99DocValuesReader.java +++ b/server/src/main/java/org/opensearch/index/codec/composite/Composite99DocValuesReader.java @@ -36,13 +36,14 @@ import org.opensearch.index.compositeindex.datacube.startree.StarTreeFieldConfiguration; import org.opensearch.index.compositeindex.datacube.startree.aggregators.MetricEntry; import org.opensearch.index.compositeindex.datacube.startree.meta.StarTreeMetadata; -import org.opensearch.index.compositeindex.datacube.startree.node.OffHeapStarTree; import org.opensearch.index.compositeindex.datacube.startree.node.StarTree; import org.opensearch.index.compositeindex.datacube.startree.node.StarTreeNode; +import org.opensearch.index.compositeindex.datacube.startree.node.Tree; import org.opensearch.index.compositeindex.datacube.startree.utils.StarTreeHelper; import java.io.IOException; import java.util.ArrayList; +import java.util.HashMap; import java.util.HashSet; import java.util.LinkedHashMap; import java.util.List; @@ -64,7 +65,7 @@ public class Composite99DocValuesReader extends DocValuesProducer implements Com private final DocValuesProducer delegate; private IndexInput dataIn; private ChecksumIndexInput metaIn; - private final Map starTreeMap = new LinkedHashMap<>(); + private final Map starTreeMap = new LinkedHashMap<>(); private final Map compositeIndexMetadataMap = new LinkedHashMap<>(); private final Map compositeDocValuesProducerMap = new LinkedHashMap<>(); private final List compositeFieldInfos = new ArrayList<>(); @@ -137,7 +138,7 @@ public Composite99DocValuesReader(DocValuesProducer producer, SegmentReadState r switch (compositeIndexMetadata.getCompositeFieldType()) { case STAR_TREE: StarTreeMetadata starTreeMetadata = compositeIndexMetadata.getStarTreeMetadata(); - StarTree starTree = new OffHeapStarTree(dataIn, starTreeMetadata); + Tree starTree = new StarTree(dataIn, starTreeMetadata); starTreeMap.put(compositeFieldName, starTree); compositeIndexMetadataMap.put(compositeFieldName, compositeIndexMetadata); @@ -147,6 +148,7 @@ public Composite99DocValuesReader(DocValuesProducer producer, SegmentReadState r dimensions.add(readState.fieldInfos.fieldInfo(fieldNumber)); } + // initialize star-tree doc values producer StarTree99DocValuesProducer starTreeDocValuesProducer = new StarTree99DocValuesProducer( readState, Composite99DocValuesFormat.DATA_DOC_VALUES_CODEC, @@ -240,14 +242,18 @@ public CompositeIndexValues getCompositeIndexValues(CompositeIndexFieldInfo comp switch (compositeIndexFieldInfo.getType()) { case STAR_TREE: + // building star tree values CompositeIndexMetadata compositeIndexMetadata = compositeIndexMetadataMap.get(compositeIndexFieldInfo.getField()); StarTreeMetadata starTreeMetadata = compositeIndexMetadata.getStarTreeMetadata(); + + // build skip star node dimensions Set skipStarNodeCreationInDimsFieldNumbers = starTreeMetadata.getSkipStarNodeCreationInDims(); Set skipStarNodeCreationInDims = new HashSet<>(); for (Integer fieldNumber : skipStarNodeCreationInDimsFieldNumbers) { skipStarNodeCreationInDims.add(readState.fieldInfos.fieldInfo(fieldNumber).getName()); } + // build dimensions List dimensionFieldNumbers = starTreeMetadata.getDimensionFieldNumbers(); List dimensions = new ArrayList<>(); List mergeDimensions = new ArrayList<>(); @@ -256,6 +262,7 @@ public CompositeIndexValues getCompositeIndexValues(CompositeIndexFieldInfo comp mergeDimensions.add(new MergeDimension(readState.fieldInfos.fieldInfo(fieldNumber).name)); } + // build metrics Map starTreeMetricMap = new ConcurrentHashMap<>(); for (MetricEntry metricEntry : starTreeMetadata.getMetricEntries()) { String metricName = metricEntry.getMetricName(); @@ -265,6 +272,7 @@ public CompositeIndexValues getCompositeIndexValues(CompositeIndexFieldInfo comp } List starTreeMetrics = new ArrayList<>(starTreeMetricMap.values()); + // star-tree field StarTreeField starTreeField = new StarTreeField( compositeIndexMetadata.getCompositeFieldName(), mergeDimensions, @@ -275,13 +283,18 @@ public CompositeIndexValues getCompositeIndexValues(CompositeIndexFieldInfo comp starTreeMetadata.getStarTreeBuildMode() ) ); + + // star-tree root node StarTreeNode rootNode = starTreeMap.get(compositeIndexFieldInfo.getField()).getRoot(); + + // get doc id set iterators for metrics and dimensions StarTree99DocValuesProducer starTree99DocValuesProducer = (StarTree99DocValuesProducer) compositeDocValuesProducerMap.get( compositeIndexMetadata.getCompositeFieldName() ); Map dimensionsDocIdSetIteratorMap = new LinkedHashMap<>(); Map metricsDocIdSetIteratorMap = new LinkedHashMap<>(); + // get doc id set iterators for dimensions for (String dimension : dimensions) { dimensionsDocIdSetIteratorMap.put( dimension, @@ -291,6 +304,7 @@ public CompositeIndexValues getCompositeIndexValues(CompositeIndexFieldInfo comp ); } + // get doc id set iterators for metrics for (MetricEntry metricEntry : starTreeMetadata.getMetricEntries()) { String metricFullName = StarTreeHelper.fullFieldNameForStarTreeMetricsDocValues( starTreeField.getName(), @@ -300,7 +314,18 @@ public CompositeIndexValues getCompositeIndexValues(CompositeIndexFieldInfo comp metricsDocIdSetIteratorMap.put(metricFullName, starTree99DocValuesProducer.getSortedNumeric(metricFullName)); } - return new StarTreeValues(starTreeField, rootNode, dimensionsDocIdSetIteratorMap, metricsDocIdSetIteratorMap); + // create star-tree attributes map + Map starTreeAttributes = new HashMap<>(); + starTreeAttributes.put("segment_docs_count", String.valueOf(starTreeMetadata.getSegmentAggregatedDocCount())); + + // return star-tree values + return new StarTreeValues( + starTreeField, + rootNode, + dimensionsDocIdSetIteratorMap, + metricsDocIdSetIteratorMap, + starTreeAttributes + ); default: throw new CorruptIndexException("Unsupported composite index field type: ", compositeIndexFieldInfo.getType().getName()); diff --git a/server/src/main/java/org/opensearch/index/codec/composite/datacube/startree/StarTreeValues.java b/server/src/main/java/org/opensearch/index/codec/composite/datacube/startree/StarTreeValues.java index baed90273d311..8378a4063b7ca 100644 --- a/server/src/main/java/org/opensearch/index/codec/composite/datacube/startree/StarTreeValues.java +++ b/server/src/main/java/org/opensearch/index/codec/composite/datacube/startree/StarTreeValues.java @@ -27,17 +27,20 @@ public class StarTreeValues implements CompositeIndexValues { private final StarTreeNode root; private final Map dimensionDocValuesIteratorMap; private final Map metricDocValuesIteratorMap; + private final Map attributes; public StarTreeValues( StarTreeField starTreeField, StarTreeNode root, Map dimensionDocValuesIteratorMap, - Map metricDocValuesIteratorMap + Map metricDocValuesIteratorMap, + Map attributes ) { this.starTreeField = starTreeField; this.root = root; this.dimensionDocValuesIteratorMap = dimensionDocValuesIteratorMap; this.metricDocValuesIteratorMap = metricDocValuesIteratorMap; + this.attributes = attributes; } @Override @@ -60,4 +63,8 @@ public Map getDimensionDocValuesIteratorMap() { public Map getMetricDocValuesIteratorMap() { return metricDocValuesIteratorMap; } + + public Map getAttributes() { + return attributes; + } } diff --git a/server/src/main/java/org/opensearch/index/compositeindex/CompositeIndexMetadata.java b/server/src/main/java/org/opensearch/index/compositeindex/CompositeIndexMetadata.java index 900d13cbb1f49..8bb72708e1d52 100644 --- a/server/src/main/java/org/opensearch/index/compositeindex/CompositeIndexMetadata.java +++ b/server/src/main/java/org/opensearch/index/compositeindex/CompositeIndexMetadata.java @@ -22,7 +22,7 @@ /** * This class represents the metadata of a Composite Index, which includes information about * the composite field name, type, and the specific metadata for the type of composite field - * (e.g., StarTree metadata). + * (e.g., Tree metadata). * * @opensearch.experimental */ diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/aggregators/ValueAggregator.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/aggregators/ValueAggregator.java index 21764ace79915..7e3f90e94290c 100644 --- a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/aggregators/ValueAggregator.java +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/aggregators/ValueAggregator.java @@ -63,7 +63,7 @@ public interface ValueAggregator { A toStarTreeNumericTypeValue(Long rawValue); /** - * Fetches an value that does not alter the result of aggregations + * Fetches a value that does not alter the result of aggregations */ long getIdempotentMetricValue(); } diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/BaseStarTreeBuilder.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/BaseStarTreeBuilder.java index d5c2f8ef4c20a..480c647e03afa 100644 --- a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/BaseStarTreeBuilder.java +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/BaseStarTreeBuilder.java @@ -54,9 +54,9 @@ import java.util.Set; import java.util.concurrent.atomic.AtomicInteger; +import static org.opensearch.index.compositeindex.datacube.startree.utils.StarTreeBuilderUtils.ALL; import static org.opensearch.index.compositeindex.datacube.startree.utils.StarTreeHelper.fullFieldNameForStarTreeDimensionsDocValues; import static org.opensearch.index.compositeindex.datacube.startree.utils.StarTreeHelper.fullFieldNameForStarTreeMetricsDocValues; -import static org.opensearch.index.compositeindex.datacube.startree.utils.TreeNode.ALL; /** * Builder for star tree. Defines the algorithm to construct star-tree @@ -195,8 +195,8 @@ public List getMetricReaders(SegmentWriteState stat * Builds the star tree from the original segment documents * * @param fieldProducerMap contain s the docValues producer to get docValues associated with each field - * @param fieldNumberAcrossStarTrees - * @param starTreeDocValuesConsumer + * @param fieldNumberAcrossStarTrees maintains a counter for the number of star-tree fields + * @param starTreeDocValuesConsumer consumes the generated star-tree docValues * @throws IOException when we are unable to build star-tree */ public void build( @@ -227,8 +227,8 @@ public void build( * Builds the star tree using sorted and aggregated star-tree Documents * * @param starTreeDocumentIterator contains the sorted and aggregated documents - * @param fieldNumberAcrossStarTrees - * @param starTreeDocValuesConsumer + * @param fieldNumberAcrossStarTrees maintains a counter for the number of star-tree fields + * @param starTreeDocValuesConsumer consumes the generated star-tree docValues * @throws IOException when we are unable to build star-tree */ public void build( @@ -374,7 +374,7 @@ private void createSortedDocValuesIndices(DocValuesConsumer docValuesConsumer, A } for (int docId = 0; docId < numStarTreeDocs; docId++) { - StarTreeDocument starTreeDocument = getStarTreeDocument(docId); + StarTreeDocument starTreeDocument = getStarTreeDocumentForCreatingDocValues(docId); for (int i = 0; i < starTreeDocument.dimensions.length; i++) { Long val = starTreeDocument.dimensions[i]; if (val != null) { @@ -439,6 +439,13 @@ public SortedNumericDocValues getSortedNumeric(FieldInfo field) { */ public abstract StarTreeDocument getStarTreeDocument(int docId) throws IOException; + /** + * Returns the star-tree document for the given doc id while creating doc values + * + * @param docId document id + * @return star tree document + * @throws IOException if an I/O error occurs while fetching the star-tree document + */ public abstract StarTreeDocument getStarTreeDocumentForCreatingDocValues(int docId) throws IOException; /** @@ -620,12 +627,12 @@ protected StarTreeDocument reduceSegmentStarTreeDocuments( */ private static long getLong(Object metric, long idempotentMetricValue) { - Long metricValue; + long metricValue; try { if (metric instanceof Long) { metricValue = (long) metric; } else if (metric != null) { - metricValue = Long.valueOf(String.valueOf(metric)); + metricValue = Long.parseLong(String.valueOf(metric)); } else { logger.debug("metric value is null, returning idempotent metric value for the aggregator"); return idempotentMetricValue; @@ -677,75 +684,6 @@ public StarTreeDocument reduceStarTreeDocuments(StarTreeDocument aggregatedDocum } } - /** - * Builds the star tree from the original segment documents - * - * @param fieldProducerMap contain s the docValues producer to get docValues associated with each field - * @throws IOException when we are unable to build star-tree - */ - public void build(Map fieldProducerMap) throws IOException { - long startTime = System.currentTimeMillis(); - logger.debug("Star-tree build is a go with star tree field {}", starTreeField.getName()); - if (totalSegmentDocs == 0) { - logger.debug("No documents found in the segment"); - return; - } - List metricReaders = getMetricReaders(writeState, fieldProducerMap); - List dimensionsSplitOrder = starTreeField.getDimensionsOrder(); - SequentialDocValuesIterator[] dimensionReaders = new SequentialDocValuesIterator[dimensionsSplitOrder.size()]; - for (int i = 0; i < numDimensions; i++) { - String dimension = dimensionsSplitOrder.get(i).getField(); - FieldInfo dimensionFieldInfo = writeState.fieldInfos.fieldInfo(dimension); - dimensionReaders[i] = new SequentialDocValuesIterator( - fieldProducerMap.get(dimensionFieldInfo.name).getSortedNumeric(dimensionFieldInfo) - ); - } - Iterator starTreeDocumentIterator = sortAndAggregateSegmentDocuments(dimensionReaders, metricReaders); - logger.debug("Sorting and aggregating star-tree in ms : {}", (System.currentTimeMillis() - startTime)); - build(starTreeDocumentIterator); - logger.debug("Finished Building star-tree in ms : {}", (System.currentTimeMillis() - startTime)); - } - - /** - * Builds the star tree using Star-Tree Document - * - * @param starTreeDocumentIterator contains the sorted and aggregated documents - * @throws IOException when we are unable to build star-tree - */ - void build(Iterator starTreeDocumentIterator) throws IOException { - int numSegmentStarTreeDocument = totalSegmentDocs; - - while (starTreeDocumentIterator.hasNext()) { - appendToStarTree(starTreeDocumentIterator.next()); - } - int numStarTreeDocument = numStarTreeDocs; - logger.debug("Generated star tree docs : [{}] from segment docs : [{}]", numStarTreeDocument, numSegmentStarTreeDocument); - - if (numStarTreeDocs == 0) { - // TODO: Uncomment when segment codec and file formats is ready - // StarTreeBuilderUtils.serializeTree(indexOutput, rootNode, dimensionsSplitOrder, numNodes); - return; - } - - constructStarTree(rootNode, 0, numStarTreeDocs); - int numStarTreeDocumentUnderStarNode = numStarTreeDocs - numStarTreeDocument; - logger.debug( - "Finished constructing star-tree, got [ {} ] tree nodes and [ {} ] starTreeDocument under star-node", - numStarTreeNodes, - numStarTreeDocumentUnderStarNode - ); - - createAggregatedDocs(rootNode); - int numAggregatedStarTreeDocument = numStarTreeDocs - numStarTreeDocument - numStarTreeDocumentUnderStarNode; - logger.debug("Finished creating aggregated documents : {}", numAggregatedStarTreeDocument); - - // TODO: When StarTree Codec is ready - // Create doc values indices in disk - // Serialize and save in disk - // Write star tree metadata for off heap implementation - - } - /** * Adds a document to star-tree * diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/StarTreeBuilder.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/StarTreeBuilder.java index 7e4159d035113..98a3cf9e154d4 100644 --- a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/StarTreeBuilder.java +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/StarTreeBuilder.java @@ -42,7 +42,7 @@ void build( ) throws IOException; /** - * Builds the star tree using StarTree values from multiple segments + * Builds the star tree using Tree values from multiple segments * * @param starTreeValuesSubs contains the star tree values from multiple segments * @param fieldNumberAcrossStarTrees maintains the unique field number across the fields in the star tree diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/StarTreesBuilder.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/StarTreesBuilder.java index de489de3d616a..a92384c250dfd 100644 --- a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/StarTreesBuilder.java +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/StarTreesBuilder.java @@ -66,7 +66,13 @@ public StarTreesBuilder(SegmentWriteState segmentWriteState, MapperService mappe } /** - * Builds the star-trees. + * Builds all star-trees for given star-tree fields. + * + * @param metaOut an IndexInput for star-tree metadata + * @param dataOut an IndexInput for star-tree data + * @param fieldProducerMap fetches iterators for the fields (dimensions and metrics) + * @param starTreeDocValuesConsumer a consumer to write star-tree doc values + * @throws IOException */ public void build( IndexOutput metaOut, @@ -101,11 +107,11 @@ public void close() throws IOException { /** * Merges star tree fields from multiple segments * - * @param metaOut - * @param dataOut + * @param metaOut an IndexInput for star-tree metadata + * @param dataOut an IndexInput for star-tree data * @param starTreeFieldMap StarTreeField configuration per field * @param starTreeValuesSubsPerField starTreeValuesSubs per field - * @param starTreeDocValuesConsumer + * @param starTreeDocValuesConsumer a consumer to write star-tree doc values */ public void buildDuringMerge( IndexOutput metaOut, diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/node/OffHeapStarTreeNode.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/node/FixedLengthStarTreeNode.java similarity index 85% rename from server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/node/OffHeapStarTreeNode.java rename to server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/node/FixedLengthStarTreeNode.java index ec30f14f044d4..b4adae7a9380a 100644 --- a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/node/OffHeapStarTreeNode.java +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/node/FixedLengthStarTreeNode.java @@ -12,12 +12,14 @@ import java.io.IOException; import java.util.Iterator; +import static org.opensearch.index.compositeindex.datacube.startree.utils.StarTreeBuilderUtils.ALL; + /** - * Off heap implementation of {@link StarTreeNode} + * Fixed Length implementation of {@link StarTreeNode} * * @opensearch.experimental */ -public class OffHeapStarTreeNode implements StarTreeNode { +public class FixedLengthStarTreeNode implements StarTreeNode { public static final int NUM_INT_SERIALIZABLE_FIELDS = 6; public static final int NUM_LONG_SERIALIZABLE_FIELDS = 1; public static final int NUM_BYTE_SERIALIZABLE_FIELDS = 1; @@ -39,7 +41,7 @@ public class OffHeapStarTreeNode implements StarTreeNode { RandomAccessInput in; - public OffHeapStarTreeNode(RandomAccessInput in, int nodeId) throws IOException { + public FixedLengthStarTreeNode(RandomAccessInput in, int nodeId) throws IOException { this.in = in; this.nodeId = nodeId; firstChildId = getInt(FIRST_CHILD_ID_OFFSET); @@ -125,8 +127,8 @@ public StarTreeNode getChildForDimensionValue(long dimensionValue) throws IOExce return binarySearchChild(dimensionValue); } - private OffHeapStarTreeNode handleStarNode() throws IOException { - OffHeapStarTreeNode firstNode = new OffHeapStarTreeNode(in, firstChildId); + private FixedLengthStarTreeNode handleStarNode() throws IOException { + FixedLengthStarTreeNode firstNode = new FixedLengthStarTreeNode(in, firstChildId); if (firstNode.getDimensionValue() == ALL) { return firstNode; } else { @@ -134,14 +136,14 @@ private OffHeapStarTreeNode handleStarNode() throws IOException { } } - private OffHeapStarTreeNode binarySearchChild(long dimensionValue) throws IOException { + private FixedLengthStarTreeNode binarySearchChild(long dimensionValue) throws IOException { // Binary search to find child node int low = firstChildId; int high = getInt(LAST_CHILD_ID_OFFSET); while (low <= high) { int mid = low + (high - low) / 2; - OffHeapStarTreeNode midNode = new OffHeapStarTreeNode(in, mid); + FixedLengthStarTreeNode midNode = new FixedLengthStarTreeNode(in, mid); long midNodeDimensionValue = midNode.getDimensionValue(); if (midNodeDimensionValue == dimensionValue) { @@ -156,7 +158,7 @@ private OffHeapStarTreeNode binarySearchChild(long dimensionValue) throws IOExce } @Override - public Iterator getChildrenIterator() throws IOException { + public Iterator getChildrenIterator() throws IOException { return new Iterator<>() { private int currentChildId = firstChildId; private final int lastChildId = getInt(LAST_CHILD_ID_OFFSET); @@ -167,9 +169,9 @@ public boolean hasNext() { } @Override - public OffHeapStarTreeNode next() { + public FixedLengthStarTreeNode next() { try { - return new OffHeapStarTreeNode(in, currentChildId++); + return new FixedLengthStarTreeNode(in, currentChildId++); } catch (IOException e) { throw new RuntimeException(e); } diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/node/OffHeapStarTree.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/node/OffHeapStarTree.java deleted file mode 100644 index c34cdc0b45ea4..0000000000000 --- a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/node/OffHeapStarTree.java +++ /dev/null @@ -1,62 +0,0 @@ -/* - * SPDX-License-Identifier: Apache-2.0 - * - * The OpenSearch Contributors require contributions made to - * this file be licensed under the Apache-2.0 license or a - * compatible open source license. - */ -package org.opensearch.index.compositeindex.datacube.startree.node; - -import org.apache.logging.log4j.LogManager; -import org.apache.logging.log4j.Logger; -import org.apache.lucene.store.IndexInput; -import org.apache.lucene.store.RandomAccessInput; -import org.opensearch.index.compositeindex.datacube.startree.meta.StarTreeMetadata; - -import java.io.IOException; - -import static org.opensearch.index.compositeindex.CompositeIndexConstants.MAGIC_MARKER; -import static org.opensearch.index.compositeindex.CompositeIndexConstants.VERSION; - -/** - * Off heap implementation of the star-tree. - * - * @opensearch.experimental - */ -public class OffHeapStarTree implements StarTree { - private static final Logger logger = LogManager.getLogger(OffHeapStarTree.class); - private final OffHeapStarTreeNode root; - private final Integer numNodes; - - public OffHeapStarTree(IndexInput data, StarTreeMetadata starTreeMetadata) throws IOException { - long magicMarker = data.readLong(); - if (MAGIC_MARKER != magicMarker) { - logger.error("Invalid magic marker"); - throw new IOException("Invalid magic marker"); - } - int version = data.readInt(); - if (VERSION != version) { - logger.error("Invalid star tree version"); - throw new IOException("Invalid version"); - } - numNodes = data.readInt(); // num nodes - - RandomAccessInput in = data.randomAccessSlice(data.getFilePointer(), starTreeMetadata.getDataLength()); - root = new OffHeapStarTreeNode(in, 0); - } - - @Override - public StarTreeNode getRoot() { - return root; - } - - /** - * Returns the number of nodes in star-tree - * - * @return number of nodes in te star-tree - */ - public Integer getNumNodes() { - return numNodes; - } - -} diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/node/StarTree.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/node/StarTree.java index e21dc225a4c8f..134c75b7d19b5 100644 --- a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/node/StarTree.java +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/node/StarTree.java @@ -7,17 +7,56 @@ */ package org.opensearch.index.compositeindex.datacube.startree.node; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.apache.lucene.store.IndexInput; +import org.apache.lucene.store.RandomAccessInput; +import org.opensearch.index.compositeindex.datacube.startree.meta.StarTreeMetadata; + +import java.io.IOException; + +import static org.opensearch.index.compositeindex.CompositeIndexConstants.MAGIC_MARKER; +import static org.opensearch.index.compositeindex.CompositeIndexConstants.VERSION; + /** - * Interface for star-tree. + * Off heap implementation of the star-tree. * * @opensearch.experimental */ -public interface StarTree { +public class StarTree implements Tree { + private static final Logger logger = LogManager.getLogger(StarTree.class); + private final FixedLengthStarTreeNode root; + private final Integer numNodes; + + public StarTree(IndexInput data, StarTreeMetadata starTreeMetadata) throws IOException { + long magicMarker = data.readLong(); + if (MAGIC_MARKER != magicMarker) { + logger.error("Invalid magic marker"); + throw new IOException("Invalid magic marker"); + } + int version = data.readInt(); + if (VERSION != version) { + logger.error("Invalid star tree version"); + throw new IOException("Invalid version"); + } + numNodes = data.readInt(); // num nodes + + RandomAccessInput in = data.randomAccessSlice(data.getFilePointer(), starTreeMetadata.getDataLength()); + root = new FixedLengthStarTreeNode(in, 0); + } + + @Override + public StarTreeNode getRoot() { + return root; + } /** - * Fetches the root node of the star-tree. - * @return the root of the star-tree + * Returns the number of nodes in star-tree + * + * @return number of nodes in te star-tree */ - StarTreeNode getRoot(); + public Integer getNumNodes() { + return numNodes; + } } diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/node/StarTreeNode.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/node/StarTreeNode.java index 59522ffa4be89..cb58d53534396 100644 --- a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/node/StarTreeNode.java +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/node/StarTreeNode.java @@ -20,7 +20,6 @@ */ @ExperimentalApi public interface StarTreeNode { - long ALL = -1l; /** * Returns the dimension ID of the current star-tree node. diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/node/Tree.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/node/Tree.java new file mode 100644 index 0000000000000..811150da64bec --- /dev/null +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/node/Tree.java @@ -0,0 +1,23 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ +package org.opensearch.index.compositeindex.datacube.startree.node; + +/** + * Interface for star-tree. + * + * @opensearch.experimental + */ +public interface Tree { + + /** + * Fetches the root node of the star-tree. + * @return the root of the star-tree + */ + StarTreeNode getRoot(); + +} diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/utils/StarTreeDataSerializer.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/utils/StarTreeDataSerializer.java index a4c3e377125d4..482726b17a37e 100644 --- a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/utils/StarTreeDataSerializer.java +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/utils/StarTreeDataSerializer.java @@ -21,7 +21,7 @@ import static org.opensearch.index.compositeindex.CompositeIndexConstants.MAGIC_MARKER; import static org.opensearch.index.compositeindex.CompositeIndexConstants.VERSION; -import static org.opensearch.index.compositeindex.datacube.startree.node.OffHeapStarTreeNode.SERIALIZABLE_DATA_SIZE_IN_BYTES; +import static org.opensearch.index.compositeindex.datacube.startree.node.FixedLengthStarTreeNode.SERIALIZABLE_DATA_SIZE_IN_BYTES; import static org.opensearch.index.compositeindex.datacube.startree.utils.StarTreeBuilderUtils.ALL; /** @@ -46,7 +46,7 @@ public static long serializeStarTree(IndexOutput indexOutput, TreeNode rootNode, int headerSizeInBytes = computeStarTreeDataHeaderByteSize(); long totalSizeInBytes = headerSizeInBytes + (long) numNodes * SERIALIZABLE_DATA_SIZE_IN_BYTES; - logger.info("Star tree size in bytes : {}", totalSizeInBytes); + logger.debug("Star tree size in bytes : {}", totalSizeInBytes); writeStarTreeHeader(indexOutput, numNodes); writeStarTreeNodes(indexOutput, rootNode); diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/utils/StarTreeMetaSerializer.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/utils/StarTreeMetaSerializer.java index e1091ba35d3f8..c9df2a23ad195 100644 --- a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/utils/StarTreeMetaSerializer.java +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/utils/StarTreeMetaSerializer.java @@ -58,32 +58,35 @@ public static void serializeStarTreeMetadata( long dataFilePointer, long dataFileLength ) throws IOException { - long totalSizeInBytes = 0; - // header size - totalSizeInBytes += computeHeaderByteSize(compositeFieldType, starTreeField.getName()); - // number of dimensions - totalSizeInBytes += Integer.BYTES; - // dimension field numbers - totalSizeInBytes += (long) starTreeField.getDimensionsOrder().size() * Integer.BYTES; - // metric count - totalSizeInBytes += Integer.BYTES; - // metric - metric stat pair - totalSizeInBytes += computeMetricEntriesSizeInBytes(metricAggregatorInfos); - // segment aggregated document count - totalSizeInBytes += Integer.BYTES; - // max leaf docs - totalSizeInBytes += Integer.BYTES; - // skip star node creation dimensions count - totalSizeInBytes += Integer.BYTES; - // skip star node creation dimensions field numbers - totalSizeInBytes += (long) starTreeField.getStarTreeConfig().getSkipStarNodeCreationInDims().size() * Integer.BYTES; - // data start file pointer - totalSizeInBytes += Long.BYTES; - // data length - totalSizeInBytes += Long.BYTES; - - logger.info("Star tree size in bytes : {}", totalSizeInBytes); + if (logger.isDebugEnabled()) { + long totalSizeInBytes = 0; + + // header size + totalSizeInBytes += computeHeaderByteSize(compositeFieldType, starTreeField.getName()); + // number of dimensions + totalSizeInBytes += Integer.BYTES; + // dimension field numbers + totalSizeInBytes += (long) starTreeField.getDimensionsOrder().size() * Integer.BYTES; + // metric count + totalSizeInBytes += Integer.BYTES; + // metric - metric stat pair + totalSizeInBytes += computeMetricEntriesSizeInBytes(metricAggregatorInfos); + // segment aggregated document count + totalSizeInBytes += Integer.BYTES; + // max leaf docs + totalSizeInBytes += Integer.BYTES; + // skip star node creation dimensions count + totalSizeInBytes += Integer.BYTES; + // skip star node creation dimensions field numbers + totalSizeInBytes += (long) starTreeField.getStarTreeConfig().getSkipStarNodeCreationInDims().size() * Integer.BYTES; + // data start file pointer + totalSizeInBytes += Long.BYTES; + // data length + totalSizeInBytes += Long.BYTES; + + logger.debug("Star tree size in bytes : {}", totalSizeInBytes); + } writeMetaHeader(metaOut, compositeFieldType, starTreeField.getName()); writeMeta(metaOut, writeState, metricAggregatorInfos, starTreeField, segmentAggregatedCount, dataFilePointer, dataFileLength); diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/utils/TreeNode.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/utils/TreeNode.java index 5cf737c61ab2d..9c0da6e388f2d 100644 --- a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/utils/TreeNode.java +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/utils/TreeNode.java @@ -11,6 +11,8 @@ import java.util.Map; +import static org.opensearch.index.compositeindex.datacube.startree.utils.StarTreeBuilderUtils.ALL; + /** * /** * Represents a node in a tree data structure, specifically designed for a star-tree implementation. @@ -21,8 +23,6 @@ @ExperimentalApi public class TreeNode { - public static final int ALL = -1; - /** * The dimension id for the dimension (field) associated with this star-tree node. */ diff --git a/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/builder/OnHeapStarTreeBuilderTests.java b/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/builder/OnHeapStarTreeBuilderTests.java index 7c891f1780f8f..5667ca428f648 100644 --- a/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/builder/OnHeapStarTreeBuilderTests.java +++ b/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/builder/OnHeapStarTreeBuilderTests.java @@ -235,20 +235,25 @@ public void test_sortAndAggregateStarTreeDocuments() throws IOException { } - public void test_sortAndAggregateStarTreeDocuments_nullMetric() throws IOException { + public void test_sortAndAggregateStarTreeDocuments_idempotentMetric() throws IOException { int noOfStarTreeDocuments = 5; StarTreeDocument[] starTreeDocuments = new StarTreeDocument[noOfStarTreeDocuments]; starTreeDocuments[0] = new StarTreeDocument(new Long[] { 2L, 4L, 3L, 4L }, new Double[] { 12.0, 10.0, randomDouble(), 8.0, 20.0 }); - starTreeDocuments[1] = new StarTreeDocument(new Long[] { 3L, 4L, 2L, 1L }, new Double[] { 10.0, 6.0, randomDouble(), 8.0, 20.0 }); + starTreeDocuments[1] = new StarTreeDocument(new Long[] { 3L, 4L, 2L, 1L }, new Double[] { 10.0, 6.0, randomDouble(), 8.0, 24.0 }); starTreeDocuments[2] = new StarTreeDocument(new Long[] { 3L, 4L, 2L, 1L }, new Double[] { 14.0, 12.0, randomDouble(), 8.0, 20.0 }); starTreeDocuments[3] = new StarTreeDocument(new Long[] { 2L, 4L, 3L, 4L }, new Double[] { 9.0, 4.0, randomDouble(), 9.0, 12.0 }); starTreeDocuments[4] = new StarTreeDocument(new Long[] { 3L, 4L, 2L, 1L }, new Double[] { 11.0, null, randomDouble(), 8.0, 20.0 }); + StarTreeDocument expectedStarTreeDocument = new StarTreeDocument( new Long[] { 2L, 4L, 3L, 4L }, new Object[] { 21.0, 14.0, 2L, 8.0, 20.0 } ); + StarTreeDocument expectedStarTreeDocumentWithIdempotentMetric = new StarTreeDocument( + new Long[] { 3L, 4L, 2L, 1L }, + new Object[] { 35.0, 18.0, 3L, 8.0, 24.0 } + ); StarTreeDocument[] segmentStarTreeDocuments = new StarTreeDocument[noOfStarTreeDocuments]; for (int i = 0; i < noOfStarTreeDocuments; i++) { @@ -275,11 +280,14 @@ public void test_sortAndAggregateStarTreeDocuments_nullMetric() throws IOExcepti assertEquals(expectedStarTreeDocument.metrics[met], resultStarTreeDocument.metrics[met]); } - assertThrows( - "Null metric should have resulted in IllegalStateException", - IllegalStateException.class, - segmentStarTreeDocumentIterator::next - ); + resultStarTreeDocument = segmentStarTreeDocumentIterator.next(); + for (int dim = 0; dim < 4; dim++) { + assertEquals(expectedStarTreeDocumentWithIdempotentMetric.dimensions[dim], resultStarTreeDocument.dimensions[dim]); + } + + for (int met = 0; met < 5; met++) { + assertEquals(expectedStarTreeDocumentWithIdempotentMetric.metrics[met], resultStarTreeDocument.metrics[met]); + } } @@ -792,7 +800,7 @@ public void test_build_starTreeDataset() throws IOException { segmentStarTreeDocuments, false ); - builder.build(segmentStarTreeDocumentIterator); + builder.build(segmentStarTreeDocumentIterator, new AtomicInteger(), docValuesConsumer); List resultStarTreeDocuments = builder.getStarTreeDocuments(); List expectedStarTreeDocuments = List.of( @@ -932,14 +940,14 @@ public void testMergeFlow() throws IOException { SortedNumericDocValues m1sndv = getSortedNumericMock(metricsList, metricsWithField); Map dimDocIdSetIterators = Map.of("field1", d1sndv, "field3", d2sndv); Map metricDocIdSetIterators = Map.of("field2", m1sndv); - StarTreeValues starTreeValues = new StarTreeValues(sf, null, dimDocIdSetIterators, metricDocIdSetIterators); + StarTreeValues starTreeValues = new StarTreeValues(sf, null, dimDocIdSetIterators, metricDocIdSetIterators, new HashMap<>()); SortedNumericDocValues f2d1sndv = getSortedNumericMock(dimList, docsWithField); SortedNumericDocValues f2d2sndv = getSortedNumericMock(dimList2, docsWithField2); SortedNumericDocValues f2m1sndv = getSortedNumericMock(metricsList, metricsWithField); Map f2dimDocIdSetIterators = Map.of("field1", f2d1sndv, "field3", f2d2sndv); Map f2metricDocIdSetIterators = Map.of("field2", f2m1sndv); - StarTreeValues starTreeValues2 = new StarTreeValues(sf, null, f2dimDocIdSetIterators, f2metricDocIdSetIterators); + StarTreeValues starTreeValues2 = new StarTreeValues(sf, null, f2dimDocIdSetIterators, f2metricDocIdSetIterators, new HashMap<>()); OnHeapStarTreeBuilder builder = new OnHeapStarTreeBuilder(metaOut, dataOut, sf, writeState, mapperService); Iterator starTreeDocumentIterator = builder.mergeStarTrees(List.of(starTreeValues, starTreeValues2)); @@ -989,14 +997,14 @@ public void testMergeFlowWithCount() throws IOException { SortedNumericDocValues m1sndv = getSortedNumericMock(metricsList, metricsWithField); Map dimDocIdSetIterators = Map.of("field1", d1sndv, "field3", d2sndv); Map metricDocIdSetIterators = Map.of("field2", m1sndv); - StarTreeValues starTreeValues = new StarTreeValues(sf, null, dimDocIdSetIterators, metricDocIdSetIterators); + StarTreeValues starTreeValues = new StarTreeValues(sf, null, dimDocIdSetIterators, metricDocIdSetIterators, new HashMap<>()); SortedNumericDocValues f2d1sndv = getSortedNumericMock(dimList, docsWithField); SortedNumericDocValues f2d2sndv = getSortedNumericMock(dimList2, docsWithField2); SortedNumericDocValues f2m1sndv = getSortedNumericMock(metricsList, metricsWithField); Map f2dimDocIdSetIterators = Map.of("field1", f2d1sndv, "field3", f2d2sndv); Map f2metricDocIdSetIterators = Map.of("field2", f2m1sndv); - StarTreeValues starTreeValues2 = new StarTreeValues(sf, null, f2dimDocIdSetIterators, f2metricDocIdSetIterators); + StarTreeValues starTreeValues2 = new StarTreeValues(sf, null, f2dimDocIdSetIterators, f2metricDocIdSetIterators, new HashMap<>()); OnHeapStarTreeBuilder builder = new OnHeapStarTreeBuilder(metaOut, dataOut, sf, writeState, mapperService); Iterator starTreeDocumentIterator = builder.mergeStarTrees(List.of(starTreeValues, starTreeValues2)); /**