diff --git a/server/src/main/java/org/apache/lucene/index/BaseSingleStarTreeBuilder.java b/server/src/main/java/org/apache/lucene/index/BaseSingleStarTreeBuilder.java new file mode 100644 index 0000000000000..7b0957b1328de --- /dev/null +++ b/server/src/main/java/org/apache/lucene/index/BaseSingleStarTreeBuilder.java @@ -0,0 +1,660 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ +package org.apache.lucene.index; + +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.apache.lucene.codecs.DocValuesConsumer; +import org.apache.lucene.codecs.DocValuesProducer; +import org.apache.lucene.search.DocIdSetIterator; +import org.opensearch.index.compositeindex.datacube.DateDimension; +import org.opensearch.index.compositeindex.datacube.Dimension; +import org.opensearch.index.compositeindex.datacube.Metric; +import org.opensearch.index.compositeindex.datacube.MetricStat; +import org.opensearch.index.compositeindex.datacube.startree.StarTreeField; +import org.opensearch.index.compositeindex.datacube.startree.StarTreeFieldConfiguration; +import org.opensearch.index.compositeindex.datacube.startree.aggregators.MetricStatFieldPair; +import org.opensearch.index.compositeindex.datacube.startree.aggregators.ValueAggregator; +import org.opensearch.index.compositeindex.datacube.startree.aggregators.ValueAggregatorFactory; +import org.opensearch.index.compositeindex.datacube.startree.aggregators.numerictype.StarTreeNumericType; +import org.opensearch.index.compositeindex.datacube.startree.builder.SingleTreeBuilder; +import org.opensearch.index.compositeindex.datacube.startree.builder.StarTreeDocValuesIteratorFactory; +import org.opensearch.index.compositeindex.datacube.startree.data.StarTreeDocument; +import org.opensearch.index.compositeindex.datacube.startree.utils.StarTreeBuilderUtils; +import org.opensearch.index.fielddata.IndexNumericFieldData; +import org.opensearch.index.mapper.Mapper; +import org.opensearch.index.mapper.MapperService; +import org.opensearch.index.mapper.NumberFieldMapper; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.Set; + +/** + * Base class for star-tree builder + */ +public abstract class BaseSingleStarTreeBuilder implements SingleTreeBuilder { + + // TODO: STAR_TREE_CODEC will be moved to CodecService once the Star Tree Codec is defined + public static final String STAR_TREE_CODEC = "startreecodec"; + + private static final Logger logger = LogManager.getLogger(BaseSingleStarTreeBuilder.class); + + public static final int STAR_IN_DOC_VALUES_INDEX = -1; + + protected final String[] dimensionsSplitOrder; + protected final Set skipStarNodeCreationForDimensions; + protected final String[] metrics; + + protected final int numMetrics; + protected final int numDimensions; + protected int numDocs; + protected int totalDocs; + protected int numNodes; + protected final int maxLeafDocuments; + + protected final StarTreeBuilderUtils.TreeNode rootNode = getNewNode(); + + // TODO: This will be initialized with OnHeap / OffHeap Implementations (Commented it's occurrences for now) + // private IndexOutput indexOutput; + protected DocIdSetIterator[] dimensionReaders; + protected DocIdSetIterator[] metricReaders; + + protected ValueAggregator[] valueAggregators; + protected IndexNumericFieldData.NumericType[] numericTypes; + protected DocValuesConsumer docValuesConsumer; + protected DocValuesProducer docValuesProducer; + + private final StarTreeDocValuesIteratorFactory starTreeDocValuesIteratorFactory; + private final StarTreeField starTreeField; + private final StarTreeFieldConfiguration starTreeFieldSpec; + private final List metricStatFieldPairs; + private final MapperService mapperService; + + /** + * Constructor for base star-tree builder + * + * @param starTreeField holds the configuration for the star tree + * @param docValuesProducer helps return the doc values iterator for each type based on field name + * @param docValuesConsumer to consume the new aggregated metrics during flush + * @param state stores the segment state + * @param mapperService helps to find the original type of the field + */ + protected BaseSingleStarTreeBuilder( + StarTreeField starTreeField, + DocValuesProducer docValuesProducer, + DocValuesConsumer docValuesConsumer, + SegmentWriteState state, + MapperService mapperService + ) throws IOException { + + logger.info("Building in base star tree builder"); + + // String docFileName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, "stttree"); + // logger.info("Star tree file name : {}", docFileName); + + // indexOutput = state.directory.createOutput(docFileName, state.context); + // CodecUtil.writeIndexHeader(indexOutput, STAR_TREE_CODEC, 0, state.segmentInfo.getId(), state.segmentSuffix); + this.mapperService = mapperService; + this.starTreeField = starTreeField; + this.starTreeFieldSpec = starTreeField.getStarTreeConfig(); + this.docValuesConsumer = docValuesConsumer; + this.docValuesProducer = docValuesProducer; + this.starTreeDocValuesIteratorFactory = new StarTreeDocValuesIteratorFactory(); + + List dimensionsSplitOrder = starTreeField.getDimensionsOrder(); + this.numDimensions = dimensionsSplitOrder.size(); + this.dimensionsSplitOrder = new String[numDimensions]; + + this.skipStarNodeCreationForDimensions = new HashSet<>(); + this.totalDocs = state.segmentInfo.maxDoc(); + this.dimensionReaders = new DocIdSetIterator[numDimensions]; + Set skipStarNodeCreationForDimensions = this.starTreeFieldSpec.getSkipStarNodeCreationInDims(); + + for (int i = 0; i < numDimensions; i++) { + String dimension = dimensionsSplitOrder.get(i).getField(); + this.dimensionsSplitOrder[i] = dimension; + if (skipStarNodeCreationForDimensions.contains(dimensionsSplitOrder.get(i).getField())) { + this.skipStarNodeCreationForDimensions.add(i); + } + FieldInfo dimensionFieldInfos = state.fieldInfos.fieldInfo(dimension); + DocValuesType dimensionDocValuesType = state.fieldInfos.fieldInfo(dimension).getDocValuesType(); + dimensionReaders[i] = starTreeDocValuesIteratorFactory.createIterator( + dimensionDocValuesType, + dimensionFieldInfos, + docValuesProducer + ); + } + + this.metricStatFieldPairs = generateMetricStatFieldPairs(); + this.numMetrics = metricStatFieldPairs.size(); + this.metrics = new String[numMetrics]; + this.valueAggregators = new ValueAggregator[numMetrics]; + this.numericTypes = new IndexNumericFieldData.NumericType[numMetrics]; + this.metricReaders = new DocIdSetIterator[numMetrics]; + + int index = 0; + for (MetricStatFieldPair metricStatFieldPair : metricStatFieldPairs) { + metrics[index] = metricStatFieldPair.toFieldName(); + valueAggregators[index] = ValueAggregatorFactory.getValueAggregator(metricStatFieldPair.getMetricStat()); + + Mapper fieldMapper = mapperService.documentMapper().mappers().getMapper(metrics[index]); + if (fieldMapper instanceof NumberFieldMapper) { + numericTypes[index] = ((NumberFieldMapper) fieldMapper).fieldType().numericType(); + } else { + numericTypes[index] = IndexNumericFieldData.NumericType.DOUBLE; + } + // Ignore the column for COUNT aggregation function + if (valueAggregators[index].getAggregationType() != MetricStat.COUNT) { + String metricName = metricStatFieldPair.getField(); + FieldInfo metricFieldInfos = state.fieldInfos.fieldInfo(metricName); + DocValuesType metricDocValuesType = state.fieldInfos.fieldInfo(metricName).getDocValuesType(); + metricReaders[index] = starTreeDocValuesIteratorFactory.createIterator( + metricDocValuesType, + metricFieldInfos, + docValuesProducer + ); + } + index++; + } + this.maxLeafDocuments = starTreeFieldSpec.maxLeafDocs(); + } + + /** + * Generates the MetricStatFieldPairs for all the metrics on a field + * + * @return list of metric stat mapped with respective fields + */ + public List generateMetricStatFieldPairs() { + List metricStatFieldPairs = new ArrayList<>(); + for (Metric metric : this.starTreeField.getMetrics()) { + for (MetricStat metricType : metric.getMetrics()) { + MetricStatFieldPair metricStatFieldPair = new MetricStatFieldPair(metricType, metric.getField()); + metricStatFieldPairs.add(metricStatFieldPair); + } + } + return metricStatFieldPairs; + } + + /** + * Appends a star-tree document to the star-tree. + * + * @param starTreeDocument star tree document to be appended + */ + public abstract void appendStarTreeDocument(StarTreeDocument starTreeDocument) throws IOException; + + /** + * Returns the star-tree document of the given document id in the star-tree. + * + * @param docId Document dd + * @return Star tree document + */ + public abstract StarTreeDocument getStarTreeDocument(int docId) throws IOException; + + /** + * Returns the star-tree document of the given document id in the star-tree. + * + * @return Star tree document + */ + public abstract List getStarTreeDocuments() throws IOException; + + /** + * Returns the dimension value of the given document and dimension id in the star-tree. + * + * @param docId Document Id + * @param dimensionId Dimension Id + * @return Dimension value + */ + public abstract long getDimensionValue(int docId, int dimensionId) throws IOException; + + /** + * Sorts and aggregates the star-tree Document in the segment, and returns a star-tree document iterator for all the + * aggregated star-tree document. + * + *

This method reads star-tree document from segment and generates the initial star-tree document for the star-tree. + * + * @param numDocs Number of documents in the segment + * @return Iterator for the aggregated star-tree document + */ + public abstract Iterator processSegmentStarTreeDocuments(int numDocs) throws IOException; + + /** + * Generates aggregated star-tree document for star-node. + * + *

This method will do the following steps: + * + *

    + *
  • Creates a temporary buffer for the given range of documents + *
  • Replaces the value for the given dimension Id to {@code STAR} + *
  • Sorts the starTreeDocument inside the temporary buffer + *
  • Aggregates the starTreeDocument with same dimensions + *
  • Returns an iterator for the aggregated starTreeDocument + *
+ * + * @param startDocId Start document id in the star-tree + * @param endDocId End document id (exclusive) in the star-tree + * @param dimensionId Dimension id of the star-node + * @return Iterator for the aggregated starTreeDocument + */ + public abstract Iterator generateStarTreeForStarNode(int startDocId, int endDocId, int dimensionId) + throws IOException; + + /** + * Returns the next segment star-tree document + */ + protected StarTreeDocument getNextSegmentStarTreeDocument() throws IOException { + long[] dimensions = getNextSegmentStarTreeDocumentDimensions(); + Object[] metrics = getNextSegmentStarTreeDocumentMetrics(); + return new StarTreeDocument(dimensions, metrics); + } + + /** + * Returns the next segment star-tree document for the dimensions + * + * @return dimension values for each of the star-tree dimension + * @throws IOException when we are unable to iterate to the next doc + */ + long[] getNextSegmentStarTreeDocumentDimensions() throws IOException { + long[] dimensions = new long[numDimensions]; + for (int i = 0; i < numDimensions; i++) { + try { + dimensionReaders[i].nextDoc(); + } catch (IOException e) { + logger.error("unable to iterate to next doc", e); + } + + if (starTreeField.getDimensionsOrder().get(i) instanceof DateDimension) { + dimensions[i] = handleDateDimension( + dimensionsSplitOrder[i], + starTreeDocValuesIteratorFactory.getNextValue(dimensionReaders[i]) + ); + } else { + dimensions[i] = starTreeDocValuesIteratorFactory.getNextValue(dimensionReaders[i]); + } + } + return dimensions; + } + + /** + * Returns the next segment star-tree document for the metrics + * + * @return metric values for each of the star-tree metric + * @throws IOException when we are unable to iterate to the next doc + */ + private Object[] getNextSegmentStarTreeDocumentMetrics() throws IOException { + Object[] metrics = new Object[numMetrics]; + for (int i = 0; i < numMetrics; i++) { + // Ignore the column for COUNT aggregation function + if (metricReaders[i] != null) { + try { + metricReaders[i].nextDoc(); + } catch (IOException e) { + // TODO : handle null values in columns + logger.error("unable to iterate to next doc", e); + } + metrics[i] = starTreeDocValuesIteratorFactory.getNextValue(metricReaders[i]); + } + } + return metrics; + } + + /** + * Merges a segment star-tree document (raw) into the aggregated star-tree document. + * + *

Will create a new aggregated star-tree document if the current one is {@code null}. + * + * @param aggregatedStarTreeDocument Aggregated star-tree document + * @param segmentStarTreeDocument Segment star-tree document + * @return Merged starTreeDocument + */ + protected StarTreeDocument aggregateSegmentStarTreeDocument( + StarTreeDocument aggregatedStarTreeDocument, + StarTreeDocument segmentStarTreeDocument + ) { + // TODO: HANDLE KEYWORDS LATER! + if (aggregatedStarTreeDocument == null) { + long[] dimensions = Arrays.copyOf(segmentStarTreeDocument.dimensions, numDimensions); + Object[] metrics = new Object[numMetrics]; + for (int i = 0; i < numMetrics; i++) { + try { + StarTreeNumericType numericType = StarTreeNumericType.fromNumericType(numericTypes[i]); + metrics[i] = valueAggregators[i].getInitialAggregatedValue((Long) segmentStarTreeDocument.metrics[i], numericType); + } catch (IllegalArgumentException | NullPointerException e) { + logger.error("Cannot parse initial aggregated value", e); + throw new IllegalArgumentException( + "Cannot parse initial aggregated value [" + segmentStarTreeDocument.metrics[i] + "]" + ); + } + } + return new StarTreeDocument(dimensions, metrics); + } else { + for (int i = 0; i < numMetrics; i++) { + try { + StarTreeNumericType numericType = StarTreeNumericType.fromNumericType(numericTypes[i]); + aggregatedStarTreeDocument.metrics[i] = valueAggregators[i].applySegmentRawValue( + aggregatedStarTreeDocument.metrics[i], + (Long) segmentStarTreeDocument.metrics[i], + numericType + ); + } catch (IllegalArgumentException | NullPointerException e) { + logger.error("Cannot apply segment raw value", e); + throw new IllegalArgumentException("Cannot aggregate on segment value [" + segmentStarTreeDocument.metrics[i] + "]"); + } + } + return aggregatedStarTreeDocument; + } + } + + /** + * Merges a star-tree document (aggregated) into the aggregated document. + * + *

Will create a new aggregated starTreeDocument if the current one is {@code null}. + * + * @param aggregatedStarTreeDocument Aggregated star-tree document + * @param starTreeStarTreeDocument Star-tree document + * @return Merged star-tree document + */ + public StarTreeDocument aggregateStarTreeDocument( + StarTreeDocument aggregatedStarTreeDocument, + StarTreeDocument starTreeStarTreeDocument + ) { + // aggregate the documents + if (aggregatedStarTreeDocument == null) { + long[] dimensions = Arrays.copyOf(starTreeStarTreeDocument.dimensions, numDimensions); + Object[] metrics = new Object[numMetrics]; + for (int i = 0; i < numMetrics; i++) { + try { + metrics[i] = valueAggregators[i].cloneAggregatedValue(starTreeStarTreeDocument.metrics[i]); + } catch (IllegalArgumentException | NullPointerException e) { + logger.error("Cannot clone aggregated value", e); + throw new IllegalArgumentException("Cannot clone aggregated value [" + starTreeStarTreeDocument.metrics[i] + "]"); + } + } + return new StarTreeDocument(dimensions, metrics); + } else { + for (int i = 0; i < numMetrics; i++) { + try { + aggregatedStarTreeDocument.metrics[i] = valueAggregators[i].applyAggregatedValue( + starTreeStarTreeDocument.metrics[i], + aggregatedStarTreeDocument.metrics[i] + ); + } catch (IllegalArgumentException | NullPointerException e) { + logger.error("Cannot apply aggregated value", e); + throw new IllegalArgumentException("Cannot apply aggregated value [" + starTreeStarTreeDocument.metrics[i] + "]"); + } + } + return aggregatedStarTreeDocument; + } + } + + // TODO: This will be taken care in off heap implementation for merges + // public abstract void build(List starTreeValues) throws IOException; + + public void build() throws IOException { + long startTime = System.currentTimeMillis(); + logger.info("Tree of Aggregations build is a go with config {}", starTreeField); + + Iterator starTreeDocumentIterator = processSegmentStarTreeDocuments(totalDocs); + logger.info("Sorting and aggregating star-tree in ms : {}", (System.currentTimeMillis() - startTime)); + build(starTreeDocumentIterator); + logger.info("Finished Building TOA in ms : {}", (System.currentTimeMillis() - startTime)); + } + + /** + * Builds the star tree using Star-Tree Document + * + * @param starTreeDocumentIterator contains the sorted and aggregated documents + * @throws IOException when we are unable to build star-tree + */ + public void build(Iterator starTreeDocumentIterator) throws IOException { + int numSegmentStarTreeDocument = totalDocs; + + while (starTreeDocumentIterator.hasNext()) { + appendToStarTree(starTreeDocumentIterator.next()); + } + int numStarTreeDocument = numDocs; + logger.info("Generated star tree docs : [{}] from segment docs : [{}]", numStarTreeDocument, numSegmentStarTreeDocument); + + if (numDocs == 0) { + // TODO: Uncomment when segment codec is ready + // StarTreeBuilderUtils.serializeTree(indexOutput, rootNode, dimensionsSplitOrder, numNodes); + return; + } + + constructStarTree(rootNode, 0, numDocs); + int numStarTreeDocumentUnderStarNode = numDocs - numStarTreeDocument; + logger.info( + "Finished constructing star-tree, got [ {} ] tree nodes and [ {} ] starTreeDocument under star-node", + numNodes, + numStarTreeDocumentUnderStarNode + ); + + createAggregatedDocs(rootNode); + int numAggregatedStarTreeDocument = numDocs - numStarTreeDocument - numStarTreeDocumentUnderStarNode; + logger.info("Finished creating aggregated documents : {}", numAggregatedStarTreeDocument); + + // Create doc values indices in disk + // TODO: Uncomment when segment codec is ready + // createSortedDocValuesIndices(docValuesConsumer); + + // Serialize and save in disk + // TODO: Uncomment when segment codec is ready + // StarTreeBuilderUtils.serializeTree(indexOutput, rootNode, dimensionsSplitOrder, numNodes); + + // TODO: Write star tree metadata for off heap implementation + + } + + /** + * Appends a starTreeDocument to star tree + * + * @param starTreeDocument star-tree document + * @throws IOException throws an exception if we are unable to append the doc + */ + private void appendToStarTree(StarTreeDocument starTreeDocument) throws IOException { + appendStarTreeDocument(starTreeDocument); + numDocs++; + } + + /** + * Returns a new node + * + * @return return new star-tree node + */ + private StarTreeBuilderUtils.TreeNode getNewNode() { + numNodes++; + return new StarTreeBuilderUtils.TreeNode(); + } + + /** + * Implements the algorithm to construct a star-tree based on star-tree documents + * + * @param node star-tree node + * @param startDocId start document id + * @param endDocId end document id + * @throws IOException throws an exception if we are unable to construct the tree + */ + private void constructStarTree(StarTreeBuilderUtils.TreeNode node, int startDocId, int endDocId) throws IOException { + + int childDimensionId = node.dimensionId + 1; + if (childDimensionId == numDimensions) { + return; + } + + // Construct all non-star children nodes + node.childDimensionId = childDimensionId; + Map children = constructNonStarNodes(startDocId, endDocId, childDimensionId); + node.children = children; + + // Construct star-node if required + if (!skipStarNodeCreationForDimensions.contains(childDimensionId) && children.size() > 1) { + children.put((long) StarTreeBuilderUtils.ALL, constructStarNode(startDocId, endDocId, childDimensionId)); + } + + // Further split on child nodes if required + for (StarTreeBuilderUtils.TreeNode child : children.values()) { + if (child.endDocId - child.startDocId > maxLeafDocuments) { + constructStarTree(child, child.startDocId, child.endDocId); + } + } + } + + /** + * Constructs non star tree nodes + * + * @param startDocId start document id + * @param endDocId end document id + * @param dimensionId id of the dimension in the star tree + * @return root node with non-star nodes constructed + * @throws IOException throws an exception if we are unable to construct non-star nodes + */ + private Map constructNonStarNodes(int startDocId, int endDocId, int dimensionId) + throws IOException { + Map nodes = new HashMap<>(); + int nodeStartDocId = startDocId; + long nodeDimensionValue = getDimensionValue(startDocId, dimensionId); + for (int i = startDocId + 1; i < endDocId; i++) { + long dimensionValue = getDimensionValue(i, dimensionId); + if (dimensionValue != nodeDimensionValue) { + StarTreeBuilderUtils.TreeNode child = getNewNode(); + child.dimensionId = dimensionId; + child.dimensionValue = nodeDimensionValue; + child.startDocId = nodeStartDocId; + child.endDocId = i; + nodes.put(nodeDimensionValue, child); + + nodeStartDocId = i; + nodeDimensionValue = dimensionValue; + } + } + StarTreeBuilderUtils.TreeNode lastNode = getNewNode(); + lastNode.dimensionId = dimensionId; + lastNode.dimensionValue = nodeDimensionValue; + lastNode.startDocId = nodeStartDocId; + lastNode.endDocId = endDocId; + nodes.put(nodeDimensionValue, lastNode); + return nodes; + } + + /** + * Constructs star tree nodes + * + * @param startDocId start document id + * @param endDocId end document id + * @param dimensionId id of the dimension in the star tree + * @return root node with star nodes constructed + * @throws IOException throws an exception if we are unable to construct non-star nodes + */ + private StarTreeBuilderUtils.TreeNode constructStarNode(int startDocId, int endDocId, int dimensionId) throws IOException { + StarTreeBuilderUtils.TreeNode starNode = getNewNode(); + starNode.dimensionId = dimensionId; + starNode.dimensionValue = StarTreeBuilderUtils.ALL; + starNode.startDocId = numDocs; + Iterator starTreeDocumentIterator = generateStarTreeForStarNode(startDocId, endDocId, dimensionId); + while (starTreeDocumentIterator.hasNext()) { + appendToStarTree(starTreeDocumentIterator.next()); + } + starNode.endDocId = numDocs; + return starNode; + } + + /** + * Returns aggregated star-tree document + * + * @param node star-tree node + * @return aggregated star-tree documents + * @throws IOException throws an exception upon failing to create new aggregated docs based on star tree + */ + private StarTreeDocument createAggregatedDocs(StarTreeBuilderUtils.TreeNode node) throws IOException { + StarTreeDocument aggregatedStarTreeDocument = null; + if (node.children == null) { + // For leaf node + + if (node.startDocId == node.endDocId - 1) { + // If it has only one document, use it as the aggregated document + aggregatedStarTreeDocument = getStarTreeDocument(node.startDocId); + node.aggregatedDocId = node.startDocId; + } else { + // If it has multiple documents, aggregate all of them + for (int i = node.startDocId; i < node.endDocId; i++) { + aggregatedStarTreeDocument = aggregateStarTreeDocument(aggregatedStarTreeDocument, getStarTreeDocument(i)); + } + assert aggregatedStarTreeDocument != null; + for (int i = node.dimensionId + 1; i < numDimensions; i++) { + aggregatedStarTreeDocument.dimensions[i] = STAR_IN_DOC_VALUES_INDEX; + } + node.aggregatedDocId = numDocs; + appendToStarTree(aggregatedStarTreeDocument); + } + } else { + // For non-leaf node + if (node.children.containsKey((long) StarTreeBuilderUtils.ALL)) { + // If it has star child, use the star child aggregated document directly + for (StarTreeBuilderUtils.TreeNode child : node.children.values()) { + if (child.dimensionValue == StarTreeBuilderUtils.ALL) { + aggregatedStarTreeDocument = createAggregatedDocs(child); + node.aggregatedDocId = child.aggregatedDocId; + } else { + createAggregatedDocs(child); + } + } + } else { + // If no star child exists, aggregate all aggregated documents from non-star children + for (StarTreeBuilderUtils.TreeNode child : node.children.values()) { + aggregatedStarTreeDocument = aggregateStarTreeDocument(aggregatedStarTreeDocument, createAggregatedDocs(child)); + } + assert aggregatedStarTreeDocument != null; + for (int i = node.dimensionId + 1; i < numDimensions; i++) { + aggregatedStarTreeDocument.dimensions[i] = STAR_IN_DOC_VALUES_INDEX; + } + node.aggregatedDocId = numDocs; + appendToStarTree(aggregatedStarTreeDocument); + } + } + return aggregatedStarTreeDocument; + } + + /** + * Handles the dimension of date time field type + * + * @param fieldName name of the field + * @param val value of the field + * @return returns the converted dimension of the field to a particular granularity + */ + private long handleDateDimension(final String fieldName, final long val) { + // TODO: handle timestamp granularity + return val; + } + + public void close() throws IOException { + // boolean success = false; + // try { + // if (indexOutput != null) { + // indexOutput.writeInt(-1); + // CodecUtil.writeFooter(indexOutput); // write checksum + // } + // success = true; + // } catch (Exception e) { + // throw new RuntimeException(e); + // } finally { + // if (success) { + // IOUtils.close(indexOutput); + // } else { + // IOUtils.closeWhileHandlingException(indexOutput); + // } + // indexOutput = null; + // } + } + +} diff --git a/server/src/main/java/org/apache/lucene/index/StarTreeDocValuesWriter.java b/server/src/main/java/org/apache/lucene/index/StarTreeDocValuesWriter.java new file mode 100644 index 0000000000000..e8d96226da2ee --- /dev/null +++ b/server/src/main/java/org/apache/lucene/index/StarTreeDocValuesWriter.java @@ -0,0 +1,37 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.apache.lucene.index; + +/** + * A wrapper for {@link DocValuesWriter} that contains the {@link DocValuesType} of the doc + */ +public class StarTreeDocValuesWriter { + + private final DocValuesType docValuesType; + private final DocValuesWriter docValuesWriter; + + public StarTreeDocValuesWriter(DocValuesType docValuesType, DocValuesWriter docValuesWriter) { + this.docValuesType = docValuesType; + this.docValuesWriter = docValuesWriter; + } + + /** + * Get the doc values type + */ + public DocValuesType getDocValuesType() { + return docValuesType; + } + + /** + * Get the doc values writer + */ + public DocValuesWriter getDocValuesWriter() { + return docValuesWriter; + } +} diff --git a/server/src/main/java/org/opensearch/common/inject/util/Modules.java b/server/src/main/java/org/opensearch/common/inject/util/Modules.java index ae37cb3d29a68..b5a5a83ac3af9 100644 --- a/server/src/main/java/org/opensearch/common/inject/util/Modules.java +++ b/server/src/main/java/org/opensearch/common/inject/util/Modules.java @@ -202,7 +202,7 @@ public Void visit(Binding binding) { if (!overriddenKeys.remove(binding.getKey())) { super.visit(binding); - // Record when a scope instance is used in a binding + // record when a scope instance is used in a binding Scope scope = getScopeInstanceOrNull(binding); if (scope != null) { scopeInstancesInUse.put(scope, binding.getSource()); diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/MetricStat.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/MetricStat.java index fbde296b15f7e..a5a6d07ad9bbc 100644 --- a/server/src/main/java/org/opensearch/index/compositeindex/datacube/MetricStat.java +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/MetricStat.java @@ -21,7 +21,8 @@ public enum MetricStat { AVG("avg"), SUM("sum"), MIN("min"), - MAX("max"); + MAX("max"), + UNSUPPORTED("unsupported"); private final String typeName; diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/StarTreeFieldConfiguration.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/StarTreeFieldConfiguration.java index 21a709ab010d6..5dd066b34f108 100644 --- a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/StarTreeFieldConfiguration.java +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/StarTreeFieldConfiguration.java @@ -13,9 +13,9 @@ import org.opensearch.core.xcontent.XContentBuilder; import java.io.IOException; -import java.util.List; import java.util.Locale; import java.util.Objects; +import java.util.Set; import java.util.concurrent.atomic.AtomicInteger; /** @@ -27,10 +27,10 @@ public class StarTreeFieldConfiguration implements ToXContent { private final AtomicInteger maxLeafDocs = new AtomicInteger(); - private final List skipStarNodeCreationInDims; + private final Set skipStarNodeCreationInDims; private final StarTreeBuildMode buildMode; - public StarTreeFieldConfiguration(int maxLeafDocs, List skipStarNodeCreationInDims, StarTreeBuildMode buildMode) { + public StarTreeFieldConfiguration(int maxLeafDocs, Set skipStarNodeCreationInDims, StarTreeBuildMode buildMode) { this.maxLeafDocs.set(maxLeafDocs); this.skipStarNodeCreationInDims = skipStarNodeCreationInDims; this.buildMode = buildMode; @@ -87,7 +87,7 @@ public StarTreeBuildMode getBuildMode() { return buildMode; } - public List getSkipStarNodeCreationInDims() { + public Set getSkipStarNodeCreationInDims() { return skipStarNodeCreationInDims; } diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/aggregators/MetricStatFieldPair.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/aggregators/MetricStatFieldPair.java new file mode 100644 index 0000000000000..3fb041373ff51 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/aggregators/MetricStatFieldPair.java @@ -0,0 +1,115 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ +package org.opensearch.index.compositeindex.datacube.startree.aggregators; + +import org.opensearch.index.compositeindex.datacube.MetricStat; + +import java.util.Comparator; + +/** + * Builds aggregation function and doc values field pair to support various aggregations + * @opensearch.experimental + */ +public class MetricStatFieldPair implements Comparable { + + public static final String DELIMITER = "__"; + public static final String STAR = "*"; + public static final MetricStatFieldPair COUNT_STAR = new MetricStatFieldPair(MetricStat.COUNT, STAR); + + private final MetricStat metricStat; + private final String field; + + /** + * Constructor for MetricStatFieldPair + */ + public MetricStatFieldPair(MetricStat metricStat, String field) { + this.metricStat = metricStat; + if (metricStat == MetricStat.COUNT) { + this.field = STAR; + } else { + this.field = field; + } + } + + /** + * @return Metric Type + */ + public MetricStat getMetricStat() { + return metricStat; + } + + /** + * @return field Name + */ + public String getField() { + return field; + } + + /** + * @return field name with metric type and field + */ + public String toFieldName() { + return toFieldName(metricStat, field); + } + + /** + * Builds field name with metric type and field + */ + public static String toFieldName(MetricStat metricType, String field) { + return metricType.getTypeName() + DELIMITER + field; + } + + /** + * Builds MetricStatFieldPair from field name + */ + public static MetricStatFieldPair fromFieldName(String fieldName) { + String[] parts = fieldName.split(DELIMITER, 2); + return fromMetricAndFieldName(parts[0], parts[1]); + } + + /** + * Builds MetricStatFieldPair from metric and field name + */ + private static MetricStatFieldPair fromMetricAndFieldName(String metricName, String fieldName) { + MetricStat metricType = MetricStat.fromTypeName(metricName); + if (metricType == MetricStat.COUNT) { + return COUNT_STAR; + } else { + return new MetricStatFieldPair(metricType, fieldName); + } + } + + @Override + public int hashCode() { + return 31 * metricStat.hashCode() + field.hashCode(); + } + + @Override + public boolean equals(Object obj) { + if (this == obj) { + return true; + } + if (obj instanceof MetricStatFieldPair) { + MetricStatFieldPair anotherPair = (MetricStatFieldPair) obj; + return metricStat == anotherPair.metricStat && field.equals(anotherPair.field); + } + return false; + } + + @Override + public String toString() { + return toFieldName(); + } + + @Override + public int compareTo(MetricStatFieldPair other) { + return Comparator.comparing((MetricStatFieldPair o) -> o.field) + .thenComparing((MetricStatFieldPair o) -> o.metricStat) + .compare(this, other); + } +} diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/aggregators/SumValueAggregator.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/aggregators/SumValueAggregator.java new file mode 100644 index 0000000000000..a3a14f9bbb11a --- /dev/null +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/aggregators/SumValueAggregator.java @@ -0,0 +1,82 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ +package org.opensearch.index.compositeindex.datacube.startree.aggregators; + +import org.apache.lucene.util.NumericUtils; +import org.opensearch.index.compositeindex.datacube.MetricStat; +import org.opensearch.index.compositeindex.datacube.startree.aggregators.numerictype.StarTreeNumericType; +import org.opensearch.index.compositeindex.datacube.startree.data.DataType; +import org.opensearch.search.aggregations.metrics.CompensatedSum; + +/** + * Sum value aggregator for star tree + * + * @opensearch.internal + */ +public class SumValueAggregator implements ValueAggregator { + public static final DataType AGGREGATED_VALUE_TYPE = DataType.DOUBLE; + + @Override + public MetricStat getAggregationType() { + return MetricStat.SUM; + } + + @Override + public DataType getAggregatedValueType() { + return AGGREGATED_VALUE_TYPE; + } + + @Override + public Double getInitialAggregatedValue(Long rawValue, StarTreeNumericType starTreeNumericType) { + return starTreeNumericType.getDoubleValue(rawValue); + } + + @Override + public Double applySegmentRawValue(Double value, Long rawValue, StarTreeNumericType starTreeNumericType) { + CompensatedSum kahanSummation = new CompensatedSum(0, 0); + kahanSummation.add(value); + kahanSummation.add(starTreeNumericType.getDoubleValue(rawValue)); + return kahanSummation.value(); + } + + @Override + public Double applyAggregatedValue(Double value, Double aggregatedValue) { + CompensatedSum kahanSummation = new CompensatedSum(0, 0); + kahanSummation.add(value); + kahanSummation.add(aggregatedValue); + return kahanSummation.value(); + } + + @Override + public Double cloneAggregatedValue(Double value) { + return value; + } + + @Override + public int getMaxAggregatedValueByteSize() { + return Double.BYTES; + } + + @Override + public Long convertAggregationTypeToSortableLongValue(Double value) { + try { + return NumericUtils.doubleToSortableLong(value); + } catch (IllegalArgumentException | NullPointerException | IllegalStateException e) { + throw new IllegalArgumentException("Cannot convert " + value + " to sortable long", e); + } + } + + @Override + public Double convertSortableLongToAggregatedTypeValue(Long value, StarTreeNumericType type) { + try { + return type.getDoubleValue(value); + } catch (IllegalArgumentException | NullPointerException | IllegalStateException e) { + throw new IllegalArgumentException("Cannot convert " + value + " to sortable aggregation type", e); + } + } +} diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/aggregators/ValueAggregator.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/aggregators/ValueAggregator.java new file mode 100644 index 0000000000000..b9fa3d5b69a3f --- /dev/null +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/aggregators/ValueAggregator.java @@ -0,0 +1,64 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ +package org.opensearch.index.compositeindex.datacube.startree.aggregators; + +import org.opensearch.index.compositeindex.datacube.MetricStat; +import org.opensearch.index.compositeindex.datacube.startree.aggregators.numerictype.StarTreeNumericType; +import org.opensearch.index.compositeindex.datacube.startree.data.DataType; + +/** + * A value aggregator that pre-aggregates on the input values for a specific type of aggregation. + * @opensearch.experimental + */ +public interface ValueAggregator { + + /** + * Returns the type of the aggregation. + */ + MetricStat getAggregationType(); + + /** + * Returns the data type of the aggregated value. + */ + DataType getAggregatedValueType(); + + /** + * Returns the initial aggregated value. + */ + A getInitialAggregatedValue(Long rawValue, StarTreeNumericType starTreeNumericType); + + /** + * Applies a raw value to the current aggregated value. + */ + A applySegmentRawValue(A value, Long rawValue, StarTreeNumericType starTreeNumericType); + + /** + * Applies an aggregated value to the current aggregated value. + */ + A applyAggregatedValue(A value, A aggregatedValue); + + /** + * Clones an aggregated value. + */ + A cloneAggregatedValue(A value); + + /** + * Returns the maximum size in bytes of the aggregated values seen so far. + */ + int getMaxAggregatedValueByteSize(); + + /** + * Converts an aggregated value into a Long type. + */ + Long convertAggregationTypeToSortableLongValue(A value); + + /** + * Converts an aggregated value from a Long type. + */ + A convertSortableLongToAggregatedTypeValue(Long rawValue, StarTreeNumericType type); +} diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/aggregators/ValueAggregatorFactory.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/aggregators/ValueAggregatorFactory.java new file mode 100644 index 0000000000000..523b3fbec2e7c --- /dev/null +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/aggregators/ValueAggregatorFactory.java @@ -0,0 +1,51 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ +package org.opensearch.index.compositeindex.datacube.startree.aggregators; + +import org.opensearch.index.compositeindex.datacube.MetricStat; +import org.opensearch.index.compositeindex.datacube.startree.data.DataType; + +/** + * Value aggregator factory for a given aggregation type + * @opensearch.experimental + */ +public class ValueAggregatorFactory { + private ValueAggregatorFactory() {} + + /** + * Returns a new instance of value aggregator for the given aggregation type. + * + * @param aggregationType Aggregation type + * @return Value aggregator + */ + public static ValueAggregator getValueAggregator(MetricStat aggregationType) { + switch (aggregationType) { + // other metric types (count, min, max, avg) will be supported in the future + case SUM: + return new SumValueAggregator(); + default: + throw new IllegalStateException("Unsupported aggregation type: " + aggregationType); + } + } + + /** + * Returns the data type of the aggregated value for the given aggregation type. + * + * @param aggregationType Aggregation type + * @return Data type of the aggregated value + */ + public static DataType getAggregatedValueType(MetricStat aggregationType) { + switch (aggregationType) { + // other metric types (count, min, max, avg) will be supported in the future + case SUM: + return SumValueAggregator.AGGREGATED_VALUE_TYPE; + default: + throw new IllegalStateException("Unsupported aggregation type: " + aggregationType); + } + } +} diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/aggregators/numerictype/StarTreeNumericType.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/aggregators/numerictype/StarTreeNumericType.java new file mode 100644 index 0000000000000..ed41445ac5cdf --- /dev/null +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/aggregators/numerictype/StarTreeNumericType.java @@ -0,0 +1,45 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.compositeindex.datacube.startree.aggregators.numerictype; + +import org.opensearch.index.fielddata.IndexNumericFieldData; + +import java.util.function.Function; + +public enum StarTreeNumericType { + HALF_FLOAT(IndexNumericFieldData.NumericType.HALF_FLOAT, StarTreeNumericTypeConverters::halfFloatPointToDouble), + FLOAT(IndexNumericFieldData.NumericType.FLOAT, StarTreeNumericTypeConverters::floatPointToDouble), + LONG(IndexNumericFieldData.NumericType.LONG, StarTreeNumericTypeConverters::longToDouble), + DOUBLE(IndexNumericFieldData.NumericType.DOUBLE, StarTreeNumericTypeConverters::sortableLongtoDouble); + + final IndexNumericFieldData.NumericType numericType; + final Function converter; + + StarTreeNumericType(IndexNumericFieldData.NumericType numericType, Function converter) { + this.numericType = numericType; + this.converter = converter; + } + + public double getDoubleValue(long rawValue) { + return this.converter.apply(rawValue); + } + + public static StarTreeNumericType fromNumericType(IndexNumericFieldData.NumericType numericType) { + switch (numericType) { + case HALF_FLOAT: + return StarTreeNumericType.HALF_FLOAT; + case FLOAT: + return StarTreeNumericType.FLOAT; + case LONG: + return StarTreeNumericType.LONG; + default: + return StarTreeNumericType.DOUBLE; + } + } +} diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/aggregators/numerictype/StarTreeNumericTypeConverters.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/aggregators/numerictype/StarTreeNumericTypeConverters.java new file mode 100644 index 0000000000000..59a2c3418e931 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/aggregators/numerictype/StarTreeNumericTypeConverters.java @@ -0,0 +1,31 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.compositeindex.datacube.startree.aggregators.numerictype; + +import org.apache.lucene.sandbox.document.HalfFloatPoint; +import org.apache.lucene.util.NumericUtils; + +public class StarTreeNumericTypeConverters { + + public static double halfFloatPointToDouble(Long value) { + return HalfFloatPoint.sortableShortToHalfFloat((short) value.longValue()); + } + + public static double floatPointToDouble(Long value) { + return NumericUtils.sortableIntToFloat((int) value.longValue()); + } + + public static double longToDouble(Long value) { + return (double) value; + } + + public static Double sortableLongtoDouble(Long value) { + return NumericUtils.sortableLongToDouble(value); + } +} diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/aggregators/numerictype/package-info.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/aggregators/numerictype/package-info.java new file mode 100644 index 0000000000000..b9a9bb8f1427d --- /dev/null +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/aggregators/numerictype/package-info.java @@ -0,0 +1,13 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +/** + * Numeric Types for Composite Index Star Tree + * @opensearch.experimental + */ +package org.opensearch.index.compositeindex.datacube.startree.aggregators.numerictype; diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/aggregators/package-info.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/aggregators/package-info.java new file mode 100644 index 0000000000000..27565ffded2cf --- /dev/null +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/aggregators/package-info.java @@ -0,0 +1,13 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +/** + * Aggregators for Composite Index Star Tree + * @opensearch.experimental + */ +package org.opensearch.index.compositeindex.datacube.startree.aggregators; diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/DocValuesIteratorFactory.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/DocValuesIteratorFactory.java new file mode 100644 index 0000000000000..66302f84f1449 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/DocValuesIteratorFactory.java @@ -0,0 +1,38 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.compositeindex.datacube.startree.builder; + +import org.apache.lucene.codecs.DocValuesProducer; +import org.apache.lucene.index.DocValuesType; +import org.apache.lucene.index.FieldInfo; +import org.apache.lucene.search.DocIdSetIterator; + +import java.io.IOException; + +/** + * An interface to support iterators for various doc values types. + * @opensearch.experimental + */ +public interface DocValuesIteratorFactory { + + /** + * Creates an iterator for the given doc values type and field using the doc values producer + */ + DocIdSetIterator createIterator(DocValuesType type, FieldInfo field, DocValuesProducer producer) throws IOException; + + /** + * Returns the next value for the given iterator + */ + long getNextValue(DocIdSetIterator iterator) throws IOException; + + /** + * Returns the doc id for the next document from the given iterator + */ + int nextDoc(DocIdSetIterator iterator) throws IOException; +} diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/MultipleTreesBuilder.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/MultipleTreesBuilder.java new file mode 100644 index 0000000000000..ad3d33474840f --- /dev/null +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/MultipleTreesBuilder.java @@ -0,0 +1,110 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.compositeindex.datacube.startree.builder; + +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.apache.lucene.codecs.DocValuesConsumer; +import org.apache.lucene.codecs.DocValuesProducer; +import org.apache.lucene.index.SegmentWriteState; +import org.opensearch.index.compositeindex.datacube.startree.StarTreeField; +import org.opensearch.index.compositeindex.datacube.startree.StarTreeFieldConfiguration; +import org.opensearch.index.mapper.MapperService; + +import java.io.Closeable; +import java.io.IOException; +import java.util.List; +import java.util.Locale; + +public class MultipleTreesBuilder implements Closeable { + + private static final Logger logger = LogManager.getLogger(MultipleTreesBuilder.class); + + private final List starTreeFields; + private final StarTreeFieldConfiguration.StarTreeBuildMode buildMode; + private final DocValuesConsumer docValuesConsumer; + private final SegmentWriteState state; + private final DocValuesProducer docValuesProducer; + private final MapperService mapperService; + + public MultipleTreesBuilder( + List starTreeFields, + StarTreeFieldConfiguration.StarTreeBuildMode buildMode, + DocValuesProducer docValuesProducer, + DocValuesConsumer docValuesConsumer, + SegmentWriteState segmentWriteState, + MapperService mapperService + ) { + this.starTreeFields = starTreeFields; + if (starTreeFields == null || starTreeFields.isEmpty()) { + throw new IllegalArgumentException("Must provide star-tree builder configs"); + } + this.buildMode = buildMode; + this.docValuesProducer = docValuesProducer; + this.docValuesConsumer = docValuesConsumer; + this.state = segmentWriteState; + this.mapperService = mapperService; + } + + /** + * Builds the star-trees. + */ + public void build() throws Exception { + long startTime = System.currentTimeMillis(); + int numStarTrees = starTreeFields.size(); + logger.info("Starting building {} star-trees with configs: {} using {} builder", numStarTrees, starTreeFields, buildMode); + + // Build all star-trees + for (int i = 0; i < numStarTrees; i++) { + StarTreeField starTreeField = starTreeFields.get(i); + try ( + SingleTreeBuilder singleTreeBuilder = getSingleTreeBuilder( + starTreeField, + buildMode, + docValuesProducer, + docValuesConsumer, + state, + mapperService + ) + ) { + singleTreeBuilder.build(); + } + } + logger.info( + "Took {} ms to building {} star-trees with configs: {} using {} builder", + System.currentTimeMillis() - startTime, + numStarTrees, + starTreeFields, + buildMode + ); + } + + @Override + public void close() throws IOException { + + } + + private static SingleTreeBuilder getSingleTreeBuilder( + StarTreeField starTreeField, + StarTreeFieldConfiguration.StarTreeBuildMode buildMode, + DocValuesProducer docValuesProducer, + DocValuesConsumer docValuesConsumer, + SegmentWriteState state, + MapperService mapperService + ) throws IOException { + switch (buildMode) { + case ON_HEAP: + return new OnHeapSingleTreeBuilder(starTreeField, docValuesProducer, docValuesConsumer, state, mapperService); + default: + throw new IllegalArgumentException( + String.format(Locale.ROOT, "No star tree implementation is available for [%s] build mode", buildMode) + ); + } + } +} diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/OnHeapSingleTreeBuilder.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/OnHeapSingleTreeBuilder.java new file mode 100644 index 0000000000000..6baaedb1d0710 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/OnHeapSingleTreeBuilder.java @@ -0,0 +1,204 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ +package org.opensearch.index.compositeindex.datacube.startree.builder; + +import org.apache.lucene.codecs.DocValuesConsumer; +import org.apache.lucene.codecs.DocValuesProducer; +import org.apache.lucene.index.BaseSingleStarTreeBuilder; +import org.apache.lucene.index.SegmentWriteState; +import org.opensearch.index.compositeindex.datacube.startree.StarTreeField; +import org.opensearch.index.compositeindex.datacube.startree.data.StarTreeDocument; +import org.opensearch.index.mapper.MapperService; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Iterator; +import java.util.List; + +/** + * On heap single tree builder + */ +public class OnHeapSingleTreeBuilder extends BaseSingleStarTreeBuilder { + + private final List starTreeDocuments = new ArrayList<>(); + + /** + * Constructor for OnHeapSingleTreeBuilder + * + * @param starTreeField star-tree field + * @param docValuesProducer document values producer + * @param docValuesConsumer document values consumer + * @param segmentWriteState segment write state + * @param mapperService + * @throws IOException throws an exception we are unable to construct an onheap star-tree + */ + public OnHeapSingleTreeBuilder( + StarTreeField starTreeField, + DocValuesProducer docValuesProducer, + DocValuesConsumer docValuesConsumer, + SegmentWriteState segmentWriteState, + MapperService mapperService + ) throws IOException { + super(starTreeField, docValuesProducer, docValuesConsumer, segmentWriteState, mapperService); + } + + @Override + public void appendStarTreeDocument(StarTreeDocument starTreeDocument) throws IOException { + starTreeDocuments.add(starTreeDocument); + } + + @Override + public StarTreeDocument getStarTreeDocument(int docId) throws IOException { + return starTreeDocuments.get(docId); + } + + @Override + public List getStarTreeDocuments() throws IOException { + return starTreeDocuments; + } + + // TODO: should this be just long? + @Override + public long getDimensionValue(int docId, int dimensionId) throws IOException { + return starTreeDocuments.get(docId).dimensions[dimensionId]; + } + + // Handles star-tree rebuilds during merges :) + // @Override + // public void build(List starTreeValues) throws IOException { + // TODO: This will be handled during off heap implementation for merges + // } + + @Override + public Iterator processSegmentStarTreeDocuments(int numDocs) throws IOException { + StarTreeDocument[] starTreeDocuments = new StarTreeDocument[numDocs]; + for (int i = 0; i < numDocs; i++) { + starTreeDocuments[i] = getNextSegmentStarTreeDocument(); + } + return processStarTreeDocuments(starTreeDocuments); + } + + /** + * Sort, aggregates and merges the star-tree documents + * @param starTreeDocuments star-tree documents + * @return iterator for star-tree documents + * @throws IOException throws when unable to sort, merge and aggregate star-tree documents + */ + public Iterator processStarTreeDocuments(StarTreeDocument[] starTreeDocuments) throws IOException { + + // sort the documents + Arrays.sort(starTreeDocuments, (o1, o2) -> { + for (int i = 0; i < numDimensions; i++) { + if (o1.dimensions[i] != o2.dimensions[i]) { + return Math.toIntExact(o1.dimensions[i] - o2.dimensions[i]); + } + } + return 0; + }); + + // merge the documents + return mergeStarTreeDocuments(starTreeDocuments); + } + + /** + * Merges the star-tree documents + * @param starTreeDocuments star-tree documents + * @return iterator to aggregate star-tree documents + */ + private Iterator mergeStarTreeDocuments(StarTreeDocument[] starTreeDocuments) { + return new Iterator<>() { + boolean hasNext = true; + StarTreeDocument currentStarTreeDocument = starTreeDocuments[0]; + int docId = 1; + + @Override + public boolean hasNext() { + return hasNext; + } + + @Override + public StarTreeDocument next() { + // aggregate as we move on to the next doc + StarTreeDocument next = aggregateStarTreeDocument(null, currentStarTreeDocument); + while (docId < starTreeDocuments.length) { + StarTreeDocument starTreeDocument = starTreeDocuments[docId++]; + if (!Arrays.equals(starTreeDocument.dimensions, next.dimensions)) { + currentStarTreeDocument = starTreeDocument; + return next; + } else { + next = aggregateStarTreeDocument(next, starTreeDocument); + } + } + hasNext = false; + return next; + } + }; + } + + /** + * Generates a star-tree for a given star-node + * @param startDocId Start document id in the star-tree + * @param endDocId End document id (exclusive) in the star-tree + * @param dimensionId Dimension id of the star-node + * @return iterator for star-tree documents of star-node + * @throws IOException throws when unable to generate star-tree for star-node + */ + @Override + public Iterator generateStarTreeForStarNode(int startDocId, int endDocId, int dimensionId) throws IOException { + int numDocs = endDocId - startDocId; + StarTreeDocument[] starTreeDocuments = new StarTreeDocument[numDocs]; + for (int i = 0; i < numDocs; i++) { + starTreeDocuments[i] = getStarTreeDocument(startDocId + i); + } + Arrays.sort(starTreeDocuments, (o1, o2) -> { + for (int i = dimensionId + 1; i < numDimensions; i++) { + if (o1.dimensions[i] != o2.dimensions[i]) { + return Math.toIntExact(o1.dimensions[i] - o2.dimensions[i]); + } + } + return 0; + }); + return new Iterator() { + boolean hasNext = true; + StarTreeDocument currentStarTreeDocument = starTreeDocuments[0]; + int docId = 1; + + private boolean hasSameDimensions(StarTreeDocument starTreeDocument1, StarTreeDocument starTreeDocument2) { + for (int i = dimensionId + 1; i < numDimensions; i++) { + if (starTreeDocument1.dimensions[i] != starTreeDocument2.dimensions[i]) { + return false; + } + } + return true; + } + + @Override + public boolean hasNext() { + return hasNext; + } + + @Override + public StarTreeDocument next() { + StarTreeDocument next = aggregateStarTreeDocument(null, currentStarTreeDocument); + next.dimensions[dimensionId] = STAR_IN_DOC_VALUES_INDEX; + while (docId < numDocs) { + StarTreeDocument starTreeDocument = starTreeDocuments[docId++]; + if (!hasSameDimensions(starTreeDocument, currentStarTreeDocument)) { + currentStarTreeDocument = starTreeDocument; + return next; + } else { + next = aggregateStarTreeDocument(next, starTreeDocument); + } + } + hasNext = false; + return next; + } + }; + } +} diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/SingleTreeBuilder.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/SingleTreeBuilder.java new file mode 100644 index 0000000000000..3b26f4c5e04c3 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/SingleTreeBuilder.java @@ -0,0 +1,33 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.compositeindex.datacube.startree.builder; + +import java.io.Closeable; +import java.io.IOException; + +/** + * A star-tree builder that builds a single star-tree. + * @opensearch.experimental + */ +public interface SingleTreeBuilder extends Closeable { + + /** + * Builds the star tree based on star-tree field + * @throws IOException when we are unable to build star-tree + */ + void build() throws Exception; + + /** + * Builds the star tree using star-tree document values during segment merges + * @param starTreeValues star-tree document values + * @throws IOException when we are unable to build star-tree + */ + // void build(List starTreeValues) throws IOException; + +} diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/StarTreeDocValuesIteratorFactory.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/StarTreeDocValuesIteratorFactory.java new file mode 100644 index 0000000000000..0105ff42045de --- /dev/null +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/StarTreeDocValuesIteratorFactory.java @@ -0,0 +1,54 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.compositeindex.datacube.startree.builder; + +import org.apache.lucene.codecs.DocValuesProducer; +import org.apache.lucene.index.DocValuesType; +import org.apache.lucene.index.FieldInfo; +import org.apache.lucene.index.SortedNumericDocValues; +import org.apache.lucene.index.SortedSetDocValues; +import org.apache.lucene.search.DocIdSetIterator; + +import java.io.IOException; + +/** + * A factory class to return respective doc values iterator based on the doc volues type. + * @opensearch.experimental + */ +public class StarTreeDocValuesIteratorFactory implements DocValuesIteratorFactory { + + @Override + public DocIdSetIterator createIterator(DocValuesType type, FieldInfo field, DocValuesProducer producer) throws IOException { + switch (type) { + case SORTED_SET: + return producer.getSortedSet(field); + case SORTED_NUMERIC: + return producer.getSortedNumeric(field); + default: + throw new IllegalArgumentException("Unsupported DocValuesType: " + type); + } + } + + @Override + public long getNextValue(DocIdSetIterator iterator) throws IOException { + if (iterator instanceof SortedSetDocValues) { + return ((SortedSetDocValues) iterator).nextOrd(); + } else if (iterator instanceof SortedNumericDocValues) { + return ((SortedNumericDocValues) iterator).nextValue(); + } else { + throw new IllegalArgumentException("Unsupported Iterator: " + iterator.toString()); + } + } + + @Override + public int nextDoc(DocIdSetIterator iterator) throws IOException { + return iterator.nextDoc(); + } + +} diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/package-info.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/package-info.java new file mode 100644 index 0000000000000..80eed545ef8a5 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/package-info.java @@ -0,0 +1,13 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +/** + * Builders for Composite Index Star Tree + * @opensearch.experimental + */ +package org.opensearch.index.compositeindex.datacube.startree.builder; diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/data/DataType.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/data/DataType.java new file mode 100644 index 0000000000000..3c737c44b7219 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/data/DataType.java @@ -0,0 +1,67 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ +package org.opensearch.index.compositeindex.datacube.startree.data; + +/** + * Data type of doc values + * @opensearch.internal + */ +public enum DataType { + INT(Integer.BYTES, true), + LONG(Long.BYTES, true), + FLOAT(Float.BYTES, true), + DOUBLE(Double.BYTES, true); + + private final int size; + private final boolean numeric; + + DataType(int size, boolean numeric) { + this.size = size; + this.numeric = numeric; + } + + /** + * Returns the number of bytes needed to store the data type. + */ + public int size() { + if (size >= 0) { + return size; + } + throw new IllegalStateException("Cannot get number of bytes for: " + this); + } + + /** + * Returns {@code true} if the data type is numeric (INT, LONG, FLOAT, DOUBLE, BIG_DECIMAL), + * {@code false} otherwise. + */ + public boolean isNumeric() { + return numeric; + } + + /** + * Converts the given string value to the data type. Returns byte[] for BYTES. + */ + public Object convert(String value) { + try { + switch (this) { + case INT: + return Integer.valueOf(value); + case LONG: + return Long.valueOf(value); + case FLOAT: + return Float.valueOf(value); + case DOUBLE: + return Double.valueOf(value); + default: + throw new IllegalStateException(); + } + } catch (Exception e) { + throw new IllegalArgumentException(e); + } + } +} diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/data/StarTreeDocument.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/data/StarTreeDocument.java new file mode 100644 index 0000000000000..cb4253e21c141 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/data/StarTreeDocument.java @@ -0,0 +1,29 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.compositeindex.datacube.startree.data; + +import java.util.Arrays; + +/** + * Star tree document + */ +public class StarTreeDocument { + public final long[] dimensions; + public final Object[] metrics; + + public StarTreeDocument(long[] dimensions, Object[] metrics) { + this.dimensions = dimensions; + this.metrics = metrics; + } + + @Override + public String toString() { + return Arrays.toString(dimensions) + " | " + Arrays.toString(metrics); + } +} diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/node/package-info.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/node/package-info.java new file mode 100644 index 0000000000000..3af5020b76da2 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/node/package-info.java @@ -0,0 +1,13 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +/** + * Node for Composite Index Star Tree + * @opensearch.experimental + */ +package org.opensearch.index.compositeindex.datacube.startree.node; diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/utils/StarTreeBuilderUtils.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/utils/StarTreeBuilderUtils.java new file mode 100644 index 0000000000000..9fa980277f27b --- /dev/null +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/utils/StarTreeBuilderUtils.java @@ -0,0 +1,131 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ +package org.opensearch.index.compositeindex.datacube.startree.utils; + +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.apache.lucene.store.IndexOutput; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Comparator; +import java.util.LinkedList; +import java.util.List; +import java.util.Map; +import java.util.Queue; + +import static java.nio.charset.StandardCharsets.UTF_8; + +/** + * Util class for building star tree + * @opensearch.experimental + */ +public class StarTreeBuilderUtils { + + private static final Logger logger = LogManager.getLogger(StarTreeBuilderUtils.class); + + // TODO: To be moved to off heap star tree implementation + public static final int NUM_INT_SERIALIZABLE_FIELDS = 6; + public static final int NUM_LONG_SERIALIZABLE_FIELDS = 1; + public static final long SERIALIZABLE_SIZE_IN_BYTES = (Integer.BYTES * NUM_INT_SERIALIZABLE_FIELDS) + (Long.BYTES + * NUM_LONG_SERIALIZABLE_FIELDS); + + private StarTreeBuilderUtils() {} + + public static final int ALL = -1; + public static final long MAGIC_MARKER = 0xBADDA55B00DAD00DL; + + /** Tree node representation */ + public static class TreeNode { + public int dimensionId = ALL; + public long dimensionValue = ALL; + public int startDocId = ALL; + public int endDocId = ALL; + public int aggregatedDocId = ALL; + public int childDimensionId = ALL; + public Map children; + } + + /** Serializes the tree */ + public static void serializeTree(IndexOutput indexOutput, TreeNode rootNode, String[] dimensions, int numNodes) throws IOException { + int headerSizeInBytes = computeHeaderByteSize(dimensions); + long totalSizeInBytes = headerSizeInBytes + (long) numNodes * SERIALIZABLE_SIZE_IN_BYTES; + + logger.info("Star tree size in bytes : {}", totalSizeInBytes); + + writeHeader(indexOutput, headerSizeInBytes, dimensions, numNodes); + writeNodes(indexOutput, rootNode); + } + + /** Computes the size of the header for the tree */ + static int computeHeaderByteSize(String[] dimensions) { + // Magic marker (8), version (4), size of header (4) and number of dimensions (4) + int headerSizeInBytes = 20; + + for (String dimension : dimensions) { + headerSizeInBytes += Integer.BYTES; // For dimension index + headerSizeInBytes += Integer.BYTES; // For length of dimension name + headerSizeInBytes += dimension.getBytes(UTF_8).length; // For dimension name + } + + headerSizeInBytes += Integer.BYTES; // For number of nodes. + return headerSizeInBytes; + } + + /** Writes the header of the tree */ + static void writeHeader(IndexOutput output, int headerSizeInBytes, String[] dimensions, int numNodes) throws IOException { + output.writeLong(MAGIC_MARKER); + output.writeInt(1); + output.writeInt(headerSizeInBytes); + output.writeInt(dimensions.length); + for (int i = 0; i < dimensions.length; i++) { + output.writeInt(i); + output.writeString(dimensions[i]); + } + output.writeInt(numNodes); + } + + /** Writes the nodes of the tree */ + static void writeNodes(IndexOutput output, TreeNode rootNode) throws IOException { + Queue queue = new LinkedList<>(); + queue.add(rootNode); + + int currentNodeId = 0; + while (!queue.isEmpty()) { + TreeNode node = queue.remove(); + + if (node.children == null) { + writeNode(output, node, ALL, ALL); + } else { + // Sort all children nodes based on dimension value + List sortedChildren = new ArrayList<>(node.children.values()); + sortedChildren.sort(Comparator.comparingLong(o -> o.dimensionValue)); + + int firstChildId = currentNodeId + queue.size() + 1; + int lastChildId = firstChildId + sortedChildren.size() - 1; + writeNode(output, node, firstChildId, lastChildId); + + queue.addAll(sortedChildren); + } + + currentNodeId++; + } + } + + /** Writes a node of the tree */ + private static void writeNode(IndexOutput output, TreeNode node, int firstChildId, int lastChildId) throws IOException { + output.writeInt(node.dimensionId); + output.writeLong(node.dimensionValue); + output.writeInt(node.startDocId); + output.writeInt(node.endDocId); + output.writeInt(node.aggregatedDocId); + output.writeInt(firstChildId); + output.writeInt(lastChildId); + } + +} diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/utils/package-info.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/utils/package-info.java new file mode 100644 index 0000000000000..92930de98970d --- /dev/null +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/utils/package-info.java @@ -0,0 +1,13 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +/** + * Utility to support Composite Index Star Tree + * @opensearch.experimental + */ +package org.opensearch.index.compositeindex.datacube.startree.utils; diff --git a/server/src/main/java/org/opensearch/index/mapper/StarTreeMapper.java b/server/src/main/java/org/opensearch/index/mapper/StarTreeMapper.java index fe9cd598a88fc..3a39d74e1fc8a 100644 --- a/server/src/main/java/org/opensearch/index/mapper/StarTreeMapper.java +++ b/server/src/main/java/org/opensearch/index/mapper/StarTreeMapper.java @@ -22,7 +22,6 @@ import org.opensearch.search.lookup.SearchLookup; import java.util.ArrayList; -import java.util.Arrays; import java.util.LinkedHashSet; import java.util.LinkedList; import java.util.List; @@ -83,7 +82,7 @@ public static class Builder extends ParametrizedFieldMapper.Builder { ); } paramMap.remove(MAX_LEAF_DOCS); - List skipStarInDims = Arrays.asList( + Set skipStarInDims = Set.of( XContentMapValues.nodeStringArrayValue(paramMap.getOrDefault(SKIP_STAR_NODE_IN_DIMS, new ArrayList())) ); paramMap.remove(SKIP_STAR_NODE_IN_DIMS); diff --git a/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/StarTreeMappingIntegTests.java b/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/StarTreeMappingIntegTests.java index 3691abea599ac..0097574a19b85 100644 --- a/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/StarTreeMappingIntegTests.java +++ b/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/StarTreeMappingIntegTests.java @@ -227,7 +227,7 @@ public void testValidCompositeIndex() { StarTreeFieldConfiguration.StarTreeBuildMode.OFF_HEAP, starTreeFieldType.getStarTreeConfig().getBuildMode() ); - assertEquals(Collections.emptyList(), starTreeFieldType.getStarTreeConfig().getSkipStarNodeCreationInDims()); + assertEquals(Collections.emptySet(), starTreeFieldType.getStarTreeConfig().getSkipStarNodeCreationInDims()); } } } @@ -311,7 +311,7 @@ public void testUpdateIndexWhenMappingIsSame() { StarTreeFieldConfiguration.StarTreeBuildMode.OFF_HEAP, starTreeFieldType.getStarTreeConfig().getBuildMode() ); - assertEquals(Collections.emptyList(), starTreeFieldType.getStarTreeConfig().getSkipStarNodeCreationInDims()); + assertEquals(Collections.emptySet(), starTreeFieldType.getStarTreeConfig().getSkipStarNodeCreationInDims()); } } } diff --git a/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/aggregators/MetricStatFieldPairTests.java b/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/aggregators/MetricStatFieldPairTests.java new file mode 100644 index 0000000000000..fd026f49c0140 --- /dev/null +++ b/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/aggregators/MetricStatFieldPairTests.java @@ -0,0 +1,69 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.compositeindex.datacube.startree.aggregators; + +import org.opensearch.index.compositeindex.datacube.MetricStat; +import org.opensearch.test.OpenSearchTestCase; + +public class MetricStatFieldPairTests extends OpenSearchTestCase { + + public void testConstructor() { + MetricStatFieldPair pair = new MetricStatFieldPair(MetricStat.SUM, "column1"); + assertEquals(MetricStat.SUM, pair.getMetricStat()); + assertEquals("column1", pair.getField()); + } + + public void testCountStarConstructor() { + MetricStatFieldPair pair = new MetricStatFieldPair(MetricStat.COUNT, "anything"); + assertEquals(MetricStat.COUNT, pair.getMetricStat()); + assertEquals("*", pair.getField()); + } + + public void testToFieldName() { + MetricStatFieldPair pair = new MetricStatFieldPair(MetricStat.AVG, "column2"); + assertEquals("avg__column2", pair.toFieldName()); + } + + public void testFromFieldName() { + MetricStatFieldPair pair = MetricStatFieldPair.fromFieldName("max__column3"); + assertEquals(MetricStat.MAX, pair.getMetricStat()); + assertEquals("column3", pair.getField()); + } + + public void testCountStarFromFieldName() { + MetricStatFieldPair pair = MetricStatFieldPair.fromFieldName("count__*"); + assertEquals(MetricStat.COUNT, pair.getMetricStat()); + assertEquals("*", pair.getField()); + assertSame(MetricStatFieldPair.COUNT_STAR, pair); + } + + public void testEquals() { + MetricStatFieldPair pair1 = new MetricStatFieldPair(MetricStat.SUM, "column1"); + MetricStatFieldPair pair2 = new MetricStatFieldPair(MetricStat.SUM, "column1"); + assertEquals(pair1, pair2); + assertNotEquals(pair1, new MetricStatFieldPair(MetricStat.AVG, "column1")); + assertNotEquals(pair1, new MetricStatFieldPair(MetricStat.SUM, "column2")); + } + + public void testHashCode() { + MetricStatFieldPair pair1 = new MetricStatFieldPair(MetricStat.SUM, "column1"); + MetricStatFieldPair pair2 = new MetricStatFieldPair(MetricStat.SUM, "column1"); + assertEquals(pair1.hashCode(), pair2.hashCode()); + } + + public void testCompareTo() { + MetricStatFieldPair pair1 = new MetricStatFieldPair(MetricStat.SUM, "column1"); + MetricStatFieldPair pair2 = new MetricStatFieldPair(MetricStat.SUM, "column2"); + MetricStatFieldPair pair3 = new MetricStatFieldPair(MetricStat.AVG, "column1"); + assertTrue(pair1.compareTo(pair2) < 0); + assertTrue(pair2.compareTo(pair1) > 0); + assertTrue(pair1.compareTo(pair3) > 0); + assertTrue(pair3.compareTo(pair1) < 0); + } +} diff --git a/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/aggregators/SumValueAggregatorTests.java b/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/aggregators/SumValueAggregatorTests.java new file mode 100644 index 0000000000000..58709934c894e --- /dev/null +++ b/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/aggregators/SumValueAggregatorTests.java @@ -0,0 +1,64 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.compositeindex.datacube.startree.aggregators; + +import org.apache.lucene.util.NumericUtils; +import org.opensearch.index.compositeindex.datacube.MetricStat; +import org.opensearch.index.compositeindex.datacube.startree.aggregators.numerictype.StarTreeNumericType; +import org.opensearch.test.OpenSearchTestCase; + +public class SumValueAggregatorTests extends OpenSearchTestCase { + + private final SumValueAggregator aggregator = new SumValueAggregator(); + + public void testGetAggregationType() { + assertEquals(MetricStat.SUM.getTypeName(), aggregator.getAggregationType().getTypeName()); + } + + public void testGetAggregatedValueType() { + assertEquals(SumValueAggregator.AGGREGATED_VALUE_TYPE, aggregator.getAggregatedValueType()); + } + + public void testGetInitialAggregatedValue() { + assertEquals(1.0, aggregator.getInitialAggregatedValue(1L, StarTreeNumericType.LONG), 0.0); + assertThrows(NullPointerException.class, () -> aggregator.getInitialAggregatedValue(null, StarTreeNumericType.DOUBLE)); + } + + public void testApplySegmentRawValue() { + assertEquals(5.0, aggregator.applySegmentRawValue(2.0, 3L, StarTreeNumericType.LONG), 0.0); + assertThrows(NullPointerException.class, () -> aggregator.applySegmentRawValue(3.14, null, StarTreeNumericType.DOUBLE)); + } + + public void testApplyAggregatedValue() { + assertEquals(5.0, aggregator.applyAggregatedValue(2.0, 3.0), 0.0); + assertEquals(7.28, aggregator.applyAggregatedValue(3.14, 4.14), 0.0000001); + } + + public void testCloneAggregatedValue() { + assertEquals(3.14, aggregator.cloneAggregatedValue(3.14), 0.0); + } + + public void testGetMaxAggregatedValueByteSize() { + assertEquals(Double.BYTES, aggregator.getMaxAggregatedValueByteSize()); + } + + public void testConvertAggregationTypeToSortableLongValue() { + SumValueAggregator aggregator = new SumValueAggregator(); + assertEquals(NumericUtils.doubleToSortableLong(3.14), aggregator.convertAggregationTypeToSortableLongValue(3.14), 0.0); + } + + public void testConvertSortableLongToAggregatedTypeValue() { + SumValueAggregator aggregator = new SumValueAggregator(); + assertEquals( + NumericUtils.sortableLongToDouble(3L), + aggregator.convertSortableLongToAggregatedTypeValue(3L, StarTreeNumericType.DOUBLE), + 0.0 + ); + } +} diff --git a/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/aggregators/ValueAggregatorFactoryTests.java b/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/aggregators/ValueAggregatorFactoryTests.java new file mode 100644 index 0000000000000..f80783dacb643 --- /dev/null +++ b/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/aggregators/ValueAggregatorFactoryTests.java @@ -0,0 +1,43 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.compositeindex.datacube.startree.aggregators; + +import org.opensearch.index.compositeindex.datacube.MetricStat; +import org.opensearch.index.compositeindex.datacube.startree.data.DataType; +import org.opensearch.test.OpenSearchTestCase; + +public class ValueAggregatorFactoryTests extends OpenSearchTestCase { + + public void testGetValueAggregatorForSumType() { + ValueAggregator aggregator = ValueAggregatorFactory.getValueAggregator(MetricStat.SUM); + assertNotNull(aggregator); + assertEquals(SumValueAggregator.class, aggregator.getClass()); + } + + public void testGetValueAggregatorForUnsupportedType() { + IllegalStateException exception = expectThrows( + IllegalStateException.class, + () -> ValueAggregatorFactory.getValueAggregator(MetricStat.UNSUPPORTED) + ); + assertEquals("Unsupported aggregation type: UNSUPPORTED", exception.getMessage()); + } + + public void testGetAggregatedValueTypeForSumType() { + DataType dataType = ValueAggregatorFactory.getAggregatedValueType(MetricStat.SUM); + assertEquals(SumValueAggregator.AGGREGATED_VALUE_TYPE, dataType); + } + + public void testGetAggregatedValueTypeForUnsupportedType() { + IllegalStateException exception = expectThrows( + IllegalStateException.class, + () -> ValueAggregatorFactory.getAggregatedValueType(MetricStat.UNSUPPORTED) + ); + assertEquals("Unsupported aggregation type: UNSUPPORTED", exception.getMessage()); + } +} diff --git a/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/builder/BaseSingleStarTreeBuilderTests.java b/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/builder/BaseSingleStarTreeBuilderTests.java new file mode 100644 index 0000000000000..82be99faa8bd8 --- /dev/null +++ b/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/builder/BaseSingleStarTreeBuilderTests.java @@ -0,0 +1,213 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.compositeindex.datacube.startree.builder; + +import org.apache.lucene.codecs.DocValuesConsumer; +import org.apache.lucene.codecs.DocValuesProducer; +import org.apache.lucene.codecs.lucene99.Lucene99Codec; +import org.apache.lucene.index.BaseSingleStarTreeBuilder; +import org.apache.lucene.index.DocValuesType; +import org.apache.lucene.index.FieldInfo; +import org.apache.lucene.index.FieldInfos; +import org.apache.lucene.index.IndexOptions; +import org.apache.lucene.index.SegmentInfo; +import org.apache.lucene.index.SegmentWriteState; +import org.apache.lucene.index.VectorEncoding; +import org.apache.lucene.index.VectorSimilarityFunction; +import org.apache.lucene.store.Directory; +import org.apache.lucene.util.InfoStream; +import org.apache.lucene.util.Version; +import org.opensearch.common.settings.Settings; +import org.opensearch.index.compositeindex.datacube.Dimension; +import org.opensearch.index.compositeindex.datacube.Metric; +import org.opensearch.index.compositeindex.datacube.MetricStat; +import org.opensearch.index.compositeindex.datacube.startree.StarTreeField; +import org.opensearch.index.compositeindex.datacube.startree.StarTreeFieldConfiguration; +import org.opensearch.index.compositeindex.datacube.startree.aggregators.MetricStatFieldPair; +import org.opensearch.index.compositeindex.datacube.startree.data.StarTreeDocument; +import org.opensearch.index.mapper.ContentPath; +import org.opensearch.index.mapper.DocumentMapper; +import org.opensearch.index.mapper.Mapper; +import org.opensearch.index.mapper.MapperService; +import org.opensearch.index.mapper.MappingLookup; +import org.opensearch.index.mapper.NumberFieldMapper; +import org.opensearch.test.OpenSearchTestCase; +import org.junit.BeforeClass; + +import java.io.IOException; +import java.nio.charset.StandardCharsets; +import java.util.Collections; +import java.util.HashMap; +import java.util.Iterator; +import java.util.List; +import java.util.Set; +import java.util.UUID; + +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; + +public class BaseSingleStarTreeBuilderTests extends OpenSearchTestCase { + + private static BaseSingleStarTreeBuilder builder; + private static MapperService mapperService; + private static List dimensionsOrder; + private static List fields = List.of( + "field1", + "field2", + "field3", + "field4", + "field5", + "field6", + "field7", + "field8", + "field9", + "field10" + ); + private static List metrics; + private static Directory directory; + private static FieldInfo[] fieldsInfo; + + @BeforeClass + public static void setup() throws IOException { + + dimensionsOrder = List.of(new Dimension("field1"), new Dimension("field3"), new Dimension("field5"), new Dimension("field8")); + metrics = List.of(new Metric("field2", List.of(MetricStat.SUM)), new Metric("field4", List.of(MetricStat.SUM))); + + StarTreeField compositeField = new StarTreeField( + "test", + dimensionsOrder, + metrics, + new StarTreeFieldConfiguration(1, Set.of("field8"), StarTreeFieldConfiguration.StarTreeBuildMode.ON_HEAP) + ); + DocValuesConsumer docValuesConsumer = mock(DocValuesConsumer.class); + DocValuesProducer docValuesProducer = mock(DocValuesProducer.class); + directory = newFSDirectory(createTempDir()); + SegmentInfo segmentInfo = new SegmentInfo( + directory, + Version.LATEST, + Version.LUCENE_9_11_0, + "test_segment", + 5, + false, + false, + new Lucene99Codec(), + new HashMap<>(), + UUID.randomUUID().toString().substring(0, 16).getBytes(StandardCharsets.UTF_8), + new HashMap<>(), + null + ); + + fieldsInfo = new FieldInfo[fields.size()]; + + for (int i = 0; i < fieldsInfo.length; i++) { + fieldsInfo[i] = new FieldInfo( + fields.get(i), + i, + false, + false, + true, + IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS, + DocValuesType.SORTED_NUMERIC, + -1, + Collections.emptyMap(), + 0, + 0, + 0, + 0, + VectorEncoding.FLOAT32, + VectorSimilarityFunction.EUCLIDEAN, + false, + false + ); + } + FieldInfos fieldInfos = new FieldInfos(fieldsInfo); + final SegmentWriteState state = new SegmentWriteState( + InfoStream.getDefault(), + segmentInfo.dir, + segmentInfo, + fieldInfos, + null, + newIOContext(random()) + ); + + mapperService = mock(MapperService.class); + DocumentMapper documentMapper = mock(DocumentMapper.class); + when(mapperService.documentMapper()).thenReturn(documentMapper); + Settings settings = Settings.builder().put(settings(org.opensearch.Version.CURRENT).build()).build(); + NumberFieldMapper numberFieldMapper1 = new NumberFieldMapper.Builder("field2", NumberFieldMapper.NumberType.DOUBLE, false, true) + .build(new Mapper.BuilderContext(settings, new ContentPath())); + NumberFieldMapper numberFieldMapper2 = new NumberFieldMapper.Builder("field4", NumberFieldMapper.NumberType.DOUBLE, false, true) + .build(new Mapper.BuilderContext(settings, new ContentPath())); + MappingLookup fieldMappers = new MappingLookup( + Set.of(numberFieldMapper1, numberFieldMapper2), + Collections.emptyList(), + Collections.emptyList(), + 0, + null + ); + when(documentMapper.mappers()).thenReturn(fieldMappers); + + builder = new BaseSingleStarTreeBuilder(compositeField, docValuesProducer, docValuesConsumer, state, mapperService) { + @Override + public void appendStarTreeDocument(StarTreeDocument starTreeDocument) throws IOException {} + + @Override + public StarTreeDocument getStarTreeDocument(int docId) throws IOException { + return null; + } + + @Override + public List getStarTreeDocuments() throws IOException { + return List.of(); + } + + @Override + public long getDimensionValue(int docId, int dimensionId) throws IOException { + return 0; + } + + @Override + public Iterator processSegmentStarTreeDocuments(int numDocs) throws IOException { + return null; + } + + @Override + public Iterator generateStarTreeForStarNode(int startDocId, int endDocId, int dimensionId) + throws IOException { + return null; + } + }; + } + + public void test_generateMetricStatFieldPairs() throws IOException { + List metricStatFieldPairs = builder.generateMetricStatFieldPairs(); + List expectedMetricStatFieldPairs = List.of( + new MetricStatFieldPair(MetricStat.SUM, "field2"), + new MetricStatFieldPair(MetricStat.SUM, "field4") + ); + assertEquals(metricStatFieldPairs, expectedMetricStatFieldPairs); + } + + public void test_aggregateStarTreeDocument() { + StarTreeDocument starTreeDocument1 = new StarTreeDocument(new long[] { 1, 3, 5, 8 }, new Double[] { 4.0, 8.0 }); + StarTreeDocument starTreeDocument2 = new StarTreeDocument(new long[] { 1, 3, 5, 8 }, new Double[] { 10.0, 6.0 }); + + StarTreeDocument expectedeMergedStarTreeDocument = new StarTreeDocument(new long[] { 1, 3, 5, 8 }, new Double[] { 14.0, 14.0 }); + StarTreeDocument mergedStarTreeDocument = builder.aggregateStarTreeDocument(starTreeDocument1, starTreeDocument2); + + assertEquals(mergedStarTreeDocument.metrics[0], expectedeMergedStarTreeDocument.metrics[0]); + assertEquals(mergedStarTreeDocument.metrics[1], expectedeMergedStarTreeDocument.metrics[1]); + } + + @Override + public void tearDown() throws Exception { + super.tearDown(); + directory.close(); + } +} diff --git a/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/builder/OnHeapSingleTreeBuilderTests.java b/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/builder/OnHeapSingleTreeBuilderTests.java new file mode 100644 index 0000000000000..e81c836c9d723 --- /dev/null +++ b/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/builder/OnHeapSingleTreeBuilderTests.java @@ -0,0 +1,264 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.compositeindex.datacube.startree.builder; + +import org.apache.lucene.codecs.DocValuesConsumer; +import org.apache.lucene.codecs.DocValuesProducer; +import org.apache.lucene.codecs.lucene99.Lucene99Codec; +import org.apache.lucene.index.DocValuesType; +import org.apache.lucene.index.FieldInfo; +import org.apache.lucene.index.FieldInfos; +import org.apache.lucene.index.IndexOptions; +import org.apache.lucene.index.SegmentInfo; +import org.apache.lucene.index.SegmentWriteState; +import org.apache.lucene.index.VectorEncoding; +import org.apache.lucene.index.VectorSimilarityFunction; +import org.apache.lucene.store.Directory; +import org.apache.lucene.util.InfoStream; +import org.apache.lucene.util.Version; +import org.opensearch.common.settings.Settings; +import org.opensearch.index.compositeindex.datacube.Dimension; +import org.opensearch.index.compositeindex.datacube.Metric; +import org.opensearch.index.compositeindex.datacube.MetricStat; +import org.opensearch.index.compositeindex.datacube.startree.StarTreeField; +import org.opensearch.index.compositeindex.datacube.startree.StarTreeFieldConfiguration; +import org.opensearch.index.compositeindex.datacube.startree.data.StarTreeDocument; +import org.opensearch.index.mapper.ContentPath; +import org.opensearch.index.mapper.DocumentMapper; +import org.opensearch.index.mapper.Mapper; +import org.opensearch.index.mapper.MapperService; +import org.opensearch.index.mapper.MappingLookup; +import org.opensearch.index.mapper.NumberFieldMapper; +import org.opensearch.test.OpenSearchTestCase; +import org.junit.BeforeClass; + +import java.io.IOException; +import java.nio.charset.StandardCharsets; +import java.util.Collections; +import java.util.HashMap; +import java.util.Iterator; +import java.util.List; +import java.util.Set; +import java.util.UUID; + +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; + +public class OnHeapSingleTreeBuilderTests extends OpenSearchTestCase { + + private static OnHeapSingleTreeBuilder builder; + private static MapperService mapperService; + private static List dimensionsOrder; + private static List fields = List.of( + "field1", + "field2", + "field3", + "field4", + "field5", + "field6", + "field7", + "field8", + "field9", + "field10" + ); + private static List metrics; + private static Directory directory; + private static FieldInfo[] fieldsInfo; + + @BeforeClass + public static void setup() throws IOException { + dimensionsOrder = List.of(new Dimension("field1"), new Dimension("field3"), new Dimension("field5"), new Dimension("field8")); + metrics = List.of(new Metric("field2", List.of(MetricStat.SUM)), new Metric("field4", List.of(MetricStat.SUM))); + + DocValuesConsumer docValuesConsumer = mock(DocValuesConsumer.class); + DocValuesProducer docValuesProducer = mock(DocValuesProducer.class); + + StarTreeField compositeField = new StarTreeField( + "test", + dimensionsOrder, + metrics, + new StarTreeFieldConfiguration(1, Set.of("field8"), StarTreeFieldConfiguration.StarTreeBuildMode.ON_HEAP) + ); + directory = newFSDirectory(createTempDir()); + SegmentInfo segmentInfo = new SegmentInfo( + directory, + Version.LATEST, + Version.LUCENE_9_11_0, + "test_segment", + 5, + false, + false, + new Lucene99Codec(), + new HashMap<>(), + UUID.randomUUID().toString().substring(0, 16).getBytes(StandardCharsets.UTF_8), + new HashMap<>(), + null + ); + + fieldsInfo = new FieldInfo[fields.size()]; + + for (int i = 0; i < fieldsInfo.length; i++) { + fieldsInfo[i] = new FieldInfo( + fields.get(i), + i, + false, + false, + true, + IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS, + DocValuesType.SORTED_NUMERIC, + -1, + Collections.emptyMap(), + 0, + 0, + 0, + 0, + VectorEncoding.FLOAT32, + VectorSimilarityFunction.EUCLIDEAN, + false, + false + ); + } + FieldInfos fieldInfos = new FieldInfos(fieldsInfo); + final SegmentWriteState writeState = new SegmentWriteState( + InfoStream.getDefault(), + segmentInfo.dir, + segmentInfo, + fieldInfos, + null, + newIOContext(random()) + ); + + mapperService = mock(MapperService.class); + DocumentMapper documentMapper = mock(DocumentMapper.class); + when(mapperService.documentMapper()).thenReturn(documentMapper); + Settings settings = Settings.builder().put(settings(org.opensearch.Version.CURRENT).build()).build(); + NumberFieldMapper numberFieldMapper1 = new NumberFieldMapper.Builder("field2", NumberFieldMapper.NumberType.DOUBLE, false, true) + .build(new Mapper.BuilderContext(settings, new ContentPath())); + NumberFieldMapper numberFieldMapper2 = new NumberFieldMapper.Builder("field4", NumberFieldMapper.NumberType.DOUBLE, false, true) + .build(new Mapper.BuilderContext(settings, new ContentPath())); + MappingLookup fieldMappers = new MappingLookup( + Set.of(numberFieldMapper1, numberFieldMapper2), + Collections.emptyList(), + Collections.emptyList(), + 0, + null + ); + when(documentMapper.mappers()).thenReturn(fieldMappers); + builder = new OnHeapSingleTreeBuilder(compositeField, docValuesProducer, docValuesConsumer, writeState, mapperService); + } + + public void test_processStarTreeDocuments() throws IOException { + + int noOfStarTreeDocuments = 5; + StarTreeDocument[] starTreeDocuments = new StarTreeDocument[noOfStarTreeDocuments]; + + starTreeDocuments[0] = new StarTreeDocument(new long[] { 2, 4, 3, 4 }, new Double[] { 12.0, 10.0 }); + starTreeDocuments[1] = new StarTreeDocument(new long[] { 3, 4, 2, 1 }, new Double[] { 10.0, 6.0 }); + starTreeDocuments[2] = new StarTreeDocument(new long[] { 3, 4, 2, 1 }, new Double[] { 14.0, 12.0 }); + starTreeDocuments[3] = new StarTreeDocument(new long[] { 2, 4, 3, 4 }, new Double[] { 9.0, 4.0 }); + starTreeDocuments[4] = new StarTreeDocument(new long[] { 3, 4, 2, 1 }, new Double[] { 11.0, 16.0 }); + + List inorderStarTreeDocuments = List.of( + new StarTreeDocument(new long[] { 2, 4, 3, 4 }, new Double[] { 21.0, 14.0 }), + new StarTreeDocument(new long[] { 3, 4, 2, 1 }, new Double[] { 35.0, 34.0 }) + ); + Iterator expectedStarTreeDocumentIterator = inorderStarTreeDocuments.iterator(); + Iterator starTreeDocumentIterator = builder.processStarTreeDocuments(starTreeDocuments); + int numOfAggregatedDocuments = 0; + while (starTreeDocumentIterator.hasNext() && expectedStarTreeDocumentIterator.hasNext()) { + StarTreeDocument resultStarTreeDocument = starTreeDocumentIterator.next(); + StarTreeDocument expectedStarTreeDocument = expectedStarTreeDocumentIterator.next(); + + assertEquals(expectedStarTreeDocument.dimensions[0], resultStarTreeDocument.dimensions[0]); + assertEquals(expectedStarTreeDocument.dimensions[1], resultStarTreeDocument.dimensions[1]); + assertEquals(expectedStarTreeDocument.dimensions[2], resultStarTreeDocument.dimensions[2]); + assertEquals(expectedStarTreeDocument.dimensions[3], resultStarTreeDocument.dimensions[3]); + assertEquals(expectedStarTreeDocument.metrics[0], resultStarTreeDocument.metrics[0]); + assertEquals(expectedStarTreeDocument.metrics[1], resultStarTreeDocument.metrics[1]); + + numOfAggregatedDocuments++; + } + + assertEquals(inorderStarTreeDocuments.size(), numOfAggregatedDocuments); + + } + + public void test_processStarTreeDocuments_nullDocument() throws IOException { + + int noOfStarTreeDocuments = 5; + StarTreeDocument[] starTreeDocuments = new StarTreeDocument[noOfStarTreeDocuments]; + + starTreeDocuments[0] = new StarTreeDocument(new long[] { 2, 4, 3, 4 }, new Double[] { 12.0, 10.0 }); + starTreeDocuments[1] = new StarTreeDocument(new long[] { 3, 4, 2, 1 }, new Double[] { 10.0, 6.0 }); + starTreeDocuments[2] = new StarTreeDocument(new long[] { 3, 4, 2, 1 }, new Double[] { 14.0, 12.0 }); + starTreeDocuments[3] = new StarTreeDocument(new long[] { 2, 4, 3, 4 }, new Double[] { 9.0, 4.0 }); + starTreeDocuments[4] = new StarTreeDocument(new long[] { 3, 4, 2, 1 }, new Double[] { 11.0, null }); + StarTreeDocument expectedStarTreeDocument = new StarTreeDocument(new long[] { 2, 4, 3, 4 }, new Double[] { 21.0, 14.0 }); + Iterator starTreeDocumentIterator = builder.processStarTreeDocuments(starTreeDocuments); + + StarTreeDocument resultStarTreeDocument = starTreeDocumentIterator.next(); + assertEquals(expectedStarTreeDocument.dimensions[0], resultStarTreeDocument.dimensions[0]); + assertEquals(expectedStarTreeDocument.dimensions[1], resultStarTreeDocument.dimensions[1]); + assertEquals(expectedStarTreeDocument.dimensions[2], resultStarTreeDocument.dimensions[2]); + assertEquals(expectedStarTreeDocument.dimensions[3], resultStarTreeDocument.dimensions[3]); + assertEquals(expectedStarTreeDocument.metrics[0], resultStarTreeDocument.metrics[0]); + assertEquals(expectedStarTreeDocument.metrics[1], resultStarTreeDocument.metrics[1]); + + assertThrows("Cannot apply aggregated value [null]", IllegalArgumentException.class, starTreeDocumentIterator::next); + + } + + public void test_build() throws IOException { + + int noOfStarTreeDocuments = 5; + StarTreeDocument[] starTreeDocuments = new StarTreeDocument[noOfStarTreeDocuments]; + + starTreeDocuments[0] = new StarTreeDocument(new long[] { 2, 4, 3, 4 }, new Double[] { 12.0, 10.0 }); + starTreeDocuments[1] = new StarTreeDocument(new long[] { 3, 4, 2, 1 }, new Double[] { 10.0, 6.0 }); + starTreeDocuments[2] = new StarTreeDocument(new long[] { 3, 4, 2, 1 }, new Double[] { 14.0, 12.0 }); + starTreeDocuments[3] = new StarTreeDocument(new long[] { 2, 4, 3, 4 }, new Double[] { 9.0, 4.0 }); + starTreeDocuments[4] = new StarTreeDocument(new long[] { 3, 4, 2, 1 }, new Double[] { 11.0, 16.0 }); + + Iterator starTreeDocumentIterator = builder.processStarTreeDocuments(starTreeDocuments); + builder.build(starTreeDocumentIterator); + + List resultStarTreeDocuments = builder.getStarTreeDocuments(); + assertEquals(8, resultStarTreeDocuments.size()); + + List expectedStarTreeDocuments = List.of( + new StarTreeDocument(new long[] { 2, 4, 3, 4 }, new Double[] { 21.0, 14.0 }), + new StarTreeDocument(new long[] { 3, 4, 2, 1 }, new Double[] { 35.0, 34.0 }), + new StarTreeDocument(new long[] { -1, 4, 2, 1 }, new Double[] { 35.0, 34.0 }), + new StarTreeDocument(new long[] { -1, 4, 3, 4 }, new Double[] { 21.0, 14.0 }), + new StarTreeDocument(new long[] { -1, 4, -1, 1 }, new Double[] { 35.0, 34.0 }), + new StarTreeDocument(new long[] { -1, 4, -1, 4 }, new Double[] { 21.0, 14.0 }), + new StarTreeDocument(new long[] { -1, 4, -1, -1 }, new Double[] { 56.0, 48.0 }), + new StarTreeDocument(new long[] { -1, -1, -1, -1 }, new Double[] { 56.0, 48.0 }) + ); + Iterator expectedStarTreeDocumentIterator = expectedStarTreeDocuments.iterator(); + Iterator resultStarTreeDocumentIterator = resultStarTreeDocuments.iterator(); + while (resultStarTreeDocumentIterator.hasNext() && expectedStarTreeDocumentIterator.hasNext()) { + StarTreeDocument resultStarTreeDocument = resultStarTreeDocumentIterator.next(); + StarTreeDocument expectedStarTreeDocument = expectedStarTreeDocumentIterator.next(); + + assertEquals(expectedStarTreeDocument.dimensions[0], resultStarTreeDocument.dimensions[0]); + assertEquals(expectedStarTreeDocument.dimensions[1], resultStarTreeDocument.dimensions[1]); + assertEquals(expectedStarTreeDocument.dimensions[2], resultStarTreeDocument.dimensions[2]); + assertEquals(expectedStarTreeDocument.dimensions[3], resultStarTreeDocument.dimensions[3]); + assertEquals(expectedStarTreeDocument.metrics[0], resultStarTreeDocument.metrics[0]); + assertEquals(expectedStarTreeDocument.metrics[1], resultStarTreeDocument.metrics[1]); + } + } + + @Override + public void tearDown() throws Exception { + super.tearDown(); + directory.close(); + } +} diff --git a/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/builder/StarTreeValuesIteratorFactoryTests.java b/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/builder/StarTreeValuesIteratorFactoryTests.java new file mode 100644 index 0000000000000..7f6978dbf2720 --- /dev/null +++ b/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/builder/StarTreeValuesIteratorFactoryTests.java @@ -0,0 +1,113 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.compositeindex.datacube.startree.builder; + +import org.apache.lucene.codecs.DocValuesProducer; +import org.apache.lucene.index.DocValuesType; +import org.apache.lucene.index.FieldInfo; +import org.apache.lucene.index.IndexOptions; +import org.apache.lucene.index.SortedNumericDocValues; +import org.apache.lucene.index.SortedSetDocValues; +import org.apache.lucene.index.VectorEncoding; +import org.apache.lucene.index.VectorSimilarityFunction; +import org.apache.lucene.search.DocIdSetIterator; +import org.opensearch.test.OpenSearchTestCase; +import org.junit.BeforeClass; + +import java.io.IOException; +import java.util.Collections; + +import org.mockito.Mockito; + +import static org.mockito.Mockito.when; + +public class StarTreeValuesIteratorFactoryTests extends OpenSearchTestCase { + + private static StarTreeDocValuesIteratorFactory factory; + private static FieldInfo mockFieldInfo; + + @BeforeClass + public static void setup() { + factory = new StarTreeDocValuesIteratorFactory(); + mockFieldInfo = new FieldInfo( + "field", + 1, + false, + false, + true, + IndexOptions.NONE, + DocValuesType.NONE, + -1, + Collections.emptyMap(), + 0, + 0, + 0, + 0, + VectorEncoding.FLOAT32, + VectorSimilarityFunction.EUCLIDEAN, + false, + false + ); + } + + public void testCreateIterator_SortedSet() throws IOException { + DocValuesProducer producer = Mockito.mock(DocValuesProducer.class); + SortedSetDocValues iterator = Mockito.mock(SortedSetDocValues.class); + when(producer.getSortedSet(mockFieldInfo)).thenReturn(iterator); + DocIdSetIterator result = factory.createIterator(DocValuesType.SORTED_SET, mockFieldInfo, producer); + assertEquals(iterator.getClass(), result.getClass()); + } + + public void testCreateIterator_SortedNumeric() throws IOException { + DocValuesProducer producer = Mockito.mock(DocValuesProducer.class); + SortedNumericDocValues iterator = Mockito.mock(SortedNumericDocValues.class); + when(producer.getSortedNumeric(mockFieldInfo)).thenReturn(iterator); + DocIdSetIterator result = factory.createIterator(DocValuesType.SORTED_NUMERIC, mockFieldInfo, producer); + assertEquals(iterator.getClass(), result.getClass()); + } + + public void testCreateIterator_UnsupportedType() { + DocValuesProducer producer = Mockito.mock(DocValuesProducer.class); + IllegalArgumentException exception = expectThrows(IllegalArgumentException.class, () -> { + factory.createIterator(DocValuesType.BINARY, mockFieldInfo, producer); + }); + assertEquals("Unsupported DocValuesType: BINARY", exception.getMessage()); + } + + public void testGetNextValue_SortedSet() throws IOException { + SortedSetDocValues iterator = Mockito.mock(SortedSetDocValues.class); + when(iterator.nextOrd()).thenReturn(42L); + + long result = factory.getNextValue(iterator); + assertEquals(42L, result); + } + + public void testGetNextValue_SortedNumeric() throws IOException { + SortedNumericDocValues iterator = Mockito.mock(SortedNumericDocValues.class); + when(iterator.nextValue()).thenReturn(123L); + + long result = factory.getNextValue(iterator); + assertEquals(123L, result); + } + + public void testGetNextValue_UnsupportedIterator() { + DocIdSetIterator iterator = Mockito.mock(DocIdSetIterator.class); + + IllegalArgumentException exception = expectThrows(IllegalArgumentException.class, () -> { factory.getNextValue(iterator); }); + assertEquals("Unsupported Iterator: " + iterator.toString(), exception.getMessage()); + } + + public void testNextDoc() throws IOException { + DocIdSetIterator iterator = Mockito.mock(DocIdSetIterator.class); + when(iterator.nextDoc()).thenReturn(5); + + int result = factory.nextDoc(iterator); + assertEquals(5, result); + } +}