Skip to content

Commit

Permalink
[Star Tree] Lucene Abstractions for Star Tree File Formats (#15278)
Browse files Browse the repository at this point in the history
---------
Signed-off-by: Sarthak Aggarwal <[email protected]>
(cherry picked from commit 9e5604b)
Signed-off-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
  • Loading branch information
github-actions[bot] committed Aug 27, 2024
1 parent 00f1ecb commit b731ff1
Show file tree
Hide file tree
Showing 17 changed files with 588 additions and 7 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
/*
* SPDX-License-Identifier: Apache-2.0
*
* The OpenSearch Contributors require contributions made to
* this file be licensed under the Apache-2.0 license or a
* compatible open source license.
*/

package org.apache.lucene.codecs.lucene90;

import org.apache.lucene.codecs.DocValuesConsumer;
import org.apache.lucene.index.SegmentWriteState;

import java.io.Closeable;
import java.io.IOException;

/**
* This class is an abstraction of the {@link DocValuesConsumer} for the Star Tree index structure.
* It is responsible to consume various types of document values (numeric, binary, sorted, sorted numeric,
* and sorted set) for fields in the Star Tree index.
*
* @opensearch.experimental
*/
public class Lucene90DocValuesConsumerWrapper implements Closeable {

private final Lucene90DocValuesConsumer lucene90DocValuesConsumer;

public Lucene90DocValuesConsumerWrapper(
SegmentWriteState state,
String dataCodec,
String dataExtension,
String metaCodec,
String metaExtension
) throws IOException {
lucene90DocValuesConsumer = new Lucene90DocValuesConsumer(state, dataCodec, dataExtension, metaCodec, metaExtension);
}

public Lucene90DocValuesConsumer getLucene90DocValuesConsumer() {
return lucene90DocValuesConsumer;
}

@Override
public void close() throws IOException {
lucene90DocValuesConsumer.close();
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
/*
* SPDX-License-Identifier: Apache-2.0
*
* The OpenSearch Contributors require contributions made to
* this file be licensed under the Apache-2.0 license or a
* compatible open source license.
*/

package org.apache.lucene.codecs.lucene90;

import org.apache.lucene.codecs.DocValuesProducer;
import org.apache.lucene.index.SegmentReadState;

import java.io.Closeable;
import java.io.IOException;

/**
* This class is a custom abstraction of the {@link DocValuesProducer} for the Star Tree index structure.
* It is responsible for providing access to various types of document values (numeric, binary, sorted, sorted numeric,
* and sorted set) for fields in the Star Tree index.
*
* @opensearch.experimental
*/
public class Lucene90DocValuesProducerWrapper implements Closeable {

private final Lucene90DocValuesProducer lucene90DocValuesProducer;

public Lucene90DocValuesProducerWrapper(
SegmentReadState state,
String dataCodec,
String dataExtension,
String metaCodec,
String metaExtension
) throws IOException {
lucene90DocValuesProducer = new Lucene90DocValuesProducer(state, dataCodec, dataExtension, metaCodec, metaExtension);
}

public DocValuesProducer getLucene90DocValuesProducer() {
return lucene90DocValuesProducer;
}

@Override
public void close() throws IOException {
lucene90DocValuesProducer.close();
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
/*
* SPDX-License-Identifier: Apache-2.0
*
* The OpenSearch Contributors require contributions made to
* this file be licensed under the Apache-2.0 license or a
* compatible open source license.
*/

package org.apache.lucene.index;

import org.apache.lucene.util.Counter;

/**
* A wrapper class for writing sorted numeric doc values.
* <p>
* This class provides a convenient way to add sorted numeric doc values to a field
* and retrieve the corresponding {@link SortedNumericDocValues} instance.
*
* @opensearch.experimental
*/
public class SortedNumericDocValuesWriterWrapper {

private final SortedNumericDocValuesWriter sortedNumericDocValuesWriter;

/**
* Sole constructor. Constructs a new {@link SortedNumericDocValuesWriterWrapper} instance.
*
* @param fieldInfo the field information for the field being written
* @param counter a counter for tracking memory usage
*/
public SortedNumericDocValuesWriterWrapper(FieldInfo fieldInfo, Counter counter) {
sortedNumericDocValuesWriter = new SortedNumericDocValuesWriter(fieldInfo, counter);
}

/**
* Adds a value to the sorted numeric doc values for the specified document.
*
* @param docID the document ID
* @param value the value to add
*/
public void addValue(int docID, long value) {
sortedNumericDocValuesWriter.addValue(docID, value);
}

/**
* Returns the {@link SortedNumericDocValues} instance containing the sorted numeric doc values
*
* @return the {@link SortedNumericDocValues} instance
*/
public SortedNumericDocValues getDocValues() {
return sortedNumericDocValuesWriter.getDocValues();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.lucene99.Lucene99Codec;
import org.opensearch.common.annotation.ExperimentalApi;
import org.opensearch.index.codec.composite.composite99.Composite99Codec;
import org.opensearch.index.mapper.MapperService;

import java.util.HashMap;
Expand All @@ -29,6 +30,10 @@
*/
@ExperimentalApi
public class CompositeCodecFactory {

// we can use this to track the latest composite codec
public static final String COMPOSITE_CODEC = Composite99Codec.COMPOSITE_INDEX_CODEC_NAME;

public CompositeCodecFactory() {}

public Map<String, Codec> getCompositeIndexCodecs(MapperService mapperService, Logger logger) {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
/*
* SPDX-License-Identifier: Apache-2.0
*
* The OpenSearch Contributors require contributions made to
* this file be licensed under the Apache-2.0 license or a
* compatible open source license.
*/

package org.opensearch.index.codec.composite;

import org.apache.lucene.codecs.DocValuesConsumer;
import org.apache.lucene.codecs.lucene90.Lucene90DocValuesConsumerWrapper;
import org.apache.lucene.index.SegmentWriteState;

import java.io.IOException;

/**
* A factory class that provides a factory method for creating {@link DocValuesConsumer} instances
* for the latest composite codec.
* <p>
* The segments are written using the latest composite codec. The codec
* internally manages calling the appropriate consumer factory for its abstractions.
* <p>
* This design ensures forward compatibility for writing operations
*
* @opensearch.experimental
*/
public class LuceneDocValuesConsumerFactory {

Check warning on line 28 in server/src/main/java/org/opensearch/index/codec/composite/LuceneDocValuesConsumerFactory.java

View check run for this annotation

Codecov / codecov/patch

server/src/main/java/org/opensearch/index/codec/composite/LuceneDocValuesConsumerFactory.java#L28

Added line #L28 was not covered by tests

public static DocValuesConsumer getDocValuesConsumerForCompositeCodec(
SegmentWriteState state,
String dataCodec,
String dataExtension,
String metaCodec,
String metaExtension
) throws IOException {
try (
Lucene90DocValuesConsumerWrapper lucene90DocValuesConsumerWrapper = new Lucene90DocValuesConsumerWrapper(
state,
dataCodec,
dataExtension,
metaCodec,
metaExtension
)
) {
return lucene90DocValuesConsumerWrapper.getLucene90DocValuesConsumer();
}
}

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
/*
* SPDX-License-Identifier: Apache-2.0
*
* The OpenSearch Contributors require contributions made to
* this file be licensed under the Apache-2.0 license or a
* compatible open source license.
*/

package org.opensearch.index.codec.composite;

import org.apache.lucene.codecs.DocValuesProducer;
import org.apache.lucene.codecs.lucene90.Lucene90DocValuesProducerWrapper;
import org.apache.lucene.index.SegmentReadState;
import org.opensearch.index.codec.composite.composite99.Composite99Codec;

import java.io.IOException;

/**
* A factory class that provides a factory method for creating {@link DocValuesProducer} instances
* based on the specified composite codec.
* <p>
* In producers, we want to ensure compatibility with older codec versions during the segment reads.
* This approach allows for writing with only the latest codec while maintaining
* the ability to read data encoded with any codec version present in the segment.
* <p>
* This design ensures backward compatibility for reads across different codec versions.
*
* @opensearch.experimental
*/
public class LuceneDocValuesProducerFactory {

Check warning on line 30 in server/src/main/java/org/opensearch/index/codec/composite/LuceneDocValuesProducerFactory.java

View check run for this annotation

Codecov / codecov/patch

server/src/main/java/org/opensearch/index/codec/composite/LuceneDocValuesProducerFactory.java#L30

Added line #L30 was not covered by tests

public static DocValuesProducer getDocValuesProducerForCompositeCodec(
String compositeCodec,
SegmentReadState state,
String dataCodec,
String dataExtension,
String metaCodec,
String metaExtension
) throws IOException {

switch (compositeCodec) {
case Composite99Codec.COMPOSITE_INDEX_CODEC_NAME:
try (
Lucene90DocValuesProducerWrapper lucene90DocValuesProducerWrapper = new Lucene90DocValuesProducerWrapper(
state,
dataCodec,
dataExtension,
metaCodec,
metaExtension
)
) {
return lucene90DocValuesProducerWrapper.getLucene90DocValuesProducer();
}
default:
throw new IllegalStateException("Invalid composite codec " + "[" + compositeCodec + "]");
}

}

}
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
* compatible open source license.
*/

package org.opensearch.index.codec.composite;
package org.opensearch.index.codec.composite.composite99;

import org.apache.logging.log4j.Logger;
import org.apache.lucene.codecs.Codec;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
* compatible open source license.
*/

package org.opensearch.index.codec.composite;
package org.opensearch.index.codec.composite.composite99;

import org.apache.lucene.codecs.DocValuesConsumer;
import org.apache.lucene.codecs.DocValuesFormat;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
* compatible open source license.
*/

package org.opensearch.index.codec.composite;
package org.opensearch.index.codec.composite.composite99;

import org.apache.lucene.codecs.DocValuesProducer;
import org.apache.lucene.index.BinaryDocValues;
Expand All @@ -17,6 +17,9 @@
import org.apache.lucene.index.SortedNumericDocValues;
import org.apache.lucene.index.SortedSetDocValues;
import org.opensearch.common.annotation.ExperimentalApi;
import org.opensearch.index.codec.composite.CompositeIndexFieldInfo;
import org.opensearch.index.codec.composite.CompositeIndexReader;
import org.opensearch.index.codec.composite.CompositeIndexValues;

import java.io.IOException;
import java.util.ArrayList;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
* compatible open source license.
*/

package org.opensearch.index.codec.composite;
package org.opensearch.index.codec.composite.composite99;

import org.apache.lucene.codecs.DocValuesConsumer;
import org.apache.lucene.codecs.DocValuesProducer;
Expand All @@ -18,6 +18,9 @@
import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.index.SortedNumericDocValues;
import org.opensearch.common.annotation.ExperimentalApi;
import org.opensearch.index.codec.composite.CompositeIndexFieldInfo;
import org.opensearch.index.codec.composite.CompositeIndexReader;
import org.opensearch.index.codec.composite.CompositeIndexValues;
import org.opensearch.index.codec.composite.datacube.startree.StarTreeValues;
import org.opensearch.index.compositeindex.datacube.startree.StarTreeField;
import org.opensearch.index.compositeindex.datacube.startree.builder.StarTreesBuilder;
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
/*
* SPDX-License-Identifier: Apache-2.0
*
* The OpenSearch Contributors require contributions made to
* this file be licensed under the Apache-2.0 license or a
* compatible open source license.
*/

/**
* Responsible for handling all composite index codecs and operations associated with Composite99 codec
*/
package org.opensearch.index.codec.composite.composite99;
Original file line number Diff line number Diff line change
@@ -1 +1 @@
org.opensearch.index.codec.composite.Composite99Codec
org.opensearch.index.codec.composite.composite99.Composite99Codec
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@
import org.opensearch.env.Environment;
import org.opensearch.index.IndexSettings;
import org.opensearch.index.analysis.IndexAnalyzers;
import org.opensearch.index.codec.composite.Composite99Codec;
import org.opensearch.index.codec.composite.composite99.Composite99Codec;
import org.opensearch.index.engine.EngineConfig;
import org.opensearch.index.mapper.MapperService;
import org.opensearch.index.similarity.SimilarityService;
Expand Down
Loading

0 comments on commit b731ff1

Please sign in to comment.