diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 27468ecb15c..4f64ef6a95b 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -9,6 +9,11 @@ Bug Fixes --------------------- (No changes) +API Changes +--------------------- + +* GITHUB#13845: Add missing with-discountOverlaps Similarity constructor variants. (Pierre Salagnac, Christine Poerschke, Robert Muir) + ======================== Lucene 9.12.0 ======================= Security Fixes @@ -47,6 +52,9 @@ API Changes the entire segment should be scored. Subclasses that override the method should instead override its replacement. (Luca Cavanna) +* GITHUB#13757: For similarities, provide default computeNorm implementation and remove remaining discountOverlaps setters. + (Christine Poerschke, Adrien Grand, Robert Muir) + New Features --------------------- diff --git a/lucene/core/src/java/org/apache/lucene/search/similarities/Axiomatic.java b/lucene/core/src/java/org/apache/lucene/search/similarities/Axiomatic.java index 77f71782e31..b4546946acf 100644 --- a/lucene/core/src/java/org/apache/lucene/search/similarities/Axiomatic.java +++ b/lucene/core/src/java/org/apache/lucene/search/similarities/Axiomatic.java @@ -44,13 +44,26 @@ public abstract class Axiomatic extends SimilarityBase { protected final int queryLen; /** - * Constructor setting all Axiomatic hyperparameters + * Constructor setting all Axiomatic hyperparameters and using default discountOverlaps value. * * @param s hyperparam for the growth function * @param queryLen the query length * @param k hyperparam for the primitive weighting function */ public Axiomatic(float s, int queryLen, float k) { + this(true, s, queryLen, k); + } + + /** + * Constructor setting all Axiomatic hyperparameters + * + * @param discountOverlaps true if overlap tokens should not impact document length for scoring. 
+ * @param s hyperparam for the growth function + * @param queryLen the query length + * @param k hyperparam for the primitive weighting function + */ + public Axiomatic(boolean discountOverlaps, float s, int queryLen, float k) { + super(discountOverlaps); if (Float.isFinite(s) == false || Float.isNaN(s) || s < 0 || s > 1) { throw new IllegalArgumentException("illegal s value: " + s + ", must be between 0 and 1"); } diff --git a/lucene/core/src/java/org/apache/lucene/search/similarities/DFISimilarity.java b/lucene/core/src/java/org/apache/lucene/search/similarities/DFISimilarity.java index b9c651008cc..34d619ea69f 100644 --- a/lucene/core/src/java/org/apache/lucene/search/similarities/DFISimilarity.java +++ b/lucene/core/src/java/org/apache/lucene/search/similarities/DFISimilarity.java @@ -46,11 +46,23 @@ public class DFISimilarity extends SimilarityBase { private final Independence independence; /** - * Create DFI with the specified divergence from independence measure + * Create DFI with the specified divergence from independence measure and using default + * discountOverlaps value * * @param independenceMeasure measure of divergence from independence */ public DFISimilarity(Independence independenceMeasure) { + this(independenceMeasure, true); + } + + /** + * Create DFI with the specified parameters + * + * @param independenceMeasure measure of divergence from independence + * @param discountOverlaps true if overlap tokens should not impact document length for scoring. 
+ */ + public DFISimilarity(Independence independenceMeasure, boolean discountOverlaps) { + super(discountOverlaps); this.independence = independenceMeasure; } diff --git a/lucene/core/src/java/org/apache/lucene/search/similarities/DFRSimilarity.java b/lucene/core/src/java/org/apache/lucene/search/similarities/DFRSimilarity.java index 0b3c1a5e7f0..08e424b3230 100644 --- a/lucene/core/src/java/org/apache/lucene/search/similarities/DFRSimilarity.java +++ b/lucene/core/src/java/org/apache/lucene/search/similarities/DFRSimilarity.java @@ -83,7 +83,7 @@ public class DFRSimilarity extends SimilarityBase { protected final Normalization normalization; /** - * Creates DFRSimilarity from the three components. + * Creates DFRSimilarity from the three components and using default discountOverlaps value. * *
<p>Note that <code>null</code> values are not allowed: if you want no normalization, instead
* pass {@link NoNormalization}.
@@ -98,7 +98,7 @@ public DFRSimilarity(
}
/**
- * Creates DFRSimilarity from the three components.
+ * Creates DFRSimilarity from the three components and with the specified discountOverlaps value.
*
* <p>Note that <code>null</code> values are not allowed: if you want no normalization, instead
* pass {@link NoNormalization}.
diff --git a/lucene/core/src/java/org/apache/lucene/search/similarities/IBSimilarity.java b/lucene/core/src/java/org/apache/lucene/search/similarities/IBSimilarity.java
index 5b0e93571b1..d2325d20033 100644
--- a/lucene/core/src/java/org/apache/lucene/search/similarities/IBSimilarity.java
+++ b/lucene/core/src/java/org/apache/lucene/search/similarities/IBSimilarity.java
@@ -76,7 +76,7 @@ public class IBSimilarity extends SimilarityBase {
protected final Normalization normalization;
/**
- * Creates IBSimilarity from the three components.
+ * Creates IBSimilarity from the three components and using default discountOverlaps value.
*
* <p>Note that <code>null</code> values are not allowed: if you want no normalization, instead
* pass {@link NoNormalization}.
@@ -86,6 +86,26 @@ public class IBSimilarity extends SimilarityBase {
* @param normalization term frequency normalization
*/
public IBSimilarity(Distribution distribution, Lambda lambda, Normalization normalization) {
+ this(distribution, lambda, normalization, true);
+ }
+
+ /**
+ * Creates IBSimilarity from the three components and with the specified discountOverlaps value.
+ *
+ * <p>Note that <code>null</code> values are not allowed: if you want no normalization, instead
+ * pass {@link NoNormalization}.
+ *
+ * @param distribution probabilistic distribution modeling term occurrence
+ * @param lambda distribution's λ<sub>w</sub> parameter
+ * @param normalization term frequency normalization
+ * @param discountOverlaps true if overlap tokens should not impact document length for scoring.
+ */
+ public IBSimilarity(
+ Distribution distribution,
+ Lambda lambda,
+ Normalization normalization,
+ boolean discountOverlaps) {
+ super(discountOverlaps);
this.distribution = distribution;
this.lambda = lambda;
this.normalization = normalization;
diff --git a/lucene/core/src/java/org/apache/lucene/search/similarities/IndriDirichletSimilarity.java b/lucene/core/src/java/org/apache/lucene/search/similarities/IndriDirichletSimilarity.java
index 9f708362bb5..b3994c5dc46 100644
--- a/lucene/core/src/java/org/apache/lucene/search/similarities/IndriDirichletSimilarity.java
+++ b/lucene/core/src/java/org/apache/lucene/search/similarities/IndriDirichletSimilarity.java
@@ -37,6 +37,13 @@ public class IndriDirichletSimilarity extends LMSimilarity {
/** The μ parameter. */
private final float mu;
+ /** Instantiates the similarity with the provided collection model, discountOverlaps and μ. */
+ public IndriDirichletSimilarity(
+ CollectionModel collectionModel, boolean discountOverlaps, float mu) {
+ super(collectionModel, discountOverlaps);
+ this.mu = mu;
+ }
+
/** Instantiates the similarity with the provided μ parameter. */
public IndriDirichletSimilarity(CollectionModel collectionModel, float mu) {
super(collectionModel);
diff --git a/lucene/core/src/java/org/apache/lucene/search/similarities/LMDirichletSimilarity.java b/lucene/core/src/java/org/apache/lucene/search/similarities/LMDirichletSimilarity.java
index 51b1604aef1..ab80d0d337e 100644
--- a/lucene/core/src/java/org/apache/lucene/search/similarities/LMDirichletSimilarity.java
+++ b/lucene/core/src/java/org/apache/lucene/search/similarities/LMDirichletSimilarity.java
@@ -39,7 +39,13 @@ public class LMDirichletSimilarity extends LMSimilarity {
/** Instantiates the similarity with the provided μ parameter. */
public LMDirichletSimilarity(CollectionModel collectionModel, float mu) {
- super(collectionModel);
+ this(collectionModel, true, mu);
+ }
+
+ /** Instantiates the similarity with the provided collection model, discountOverlaps and μ. */
+ public LMDirichletSimilarity(
+ CollectionModel collectionModel, boolean discountOverlaps, float mu) {
+ super(collectionModel, discountOverlaps);
if (Float.isFinite(mu) == false || mu < 0) {
throw new IllegalArgumentException(
"illegal mu value: " + mu + ", must be a non-negative finite value");
diff --git a/lucene/core/src/java/org/apache/lucene/search/similarities/LMJelinekMercerSimilarity.java b/lucene/core/src/java/org/apache/lucene/search/similarities/LMJelinekMercerSimilarity.java
index e1990f34b0b..7029fa8e133 100644
--- a/lucene/core/src/java/org/apache/lucene/search/similarities/LMJelinekMercerSimilarity.java
+++ b/lucene/core/src/java/org/apache/lucene/search/similarities/LMJelinekMercerSimilarity.java
@@ -43,7 +43,13 @@ public class LMJelinekMercerSimilarity extends LMSimilarity {
/** Instantiates with the specified collectionModel and λ parameter. */
public LMJelinekMercerSimilarity(CollectionModel collectionModel, float lambda) {
- super(collectionModel);
+ this(collectionModel, true, lambda);
+ }
+
+ /** Instantiates with the specified collectionModel, discountOverlaps and λ parameter. */
+ public LMJelinekMercerSimilarity(
+ CollectionModel collectionModel, boolean discountOverlaps, float lambda) {
+ super(collectionModel, discountOverlaps);
if (Float.isNaN(lambda) || lambda <= 0 || lambda > 1) {
throw new IllegalArgumentException("lambda must be in the range (0 .. 1]");
}
diff --git a/lucene/core/src/java/org/apache/lucene/search/similarities/LMSimilarity.java b/lucene/core/src/java/org/apache/lucene/search/similarities/LMSimilarity.java
index e1536db268f..5bd48f37a34 100644
--- a/lucene/core/src/java/org/apache/lucene/search/similarities/LMSimilarity.java
+++ b/lucene/core/src/java/org/apache/lucene/search/similarities/LMSimilarity.java
@@ -43,6 +43,12 @@ public abstract class LMSimilarity extends SimilarityBase {
/** Creates a new instance with the specified collection language model. */
public LMSimilarity(CollectionModel collectionModel) {
+ this(collectionModel, true);
+ }
+
+ /** Creates a new instance with the specified collection language model and discountOverlaps. */
+ public LMSimilarity(CollectionModel collectionModel, boolean discountOverlaps) {
+ super(discountOverlaps);
this.collectionModel = collectionModel;
}