Skip to content

Commit

Permalink
Merge remote-tracking branch 'origin/master' into gradle-master
Browse files Browse the repository at this point in the history
  • Loading branch information
dweiss committed Jan 7, 2020
2 parents c9c0bab + 9edb143 commit 405d227
Show file tree
Hide file tree
Showing 45 changed files with 1,558 additions and 132 deletions.
7 changes: 5 additions & 2 deletions lucene/CHANGES.txt
Original file line number Diff line number Diff line change
Expand Up @@ -115,15 +115,18 @@ Improvements

Optimizations
---------------------
(No changes)

* LUCENE-9113: Faster merging of SORTED/SORTED_SET doc values. (Adrien Grand)

Bug Fixes
---------------------
(No changes)

Other
---------------------
(No changes)

* LUCENE-9096: Simplification of CompressingTermVectorsWriter#flushOffsets.
(kkewwei via Adrien Grand)

======================= Lucene 8.4.0 =======================

Expand Down
118 changes: 118 additions & 0 deletions lucene/core/src/java/org/apache/lucene/codecs/DocValuesConsumer.java
Original file line number Diff line number Diff line change
Expand Up @@ -29,14 +29,18 @@
import org.apache.lucene.index.EmptyDocValuesProducer;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FilteredTermsEnum;
import org.apache.lucene.index.ImpactsEnum;
import org.apache.lucene.index.MergeState;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.OrdinalMap;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.SegmentWriteState; // javadocs
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.index.SortedNumericDocValues;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.index.TermState;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LongBitSet;
Expand Down Expand Up @@ -450,6 +454,102 @@ public long nextValue() throws IOException {
});
}

/**
 * A {@link TermsEnum} over the merged value space of several per-segment enums,
 * driven by an {@link OrdinalMap}. Iterating this way avoids the default
 * doc-values terms enum, which performs a {@link SortedDocValues#lookupOrd(int)}
 * or {@link SortedSetDocValues#lookupOrd(long)} for every {@link TermsEnum#next()}.
 * Only forward iteration ({@code next()}, {@code term()}, {@code ord()}) is
 * supported; all seek/stats/postings operations throw
 * {@link UnsupportedOperationException}.
 */
private static class MergedTermsEnum extends TermsEnum {

  // One enum per merged segment, indexed by segment number from the ordinal map.
  private final TermsEnum[] subs;
  // Maps global ords to (segment number, segment ord) pairs.
  private final OrdinalMap ordinalMap;
  // Total number of distinct terms in the merged space.
  private final long valueCount;
  // Current global ord; -1 until next() is first called.
  private long ord = -1;
  // Term at the current global ord, pulled from the owning segment's enum.
  private BytesRef term;

  MergedTermsEnum(OrdinalMap ordinalMap, TermsEnum[] subs) {
    this.ordinalMap = ordinalMap;
    this.subs = subs;
    this.valueCount = ordinalMap.getValueCount();
  }

  @Override
  public BytesRef term() throws IOException {
    return term;
  }

  @Override
  public long ord() throws IOException {
    return ord;
  }

  @Override
  public BytesRef next() throws IOException {
    ord++;
    if (ord >= valueCount) {
      // Exhausted the merged value space.
      return null;
    }
    // The ordinal map tells us which segment first introduced this global ord,
    // and at which segment-local ord; advance that segment's enum up to it.
    final TermsEnum sub = subs[ordinalMap.getFirstSegmentNumber(ord)];
    final long targetOrd = ordinalMap.getFirstSegmentOrd(ord);
    while (true) {
      term = sub.next();
      if (sub.ord() >= targetOrd) {
        break;
      }
    }
    assert sub.ord() == targetOrd;
    return term;
  }

  @Override
  public AttributeSource attributes() {
    throw new UnsupportedOperationException();
  }

  @Override
  public boolean seekExact(BytesRef text) throws IOException {
    throw new UnsupportedOperationException();
  }

  @Override
  public SeekStatus seekCeil(BytesRef text) throws IOException {
    throw new UnsupportedOperationException();
  }

  @Override
  public void seekExact(long ord) throws IOException {
    throw new UnsupportedOperationException();
  }

  @Override
  public void seekExact(BytesRef term, TermState state) throws IOException {
    throw new UnsupportedOperationException();
  }

  @Override
  public int docFreq() throws IOException {
    throw new UnsupportedOperationException();
  }

  @Override
  public long totalTermFreq() throws IOException {
    throw new UnsupportedOperationException();
  }

  @Override
  public PostingsEnum postings(PostingsEnum reuse, int flags) throws IOException {
    throw new UnsupportedOperationException();
  }

  @Override
  public ImpactsEnum impacts(int flags) throws IOException {
    throw new UnsupportedOperationException();
  }

  @Override
  public TermState termState() throws IOException {
    throw new UnsupportedOperationException();
  }

}

/** Tracks state of one sorted sub-reader that we are merging */
private static class SortedDocValuesSub extends DocIDMerger.Sub {

Expand Down Expand Up @@ -610,6 +710,15 @@ public BytesRef lookupOrd(int ord) throws IOException {
int segmentOrd = (int) map.getFirstSegmentOrd(ord);
return dvs[segmentNumber].lookupOrd(segmentOrd);
}

@Override
public TermsEnum termsEnum() throws IOException {
  // Collect a terms enum per merged segment, then iterate them in global
  // ord order through the ordinal map.
  final int count = toMerge.size();
  final TermsEnum[] segmentEnums = new TermsEnum[count];
  for (int i = 0; i < count; i++) {
    segmentEnums[i] = toMerge.get(i).termsEnum();
  }
  return new MergedTermsEnum(map, segmentEnums);
}
};
}
});
Expand Down Expand Up @@ -781,6 +890,15 @@ public BytesRef lookupOrd(long ord) throws IOException {
public long getValueCount() {
return map.getValueCount();
}

@Override
public TermsEnum termsEnum() throws IOException {
  // Gather each sub-reader's terms enum; MergedTermsEnum walks them in
  // global ord order via the ordinal map.
  final TermsEnum[] segmentEnums = new TermsEnum[toMerge.size()];
  for (int i = 0; i < segmentEnums.length; i++) {
    segmentEnums[i] = toMerge.get(i).termsEnum();
  }
  return new MergedTermsEnum(map, segmentEnums);
}
};
}
});
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@

import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

import org.apache.lucene.codecs.BlockTermState;
Expand Down Expand Up @@ -882,18 +883,17 @@ public void write(BytesRef text, TermsEnum termsEnum, NormsProducer norms) throw

/** Pushes the new term to the top of the stack, and writes new blocks. */
private void pushTerm(BytesRef text) throws IOException {
int limit = Math.min(lastTerm.length(), text.length);

// Find common prefix between last term and current term:
int pos = 0;
while (pos < limit && lastTerm.byteAt(pos) == text.bytes[text.offset+pos]) {
pos++;
int prefixLength = Arrays.mismatch(lastTerm.bytes(), 0, lastTerm.length(), text.bytes, text.offset, text.offset + text.length);
if (prefixLength == -1) { // Only happens for the first term, if it is empty
assert lastTerm.length() == 0;
prefixLength = 0;
}

// if (DEBUG) System.out.println(" shared=" + pos + " lastTerm.length=" + lastTerm.length);

// Close the "abandoned" suffix now:
for(int i=lastTerm.length()-1;i>=pos;i--) {
for(int i=lastTerm.length()-1;i>=prefixLength;i--) {

// How many items on top of the stack share the current suffix
// we are closing:
Expand All @@ -910,7 +910,7 @@ private void pushTerm(BytesRef text) throws IOException {
}

// Init new tail:
for(int i=pos;i<text.length;i++) {
for(int i=prefixLength;i<text.length;i++) {
prefixStarts[i] = pending.size();
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -564,17 +564,9 @@ private void flushOffsets(int[] fieldNums) throws IOException {
final int fieldNumOff = Arrays.binarySearch(fieldNums, fd.fieldNum);
int pos = 0;
for (int i = 0; i < fd.numTerms; ++i) {
int previousPos = 0;
int previousOff = 0;
for (int j = 0; j < fd.freqs[i]; ++j) {
final int position = positionsBuf[fd.posStart + pos];
final int startOffset = startOffsetsBuf[fd.offStart + pos];
sumPos[fieldNumOff] += position - previousPos;
sumOffsets[fieldNumOff] += startOffset - previousOff;
previousPos = position;
previousOff = startOffset;
++pos;
}
sumPos[fieldNumOff] += positionsBuf[fd.posStart + fd.freqs[i]-1 + pos];
sumOffsets[fieldNumOff] += startOffsetsBuf[fd.offStart + fd.freqs[i]-1 + pos];
pos += fd.freqs[i];
}
assert pos == fd.totalPositions;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -154,7 +154,7 @@
* {@link org.apache.lucene.codecs.lucene84.Lucene84PostingsFormat Term Frequency data}.
* For each term in the dictionary, the numbers of all the
* documents that contain that term, and the frequency of the term in that
* document, unless frequencies are omitted (IndexOptions.DOCS_ONLY)
* document, unless frequencies are omitted ({@link org.apache.lucene.index.IndexOptions#DOCS IndexOptions.DOCS})
* </li>
* <li>
* {@link org.apache.lucene.codecs.lucene84.Lucene84PostingsFormat Term Proximity data}.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1332,27 +1332,27 @@ public static BaseDirectoryWrapper newMaybeVirusCheckingDirectory() {
* See {@link #newDirectory()} for more information.
*/
public static BaseDirectoryWrapper newDirectory(Random r) {
return wrapDirectory(r, newDirectoryImpl(r, TEST_DIRECTORY), rarely(r));
return wrapDirectory(r, newDirectoryImpl(r, TEST_DIRECTORY), rarely(r), false);
}

/**
* Returns a new Directory instance, using the specified random.
* See {@link #newDirectory()} for more information.
*/
public static BaseDirectoryWrapper newDirectory(Random r, LockFactory lf) {
return wrapDirectory(r, newDirectoryImpl(r, TEST_DIRECTORY, lf), rarely(r));
return wrapDirectory(r, newDirectoryImpl(r, TEST_DIRECTORY, lf), rarely(r), false);
}

public static MockDirectoryWrapper newMockDirectory() {
return newMockDirectory(random());
}

public static MockDirectoryWrapper newMockDirectory(Random r) {
return (MockDirectoryWrapper) wrapDirectory(r, newDirectoryImpl(r, TEST_DIRECTORY), false);
return (MockDirectoryWrapper) wrapDirectory(r, newDirectoryImpl(r, TEST_DIRECTORY), false, false);
}

public static MockDirectoryWrapper newMockDirectory(Random r, LockFactory lf) {
return (MockDirectoryWrapper) wrapDirectory(r, newDirectoryImpl(r, TEST_DIRECTORY, lf), false);
return (MockDirectoryWrapper) wrapDirectory(r, newDirectoryImpl(r, TEST_DIRECTORY, lf), false, false);
}

public static MockDirectoryWrapper newMockFSDirectory(Path f) {
Expand Down Expand Up @@ -1416,10 +1416,7 @@ private static BaseDirectoryWrapper newFSDirectory(Path f, LockFactory lf, boole
}

Directory fsdir = newFSDirectoryImpl(clazz, f, lf);
if (rarely()) {

}
BaseDirectoryWrapper wrapped = wrapDirectory(random(), fsdir, bare);
BaseDirectoryWrapper wrapped = wrapDirectory(random(), fsdir, bare, true);
return wrapped;
} catch (Exception e) {
Rethrow.rethrow(e);
Expand Down Expand Up @@ -1447,11 +1444,13 @@ public static BaseDirectoryWrapper newDirectory(Random r, Directory d) throws IO
impl.copyFrom(d, file, file, newIOContext(r));
}
}
return wrapDirectory(r, impl, rarely(r));
return wrapDirectory(r, impl, rarely(r), false);
}

private static BaseDirectoryWrapper wrapDirectory(Random random, Directory directory, boolean bare) {
if (rarely(random) && !bare) {
private static BaseDirectoryWrapper wrapDirectory(Random random, Directory directory, boolean bare, boolean filesystem) {
// IOContext randomization might make NRTCachingDirectory make bad decisions, so avoid
// using it if the user requested a filesystem directory.
if (rarely(random) && !bare && filesystem == false) {
directory = new NRTCachingDirectory(directory, random.nextDouble(), random.nextDouble());
}

Expand Down
16 changes: 14 additions & 2 deletions solr/CHANGES.txt
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,10 @@ Upgrade Notes

* SOLR-14092: Deprecated BlockJoinFacetComponent and BlockJoinDocSetFacetComponent are removed
Users are encouraged to migrate to uniqueBlock() in JSON Facet API. (Mikhail Khludnev)

* SOLR-13985: Solr's Jetty now binds to localhost network interface by default for better out of the box security.
Administrators that need Solr exposed more broadly can change the SOLR_JETTY_HOST property in their Solr include
(solr.in.sh/solr.in.cmd) file. (Jason Gerlowski, David Smiley, Robert Muir)

Improvements
----------------------
Expand Down Expand Up @@ -149,12 +153,14 @@ Upgrade Notes
If you prefer to keep the old (but insecure) serialization strategy, you can start your nodes using the
property: `-Dsolr.useUnsafeOverseerResponse=true`. Keep in mind that this will be removed in future version of Solr.

* SOLR-13808: add cache=false into uderneath BoolQParser's filter clause or {"bool":{"filter":..}} to avoid caching in
* SOLR-13808: add cache=false into underneath BoolQParser's filter clause or {"bool":{"filter":..}} to avoid caching in
filterCache. (Mikhail Khludnev)

New Features
---------------------
(No changes)
* SOLR-12490: Introducing json.queries in JSON Request API. Every property of this object holds one or many named
Query DSL queries. It's optional and doesn't impact the response unless these queries are explicitly referenced by name
(Anatolii Siuniaev via Mikhail Khludnev)

Improvements
---------------------
Expand Down Expand Up @@ -187,6 +193,8 @@ Improvements
hl.fragsizeIsMinimum, with defaults that aim to better center matches in fragments than previously. See the ref guide.
Regardless of the settings, the passages may be sized differently than before. (Nándor Mátravölgyi, David Smiley)

* SOLR-14154: Return correct isolation level when retrieving it from the SQL Connection (Nick Vercammen, Kevin Risden)

Optimizations
---------------------
(No changes)
Expand All @@ -204,6 +212,10 @@ Bug Fixes

* SOLR-14122: SimUtils converts v2 to v1 request params incorrectly. (Li Cao, ab)

* SOLR-13089: Fix lsof edge cases in the solr CLI script (Martijn Koster via janhoy)

* SOLR-11746: Fixed existence query support for numeric point fields. (Kai Chan, hossman, Houston Putman)

Other Changes
---------------------

Expand Down
33 changes: 33 additions & 0 deletions solr/bin/postlogs
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

############################################################################################
#
# A command line tool for indexing Solr logs in the out-of-the-box log format.
#
# First build the Solr distribution. Then run postlogs from inside the Solr distribution
# home directory as described below:
#
# parameters:
#
# -- baseUrl: Example http://localhost:8983/solr/collection1
# -- rootDir: All files found at or below the root will be indexed
#
#   Sample syntax: ./bin/postlogs http://localhost:8983/solr/collection1 /user/foo/logs
#
#
############################################################################################

# Launch the log-indexing tool. Quote the classpath so the shell cannot
# glob-expand the '*' entries (Java itself expands classpath wildcards),
# and quote the baseUrl/rootDir arguments so URLs or paths containing
# spaces or shell metacharacters are passed through intact.
java -classpath "dist/*:dist/solrj-lib/*:" org.apache.solr.util.SolrLogPostTool "$1" "$2"
Loading

0 comments on commit 405d227

Please sign in to comment.