Skip to content

Commit

Permalink
Merge remote-tracking branch 'origin/master' into gradle-master
Browse files Browse the repository at this point in the history
  • Loading branch information
dweiss committed Jan 7, 2020
2 parents c9c0bab + 9edb143 commit 405d227
Show file tree
Hide file tree
Showing 45 changed files with 1,558 additions and 132 deletions.
7 changes: 5 additions & 2 deletions lucene/CHANGES.txt
Original file line number Diff line number Diff line change
Expand Up @@ -115,15 +115,18 @@ Improvements

Optimizations
---------------------
(No changes)

* LUCENE-9113: Faster merging of SORTED/SORTED_SET doc values. (Adrien Grand)

Bug Fixes
---------------------
(No changes)

Other
---------------------
(No changes)

* LUCENE-9096: Simplification of CompressingTermVectorsWriter#flushOffsets.
(kkewwei via Adrien Grand)

======================= Lucene 8.4.0 =======================

Expand Down
118 changes: 118 additions & 0 deletions lucene/core/src/java/org/apache/lucene/codecs/DocValuesConsumer.java
Original file line number Diff line number Diff line change
Expand Up @@ -29,14 +29,18 @@
import org.apache.lucene.index.EmptyDocValuesProducer;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FilteredTermsEnum;
import org.apache.lucene.index.ImpactsEnum;
import org.apache.lucene.index.MergeState;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.OrdinalMap;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.SegmentWriteState; // javadocs
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.index.SortedNumericDocValues;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.index.TermState;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LongBitSet;
Expand Down Expand Up @@ -450,6 +454,102 @@ public long nextValue() throws IOException {
});
}

/**
 * A {@link TermsEnum} over the merged value space of several per-segment enums,
 * driven by an {@link OrdinalMap}. Iterating this way avoids the default
 * doc-values terms enum, which performs a {@link SortedDocValues#lookupOrd(int)}
 * or {@link SortedSetDocValues#lookupOrd(long)} for every {@link TermsEnum#next()}.
 * Only forward iteration ({@code next()}, {@code term()}, {@code ord()}) is
 * supported; all seek/stats/postings operations throw
 * {@link UnsupportedOperationException}.
 */
private static class MergedTermsEnum extends TermsEnum {

  // One enum per merged segment, indexed by segment number from the ordinal map.
  private final TermsEnum[] subs;
  // Maps global ords to (segment number, segment ord) pairs.
  private final OrdinalMap ordinalMap;
  // Total number of distinct terms in the merged space.
  private final long valueCount;
  // Current global ord; -1 until next() is first called.
  private long ord = -1;
  // Term at the current global ord, pulled from the owning segment's enum.
  private BytesRef term;

  MergedTermsEnum(OrdinalMap ordinalMap, TermsEnum[] subs) {
    this.ordinalMap = ordinalMap;
    this.subs = subs;
    this.valueCount = ordinalMap.getValueCount();
  }

  @Override
  public BytesRef term() throws IOException {
    return term;
  }

  @Override
  public long ord() throws IOException {
    return ord;
  }

  @Override
  public BytesRef next() throws IOException {
    ord++;
    if (ord >= valueCount) {
      // Exhausted the merged value space.
      return null;
    }
    // The ordinal map tells us which segment first introduced this global ord,
    // and at which segment-local ord; advance that segment's enum up to it.
    final TermsEnum sub = subs[ordinalMap.getFirstSegmentNumber(ord)];
    final long targetOrd = ordinalMap.getFirstSegmentOrd(ord);
    while (true) {
      term = sub.next();
      if (sub.ord() >= targetOrd) {
        break;
      }
    }
    assert sub.ord() == targetOrd;
    return term;
  }

  @Override
  public AttributeSource attributes() {
    throw new UnsupportedOperationException();
  }

  @Override
  public boolean seekExact(BytesRef text) throws IOException {
    throw new UnsupportedOperationException();
  }

  @Override
  public SeekStatus seekCeil(BytesRef text) throws IOException {
    throw new UnsupportedOperationException();
  }

  @Override
  public void seekExact(long ord) throws IOException {
    throw new UnsupportedOperationException();
  }

  @Override
  public void seekExact(BytesRef term, TermState state) throws IOException {
    throw new UnsupportedOperationException();
  }

  @Override
  public int docFreq() throws IOException {
    throw new UnsupportedOperationException();
  }

  @Override
  public long totalTermFreq() throws IOException {
    throw new UnsupportedOperationException();
  }

  @Override
  public PostingsEnum postings(PostingsEnum reuse, int flags) throws IOException {
    throw new UnsupportedOperationException();
  }

  @Override
  public ImpactsEnum impacts(int flags) throws IOException {
    throw new UnsupportedOperationException();
  }

  @Override
  public TermState termState() throws IOException {
    throw new UnsupportedOperationException();
  }

}

/** Tracks state of one sorted sub-reader that we are merging */
private static class SortedDocValuesSub extends DocIDMerger.Sub {

Expand Down Expand Up @@ -610,6 +710,15 @@ public BytesRef lookupOrd(int ord) throws IOException {
int segmentOrd = (int) map.getFirstSegmentOrd(ord);
return dvs[segmentNumber].lookupOrd(segmentOrd);
}

@Override
public TermsEnum termsEnum() throws IOException {
  // Collect a terms enum per merged segment, then iterate them in global
  // ord order through the ordinal map.
  final int count = toMerge.size();
  final TermsEnum[] segmentEnums = new TermsEnum[count];
  for (int i = 0; i < count; i++) {
    segmentEnums[i] = toMerge.get(i).termsEnum();
  }
  return new MergedTermsEnum(map, segmentEnums);
}
};
}
});
Expand Down Expand Up @@ -781,6 +890,15 @@ public BytesRef lookupOrd(long ord) throws IOException {
public long getValueCount() {
return map.getValueCount();
}

@Override
public TermsEnum termsEnum() throws IOException {
  // Gather each sub-reader's terms enum; MergedTermsEnum walks them in
  // global ord order via the ordinal map.
  final TermsEnum[] segmentEnums = new TermsEnum[toMerge.size()];
  for (int i = 0; i < segmentEnums.length; i++) {
    segmentEnums[i] = toMerge.get(i).termsEnum();
  }
  return new MergedTermsEnum(map, segmentEnums);
}
};
}
});
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@

import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

import org.apache.lucene.codecs.BlockTermState;
Expand Down Expand Up @@ -882,18 +883,17 @@ public void write(BytesRef text, TermsEnum termsEnum, NormsProducer norms) throw

/** Pushes the new term to the top of the stack, and writes new blocks. */
private void pushTerm(BytesRef text) throws IOException {
int limit = Math.min(lastTerm.length(), text.length);

// Find common prefix between last term and current term:
int pos = 0;
while (pos < limit && lastTerm.byteAt(pos) == text.bytes[text.offset+pos]) {
pos++;
int prefixLength = Arrays.mismatch(lastTerm.bytes(), 0, lastTerm.length(), text.bytes, text.offset, text.offset + text.length);
if (prefixLength == -1) { // Only happens for the first term, if it is empty
assert lastTerm.length() == 0;
prefixLength = 0;
}

// if (DEBUG) System.out.println(" shared=" + pos + " lastTerm.length=" + lastTerm.length);

// Close the "abandoned" suffix now:
for(int i=lastTerm.length()-1;i>=pos;i--) {
for(int i=lastTerm.length()-1;i>=prefixLength;i--) {

// How many items on top of the stack share the current suffix
// we are closing:
Expand All @@ -910,7 +910,7 @@ private void pushTerm(BytesRef text) throws IOException {
}

// Init new tail:
for(int i=pos;i<text.length;i++) {
for(int i=prefixLength;i<text.length;i++) {
prefixStarts[i] = pending.size();
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -564,17 +564,9 @@ private void flushOffsets(int[] fieldNums) throws IOException {
final int fieldNumOff = Arrays.binarySearch(fieldNums, fd.fieldNum);
int pos = 0;
for (int i = 0; i < fd.numTerms; ++i) {
int previousPos = 0;
int previousOff = 0;
for (int j = 0; j < fd.freqs[i]; ++j) {
final int position = positionsBuf[fd.posStart + pos];
final int startOffset = startOffsetsBuf[fd.offStart + pos];
sumPos[fieldNumOff] += position - previousPos;
sumOffsets[fieldNumOff] += startOffset - previousOff;
previousPos = position;
previousOff = startOffset;
++pos;
}
sumPos[fieldNumOff] += positionsBuf[fd.posStart + fd.freqs[i]-1 + pos];
sumOffsets[fieldNumOff] += startOffsetsBuf[fd.offStart + fd.freqs[i]-1 + pos];
pos += fd.freqs[i];
}
assert pos == fd.totalPositions;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -154,7 +154,7 @@
* {@link org.apache.lucene.codecs.lucene84.Lucene84PostingsFormat Term Frequency data}.
* For each term in the dictionary, the numbers of all the
* documents that contain that term, and the frequency of the term in that
* document, unless frequencies are omitted (IndexOptions.DOCS_ONLY)
* document, unless frequencies are omitted ({@link org.apache.lucene.index.IndexOptions#DOCS IndexOptions.DOCS})
* </li>
* <li>
* {@link org.apache.lucene.codecs.lucene84.Lucene84PostingsFormat Term Proximity data}.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1332,27 +1332,27 @@ public static BaseDirectoryWrapper newMaybeVirusCheckingDirectory() {
* See {@link #newDirectory()} for more information.
*/
public static BaseDirectoryWrapper newDirectory(Random r) {
return wrapDirectory(r, newDirectoryImpl(r, TEST_DIRECTORY), rarely(r));
return wrapDirectory(r, newDirectoryImpl(r, TEST_DIRECTORY), rarely(r), false);
}

/**
* Returns a new Directory instance, using the specified random.
* See {@link #newDirectory()} for more information.
*/
public static BaseDirectoryWrapper newDirectory(Random r, LockFactory lf) {
return wrapDirectory(r, newDirectoryImpl(r, TEST_DIRECTORY, lf), rarely(r));
return wrapDirectory(r, newDirectoryImpl(r, TEST_DIRECTORY, lf), rarely(r), false);
}

public static MockDirectoryWrapper newMockDirectory() {
return newMockDirectory(random());
}

public static MockDirectoryWrapper newMockDirectory(Random r) {
return (MockDirectoryWrapper) wrapDirectory(r, newDirectoryImpl(r, TEST_DIRECTORY), false);
return (MockDirectoryWrapper) wrapDirectory(r, newDirectoryImpl(r, TEST_DIRECTORY), false, false);
}

public static MockDirectoryWrapper newMockDirectory(Random r, LockFactory lf) {
return (MockDirectoryWrapper) wrapDirectory(r, newDirectoryImpl(r, TEST_DIRECTORY, lf), false);
return (MockDirectoryWrapper) wrapDirectory(r, newDirectoryImpl(r, TEST_DIRECTORY, lf), false, false);
}

public static MockDirectoryWrapper newMockFSDirectory(Path f) {
Expand Down Expand Up @@ -1416,10 +1416,7 @@ private static BaseDirectoryWrapper newFSDirectory(Path f, LockFactory lf, boole
}

Directory fsdir = newFSDirectoryImpl(clazz, f, lf);
if (rarely()) {

}
BaseDirectoryWrapper wrapped = wrapDirectory(random(), fsdir, bare);
BaseDirectoryWrapper wrapped = wrapDirectory(random(), fsdir, bare, true);
return wrapped;
} catch (Exception e) {
Rethrow.rethrow(e);
Expand Down Expand Up @@ -1447,11 +1444,13 @@ public static BaseDirectoryWrapper newDirectory(Random r, Directory d) throws IO
impl.copyFrom(d, file, file, newIOContext(r));
}
}
return wrapDirectory(r, impl, rarely(r));
return wrapDirectory(r, impl, rarely(r), false);
}

private static BaseDirectoryWrapper wrapDirectory(Random random, Directory directory, boolean bare) {
if (rarely(random) && !bare) {
private static BaseDirectoryWrapper wrapDirectory(Random random, Directory directory, boolean bare, boolean filesystem) {
// IOContext randomization might make NRTCachingDirectory make bad decisions, so avoid
// using it if the user requested a filesystem directory.
if (rarely(random) && !bare && filesystem == false) {
directory = new NRTCachingDirectory(directory, random.nextDouble(), random.nextDouble());
}

Expand Down
16 changes: 14 additions & 2 deletions solr/CHANGES.txt
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,10 @@ Upgrade Notes

* SOLR-14092: Deprecated BlockJoinFacetComponent and BlockJoinDocSetFacetComponent are removed
Users are encouraged to migrate to uniqueBlock() in JSON Facet API. (Mikhail Khludnev)

* SOLR-13985: Solr's Jetty now binds to localhost network interface by default for better out of the box security.
Administrators that need Solr exposed more broadly can change the SOLR_JETTY_HOST property in their Solr include
(solr.in.sh/solr.in.cmd) file. (Jason Gerlowski, David Smiley, Robert Muir)

Improvements
----------------------
Expand Down Expand Up @@ -149,12 +153,14 @@ Upgrade Notes
If you prefer to keep the old (but insecure) serialization strategy, you can start your nodes using the
property: `-Dsolr.useUnsafeOverseerResponse=true`. Keep in mind that this will be removed in future version of Solr.

* SOLR-13808: add cache=false into uderneath BoolQParser's filter clause or {"bool":{"filter":..}} to avoid caching in
* SOLR-13808: add cache=false into underneath BoolQParser's filter clause or {"bool":{"filter":..}} to avoid caching in
filterCache. (Mikhail Khludnev)

New Features
---------------------
(No changes)
* SOLR-12490: Introducing json.queries in JSON Request API. Every property of this object holds one or many named
Query DSL queries. It's optional and doesn't impact the response unless these queries are explicitly referenced by name
(Anatolii Siuniaev via Mikhail Khludnev)

Improvements
---------------------
Expand Down Expand Up @@ -187,6 +193,8 @@ Improvements
hl.fragsizeIsMinimum, with defaults that aim to better center matches in fragments than previously. See the ref guide.
Regardless of the settings, the passages may be sized differently than before. (Nándor Mátravölgyi, David Smiley)

* SOLR-14154: Return correct isolation level when retrieving it from the SQL Connection (Nick Vercammen, Kevin Risden)

Optimizations
---------------------
(No changes)
Expand All @@ -204,6 +212,10 @@ Bug Fixes

* SOLR-14122: SimUtils converts v2 to v1 request params incorrectly. (Li Cao, ab)

* SOLR-13089: Fix lsof edge cases in the solr CLI script (Martijn Koster via janhoy)

* SOLR-11746: Fixed existence query support for numeric point fields. (Kai Chan, hossman, Houston Putman)

Other Changes
---------------------

Expand Down
33 changes: 33 additions & 0 deletions solr/bin/postlogs
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

############################################################################################
#
# A command line tool for indexing Solr logs in the out-of-the-box log format.
#
# First build the Solr distribution. Then run postlogs from inside the Solr distribution
# home directory as described below:
#
# parameters:
#
# -- baseUrl: Example http://localhost:8983/solr/collection1
# -- rootDir: All files found at or below the root will be indexed
#
#   Sample syntax: ./bin/postlogs http://localhost:8983/solr/collection1 /user/foo/logs
#
#
############################################################################################

# Launch the log-indexing tool. Quote the classpath so the shell cannot
# glob-expand the '*' entries (Java itself expands classpath wildcards),
# and quote the baseUrl/rootDir arguments so URLs or paths containing
# spaces or shell metacharacters are passed through intact.
java -classpath "dist/*:dist/solrj-lib/*:" org.apache.solr.util.SolrLogPostTool "$1" "$2"
Loading

0 comments on commit 405d227

Please sign in to comment.