Skip to content

Commit ffdfcec

Browse files
authored
Upgrade to Lucene 10.2.0 (#126594)
This commit upgrades Elasticsearch to Lucene 10.2.0.
1 parent 58a2939 commit ffdfcec

File tree

20 files changed

+500
-347
lines changed

20 files changed

+500
-347
lines changed

Diff for: benchmarks/src/main/java/org/elasticsearch/benchmark/vector/VectorScorerBenchmark.java

+13-8
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
import org.apache.lucene.store.MMapDirectory;
2020
import org.apache.lucene.util.hnsw.RandomVectorScorer;
2121
import org.apache.lucene.util.hnsw.RandomVectorScorerSupplier;
22+
import org.apache.lucene.util.hnsw.UpdateableRandomVectorScorer;
2223
import org.apache.lucene.util.quantization.QuantizedByteVectorValues;
2324
import org.apache.lucene.util.quantization.ScalarQuantizer;
2425
import org.elasticsearch.common.logging.LogConfigurator;
@@ -76,10 +77,10 @@ public class VectorScorerBenchmark {
7677
float vec2Offset;
7778
float scoreCorrectionConstant;
7879

79-
RandomVectorScorer luceneDotScorer;
80-
RandomVectorScorer luceneSqrScorer;
81-
RandomVectorScorer nativeDotScorer;
82-
RandomVectorScorer nativeSqrScorer;
80+
UpdateableRandomVectorScorer luceneDotScorer;
81+
UpdateableRandomVectorScorer luceneSqrScorer;
82+
UpdateableRandomVectorScorer nativeDotScorer;
83+
UpdateableRandomVectorScorer nativeSqrScorer;
8384

8485
RandomVectorScorer luceneDotScorerQuery;
8586
RandomVectorScorer nativeDotScorerQuery;
@@ -118,12 +119,16 @@ public void setup() throws IOException {
118119
in = dir.openInput("vector.data", IOContext.DEFAULT);
119120
var values = vectorValues(dims, 2, in, VectorSimilarityFunction.DOT_PRODUCT);
120121
scoreCorrectionConstant = values.getScalarQuantizer().getConstantMultiplier();
121-
luceneDotScorer = luceneScoreSupplier(values, VectorSimilarityFunction.DOT_PRODUCT).scorer(0);
122+
luceneDotScorer = luceneScoreSupplier(values, VectorSimilarityFunction.DOT_PRODUCT).scorer();
123+
luceneDotScorer.setScoringOrdinal(0);
122124
values = vectorValues(dims, 2, in, VectorSimilarityFunction.EUCLIDEAN);
123-
luceneSqrScorer = luceneScoreSupplier(values, VectorSimilarityFunction.EUCLIDEAN).scorer(0);
125+
luceneSqrScorer = luceneScoreSupplier(values, VectorSimilarityFunction.EUCLIDEAN).scorer();
126+
luceneSqrScorer.setScoringOrdinal(0);
124127

125-
nativeDotScorer = factory.getInt7SQVectorScorerSupplier(DOT_PRODUCT, in, values, scoreCorrectionConstant).get().scorer(0);
126-
nativeSqrScorer = factory.getInt7SQVectorScorerSupplier(EUCLIDEAN, in, values, scoreCorrectionConstant).get().scorer(0);
128+
nativeDotScorer = factory.getInt7SQVectorScorerSupplier(DOT_PRODUCT, in, values, scoreCorrectionConstant).get().scorer();
129+
nativeDotScorer.setScoringOrdinal(0);
130+
nativeSqrScorer = factory.getInt7SQVectorScorerSupplier(EUCLIDEAN, in, values, scoreCorrectionConstant).get().scorer();
131+
nativeSqrScorer.setScoringOrdinal(0);
127132

128133
// setup for getInt7SQVectorScorer / query vector scoring
129134
float[] queryVec = new float[dims];

Diff for: build-tools-internal/version.properties

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
elasticsearch = 9.1.0
2-
lucene = 10.1.0
2+
lucene = 10.2.0
33

44
bundled_jdk_vendor = openjdk
55
bundled_jdk = 24+36@1f9ff9062db4449d8ca828c504ffae90
@@ -8,7 +8,7 @@ spatial4j = 0.7
88
jts = 1.15.0
99
jackson = 2.15.0
1010
snakeyaml = 2.0
11-
icu4j = 68.2
11+
icu4j = 77.1
1212
supercsv = 2.4.0
1313
log4j = 2.19.0
1414
slf4j = 2.0.6

Diff for: docs/Versions.asciidoc

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11

22
include::{docs-root}/shared/versions/stack/{source_branch}.asciidoc[]
33

4-
:lucene_version: 10.1.0
5-
:lucene_version_path: 10_1_0
4+
:lucene_version: 10.2.0
5+
:lucene_version_path: 10_2_0
66
:jdk: 11.0.2
77
:jdk_major: 11
88
:build_type: tar

Diff for: docs/changelog/126594.yaml

+5
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
pr: 126594
2+
summary: Upgrade to Lucene 10.2.0
3+
area: Search
4+
type: upgrade
5+
issues: []

Diff for: gradle/verification-metadata.xml

+80-135
Large diffs are not rendered by default.

Diff for: libs/simdvec/src/main21/java/org/elasticsearch/simdvec/internal/Int7SQVectorScorerSupplier.java

+12-7
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,8 @@
1010
package org.elasticsearch.simdvec.internal;
1111

1212
import org.apache.lucene.store.MemorySegmentAccessInput;
13-
import org.apache.lucene.util.hnsw.RandomVectorScorer;
1413
import org.apache.lucene.util.hnsw.RandomVectorScorerSupplier;
14+
import org.apache.lucene.util.hnsw.UpdateableRandomVectorScorer;
1515
import org.apache.lucene.util.quantization.QuantizedByteVectorValues;
1616
import org.apache.lucene.util.quantization.ScalarQuantizedVectorSimilarity;
1717

@@ -55,9 +55,6 @@ protected final void checkOrdinal(int ord) {
5555
}
5656

5757
final float scoreFromOrds(int firstOrd, int secondOrd) throws IOException {
58-
checkOrdinal(firstOrd);
59-
checkOrdinal(secondOrd);
60-
6158
final int length = dims;
6259
long firstByteOffset = (long) firstOrd * (length + Float.BYTES);
6360
long secondByteOffset = (long) secondOrd * (length + Float.BYTES);
@@ -92,13 +89,21 @@ protected final float fallbackScore(long firstByteOffset, long secondByteOffset)
9289
}
9390

9491
@Override
95-
public RandomVectorScorer scorer(int ord) {
96-
checkOrdinal(ord);
97-
return new RandomVectorScorer.AbstractRandomVectorScorer(values) {
92+
public UpdateableRandomVectorScorer scorer() {
93+
return new UpdateableRandomVectorScorer.AbstractUpdateableRandomVectorScorer(values) {
94+
private int ord = -1;
95+
9896
@Override
9997
public float score(int node) throws IOException {
98+
checkOrdinal(node);
10099
return scoreFromOrds(ord, node);
101100
}
101+
102+
@Override
103+
public void setScoringOrdinal(int node) throws IOException {
104+
checkOrdinal(node);
105+
this.ord = node;
106+
}
102107
};
103108
}
104109

Diff for: libs/simdvec/src/test/java/org/elasticsearch/simdvec/VectorScorerFactoryTests.java

+46-22
Original file line numberDiff line numberDiff line change
@@ -19,8 +19,8 @@
1919
import org.apache.lucene.store.IndexInput;
2020
import org.apache.lucene.store.IndexOutput;
2121
import org.apache.lucene.store.MMapDirectory;
22-
import org.apache.lucene.util.hnsw.RandomVectorScorer;
2322
import org.apache.lucene.util.hnsw.RandomVectorScorerSupplier;
23+
import org.apache.lucene.util.hnsw.UpdateableRandomVectorScorer;
2424
import org.apache.lucene.util.quantization.QuantizedByteVectorValues;
2525
import org.apache.lucene.util.quantization.ScalarQuantizer;
2626

@@ -50,6 +50,8 @@
5050
// @com.carrotsearch.randomizedtesting.annotations.Repeat(iterations = 100)
5151
public class VectorScorerFactoryTests extends AbstractVectorTestCase {
5252

53+
private static final float DELTA = 1e-4f;
54+
5355
// bounds of the range of values that can be seen by int7 scalar quantized vectors
5456
static final byte MIN_INT7_VALUE = 0;
5557
static final byte MAX_INT7_VALUE = 127;
@@ -99,10 +101,13 @@ void testSimpleImpl(long maxChunkSize) throws IOException {
99101
float scc = values.getScalarQuantizer().getConstantMultiplier();
100102
float expected = luceneScore(sim, vec1, vec2, scc, vec1Correction, vec2Correction);
101103

102-
var luceneSupplier = luceneScoreSupplier(values, VectorSimilarityType.of(sim)).scorer(0);
104+
var luceneSupplier = luceneScoreSupplier(values, VectorSimilarityType.of(sim)).scorer();
105+
luceneSupplier.setScoringOrdinal(0);
103106
assertThat(luceneSupplier.score(1), equalTo(expected));
104107
var supplier = factory.getInt7SQVectorScorerSupplier(sim, in, values, scc).get();
105-
assertThat(supplier.scorer(0).score(1), equalTo(expected));
108+
var scorer = supplier.scorer();
109+
scorer.setScoringOrdinal(0);
110+
assertThat(scorer.score(1), equalTo(expected));
106111

107112
if (Runtime.version().feature() >= 22) {
108113
var qScorer = factory.getInt7SQVectorScorer(VectorSimilarityType.of(sim), values, query1).get();
@@ -134,24 +139,32 @@ public void testNonNegativeDotProduct() throws IOException {
134139
float expected = 0f;
135140
assertThat(luceneScore(DOT_PRODUCT, vec1, vec2, 1, -5, -5), equalTo(expected));
136141
var supplier = factory.getInt7SQVectorScorerSupplier(DOT_PRODUCT, in, values, 1).get();
137-
assertThat(supplier.scorer(0).score(1), equalTo(expected));
138-
assertThat(supplier.scorer(0).score(1), greaterThanOrEqualTo(0f));
142+
var scorer = supplier.scorer();
143+
scorer.setScoringOrdinal(0);
144+
assertThat(scorer.score(1), equalTo(expected));
145+
assertThat(scorer.score(1), greaterThanOrEqualTo(0f));
139146
// max inner product
140147
expected = luceneScore(MAXIMUM_INNER_PRODUCT, vec1, vec2, 1, -5, -5);
141148
supplier = factory.getInt7SQVectorScorerSupplier(MAXIMUM_INNER_PRODUCT, in, values, 1).get();
142-
assertThat(supplier.scorer(0).score(1), greaterThanOrEqualTo(0f));
143-
assertThat(supplier.scorer(0).score(1), equalTo(expected));
149+
scorer = supplier.scorer();
150+
scorer.setScoringOrdinal(0);
151+
assertThat(scorer.score(1), greaterThanOrEqualTo(0f));
152+
assertThat(scorer.score(1), equalTo(expected));
144153
// cosine
145154
expected = 0f;
146155
assertThat(luceneScore(COSINE, vec1, vec2, 1, -5, -5), equalTo(expected));
147156
supplier = factory.getInt7SQVectorScorerSupplier(COSINE, in, values, 1).get();
148-
assertThat(supplier.scorer(0).score(1), equalTo(expected));
149-
assertThat(supplier.scorer(0).score(1), greaterThanOrEqualTo(0f));
157+
scorer = supplier.scorer();
158+
scorer.setScoringOrdinal(0);
159+
assertThat(scorer.score(1), equalTo(expected));
160+
assertThat(scorer.score(1), greaterThanOrEqualTo(0f));
150161
// euclidean
151162
expected = luceneScore(EUCLIDEAN, vec1, vec2, 1, -5, -5);
152163
supplier = factory.getInt7SQVectorScorerSupplier(EUCLIDEAN, in, values, 1).get();
153-
assertThat(supplier.scorer(0).score(1), equalTo(expected));
154-
assertThat(supplier.scorer(0).score(1), greaterThanOrEqualTo(0f));
164+
scorer = supplier.scorer();
165+
scorer.setScoringOrdinal(0);
166+
assertThat(scorer.score(1), equalTo(expected));
167+
assertThat(scorer.score(1), greaterThanOrEqualTo(0f));
155168
}
156169
}
157170
}
@@ -208,7 +221,9 @@ void testRandomSupplier(long maxChunkSize, Function<Integer, byte[]> byteArraySu
208221
var values = vectorValues(dims, size, in, VectorSimilarityType.of(sim));
209222
float expected = luceneScore(sim, vectors[idx0], vectors[idx1], correction, offsets[idx0], offsets[idx1]);
210223
var supplier = factory.getInt7SQVectorScorerSupplier(sim, in, values, correction).get();
211-
assertThat(supplier.scorer(idx0).score(idx1), equalTo(expected));
224+
var scorer = supplier.scorer();
225+
scorer.setScoringOrdinal(idx0);
226+
assertThat(scorer.score(idx1), equalTo(expected));
212227
}
213228
}
214229
}
@@ -265,7 +280,7 @@ void testRandomScorerImpl(long maxChunkSize, Function<Integer, float[]> floatArr
265280

266281
var expected = luceneScore(sim, qVectors[idx0], qVectors[idx1], correction, corrections[idx0], corrections[idx1]);
267282
var scorer = factory.getInt7SQVectorScorer(VectorSimilarityType.of(sim), values, vectors[idx0]).get();
268-
assertThat(scorer.score(idx1), equalTo(expected));
283+
assertEquals(scorer.score(idx1), expected, DELTA);
269284
}
270285
}
271286
}
@@ -313,7 +328,9 @@ void testRandomSliceImpl(int dims, long maxChunkSize, int initialPadding, Functi
313328
var values = vectorValues(dims, size, in, VectorSimilarityType.of(sim));
314329
float expected = luceneScore(sim, vectors[idx0], vectors[idx1], correction, offsets[idx0], offsets[idx1]);
315330
var supplier = factory.getInt7SQVectorScorerSupplier(sim, in, values, correction).get();
316-
assertThat(supplier.scorer(idx0).score(idx1), equalTo(expected));
331+
var scorer = supplier.scorer();
332+
scorer.setScoringOrdinal(idx0);
333+
assertThat(scorer.score(idx1), equalTo(expected));
317334
}
318335
}
319336
}
@@ -352,7 +369,9 @@ public void testLarge() throws IOException {
352369
var values = vectorValues(dims, size, in, VectorSimilarityType.of(sim));
353370
float expected = luceneScore(sim, vector(idx0, dims), vector(idx1, dims), correction, off0, off1);
354371
var supplier = factory.getInt7SQVectorScorerSupplier(sim, in, values, correction).get();
355-
assertThat(supplier.scorer(idx0).score(idx1), equalTo(expected));
372+
var scorer = supplier.scorer();
373+
scorer.setScoringOrdinal(idx0);
374+
assertThat(scorer.score(idx1), equalTo(expected));
356375
}
357376
}
358377
}
@@ -391,8 +410,8 @@ void testRaceImpl(VectorSimilarityType sim) throws Exception {
391410
var values = vectorValues(dims, 4, in, VectorSimilarityType.of(sim));
392411
var scoreSupplier = factory.getInt7SQVectorScorerSupplier(sim, in, values, 1f).get();
393412
var tasks = List.<Callable<Optional<Throwable>>>of(
394-
new ScoreCallable(scoreSupplier.copy().scorer(0), 1, expectedScore1),
395-
new ScoreCallable(scoreSupplier.copy().scorer(2), 3, expectedScore2)
413+
new ScoreCallable(scoreSupplier.copy().scorer(), 0, 1, expectedScore1),
414+
new ScoreCallable(scoreSupplier.copy().scorer(), 2, 3, expectedScore2)
396415
);
397416
var executor = Executors.newFixedThreadPool(2);
398417
var results = executor.invokeAll(tasks);
@@ -408,14 +427,19 @@ void testRaceImpl(VectorSimilarityType sim) throws Exception {
408427

409428
static class ScoreCallable implements Callable<Optional<Throwable>> {
410429

411-
final RandomVectorScorer scorer;
430+
final UpdateableRandomVectorScorer scorer;
412431
final int ord;
413432
final float expectedScore;
414433

415-
ScoreCallable(RandomVectorScorer scorer, int ord, float expectedScore) {
416-
this.scorer = scorer;
417-
this.ord = ord;
418-
this.expectedScore = expectedScore;
434+
ScoreCallable(UpdateableRandomVectorScorer scorer, int queryOrd, int ord, float expectedScore) {
435+
try {
436+
this.scorer = scorer;
437+
this.scorer.setScoringOrdinal(queryOrd);
438+
this.ord = ord;
439+
this.expectedScore = expectedScore;
440+
} catch (IOException e) {
441+
throw new RuntimeException(e);
442+
}
419443
}
420444

421445
@Override

Diff for: modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/AbstractCompoundWordTokenFilterFactory.java

+4
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,8 @@ public abstract class AbstractCompoundWordTokenFilterFactory extends AbstractTok
2828
protected final int maxSubwordSize;
2929
protected final boolean onlyLongestMatch;
3030
protected final CharArraySet wordList;
31+
// TODO expose this parameter?
32+
protected final boolean reuseChars;
3133

3234
protected AbstractCompoundWordTokenFilterFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) {
3335
super(name);
@@ -36,6 +38,8 @@ protected AbstractCompoundWordTokenFilterFactory(IndexSettings indexSettings, En
3638
minSubwordSize = settings.getAsInt("min_subword_size", CompoundWordTokenFilterBase.DEFAULT_MIN_SUBWORD_SIZE);
3739
maxSubwordSize = settings.getAsInt("max_subword_size", CompoundWordTokenFilterBase.DEFAULT_MAX_SUBWORD_SIZE);
3840
onlyLongestMatch = settings.getAsBoolean("only_longest_match", false);
41+
// TODO is the default of true correct? see: https://github.com/apache/lucene/pull/14278
42+
reuseChars = true;
3943
wordList = Analysis.getWordSet(env, settings, "word_list");
4044
if (wordList == null) {
4145
throw new IllegalArgumentException("word_list must be provided for [" + name + "], either as a path to a file, or directly");

Diff for: modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/DictionaryCompoundWordTokenFilterFactory.java

+9-1
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,14 @@ public class DictionaryCompoundWordTokenFilterFactory extends AbstractCompoundWo
2828

2929
@Override
3030
public TokenStream create(TokenStream tokenStream) {
31-
return new DictionaryCompoundWordTokenFilter(tokenStream, wordList, minWordSize, minSubwordSize, maxSubwordSize, onlyLongestMatch);
31+
return new DictionaryCompoundWordTokenFilter(
32+
tokenStream,
33+
wordList,
34+
minWordSize,
35+
minSubwordSize,
36+
maxSubwordSize,
37+
onlyLongestMatch,
38+
reuseChars
39+
);
3240
}
3341
}

Diff for: server/src/main/java/org/elasticsearch/index/IndexVersions.java

+1
Original file line numberDiff line numberDiff line change
@@ -159,6 +159,7 @@ private static Version parseUnchecked(String version) {
159159
public static final IndexVersion SYNTHETIC_SOURCE_STORE_ARRAYS_NATIVELY_UNSIGNED_LONG = def(9_019_0_00, Version.LUCENE_10_1_0);
160160
public static final IndexVersion SYNTHETIC_SOURCE_STORE_ARRAYS_NATIVELY_SCALED_FLOAT = def(9_020_0_00, Version.LUCENE_10_1_0);
161161
public static final IndexVersion USE_LUCENE101_POSTINGS_FORMAT = def(9_021_0_00, Version.LUCENE_10_1_0);
162+
public static final IndexVersion UPGRADE_TO_LUCENE_10_2_0 = def(9_022_00_0, Version.LUCENE_10_2_0);
162163
/*
163164
* STOP! READ THIS FIRST! No, really,
164165
* ____ _____ ___ ____ _ ____ _____ _ ____ _____ _ _ ___ ____ _____ ___ ____ ____ _____ _

Diff for: server/src/main/java/org/elasticsearch/index/codec/vectors/ES815BitFlatVectorsFormat.java

+22-6
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
import org.apache.lucene.util.VectorUtil;
2323
import org.apache.lucene.util.hnsw.RandomVectorScorer;
2424
import org.apache.lucene.util.hnsw.RandomVectorScorerSupplier;
25+
import org.apache.lucene.util.hnsw.UpdateableRandomVectorScorer;
2526
import org.apache.lucene.util.quantization.QuantizedByteVectorValues;
2627

2728
import java.io.IOException;
@@ -130,18 +131,33 @@ public float score(int i) throws IOException {
130131
}
131132

132133
static class HammingScorerSupplier implements RandomVectorScorerSupplier {
133-
private final ByteVectorValues byteValues, byteValues1, byteValues2;
134+
private final ByteVectorValues byteValues, targetValues;
134135

135136
HammingScorerSupplier(ByteVectorValues byteValues) throws IOException {
136137
this.byteValues = byteValues;
137-
this.byteValues1 = byteValues.copy();
138-
this.byteValues2 = byteValues.copy();
138+
this.targetValues = byteValues.copy();
139139
}
140140

141141
@Override
142-
public RandomVectorScorer scorer(int i) throws IOException {
143-
byte[] query = byteValues1.vectorValue(i);
144-
return new HammingVectorScorer(byteValues2, query);
142+
public UpdateableRandomVectorScorer scorer() throws IOException {
143+
return new UpdateableRandomVectorScorer.AbstractUpdateableRandomVectorScorer(targetValues) {
144+
private final byte[] query = new byte[targetValues.dimension()];
145+
private int currentOrd = -1;
146+
147+
@Override
148+
public void setScoringOrdinal(int i) throws IOException {
149+
if (currentOrd == i) {
150+
return;
151+
}
152+
System.arraycopy(targetValues.vectorValue(i), 0, query, 0, query.length);
153+
this.currentOrd = i;
154+
}
155+
156+
@Override
157+
public float score(int i) throws IOException {
158+
return hammingScore(targetValues.vectorValue(i), query);
159+
}
160+
};
145161
}
146162

147163
@Override

0 commit comments

Comments
 (0)