From 6cd1cd24eb53e2dc968c7eff618885d7b89d0a22 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Anton=20H=C3=A4gerstrand?= Date: Fri, 13 Nov 2015 17:28:49 +0100 Subject: [PATCH 1/2] Adapting to ES 2.0.0 --- pom.xml | 24 ++- src/main/assemblies/plugin.xml | 7 + .../apache/lucene/analysis/ComboAnalyzer.java | 191 ++++++++---------- .../lucene/analysis/ComboAnalyzerWrapper.java | 9 +- .../ReusableTokenStreamComponents.java | 68 ------- .../lucene/util/ReaderCloneFactory.java | 4 +- .../ReusableStringReaderCloner.java | 17 +- .../lucene/util}/StringReaderCloner.java | 4 +- .../ComboAnalysisBinderProcessor.java | 30 --- .../analysis/combo/AnalysisComboPlugin.java | 15 +- src/main/resources/es-plugin.properties | 2 - .../resources/plugin-descriptor.properties | 34 ++++ .../javax/io/StringReaderClonerTests.java | 1 + .../ReusableStringReaderClonerTests.java | 1 + .../lucene/analysis/TestComboAnalyzer.java | 62 ++++-- .../TestReusableStringReaderCloner.java | 3 +- .../lucene/util/TestReaderCloneFactory.java | 1 - .../index/analysis/TestIntegration.java | 26 ++- 18 files changed, 232 insertions(+), 267 deletions(-) delete mode 100644 src/main/java/org/apache/lucene/analysis/ReusableTokenStreamComponents.java rename src/main/java/org/apache/lucene/{analysis => util}/ReusableStringReaderCloner.java (81%) rename src/main/java/{javax/io => org/apache/lucene/util}/StringReaderCloner.java (97%) delete mode 100644 src/main/java/org/elasticsearch/index/analysis/ComboAnalysisBinderProcessor.java delete mode 100644 src/main/resources/es-plugin.properties create mode 100644 src/main/resources/plugin-descriptor.properties diff --git a/pom.xml b/pom.xml index b64ce66..0f336a8 100644 --- a/pom.xml +++ b/pom.xml @@ -6,7 +6,7 @@ 4.0.0 com.yakaz.elasticsearch.plugins elasticsearch-analysis-combo - 1.5.2-SNAPSHOT + 2.0.0-SNAPSHOT jar 2011 @@ -38,8 +38,9 @@ - 1.0.0.RC1 - 4.6.0 + 2.0.0 + 5.2.1 + 1.7 @@ -75,6 +76,19 @@ test + + junit + junit + 4.11 + + + hamcrest-core + org.hamcrest + + + test + + log4j log4j @@ -117,8 +131,8 @@ maven-compiler-plugin 2.3.2 - 1.6 - 1.6 + ${mvn.java.version} + ${mvn.java.version} diff --git a/src/main/assemblies/plugin.xml b/src/main/assemblies/plugin.xml index 9e3ccb3..2e041df 100644 --- a/src/main/assemblies/plugin.xml +++ b/src/main/assemblies/plugin.xml @@ -5,6 +5,13 @@ zip false + + + src/main/resources/plugin-descriptor.properties + + true + + / diff --git a/src/main/java/org/apache/lucene/analysis/ComboAnalyzer.java b/src/main/java/org/apache/lucene/analysis/ComboAnalyzer.java index 86678c3..a94c729 100644 --- a/src/main/java/org/apache/lucene/analysis/ComboAnalyzer.java +++ b/src/main/java/org/apache/lucene/analysis/ComboAnalyzer.java @@ -18,18 +18,14 @@ package org.apache.lucene.analysis; import org.apache.lucene.analysis.miscellaneous.UniqueTokenFilter; -import org.apache.lucene.util.CloseableThreadLocal; import org.apache.lucene.util.ReaderCloneFactory; -import org.apache.lucene.util.Version; -import org.elasticsearch.common.logging.ESLogger; -import org.elasticsearch.common.logging.ESLoggerFactory; import java.io.IOException; import java.io.Reader; -import java.util.Arrays; +import java.util.HashMap; import java.util.HashSet; +import java.util.Map; import java.util.Set; -import java.util.concurrent.atomic.AtomicReference; /** * An analyzer that combines multiple sub-analyzers into one. @@ -50,8 +46,6 @@ */ public class ComboAnalyzer extends Analyzer { - protected static final ESLogger logger = ESLoggerFactory.getLogger(ComboAnalyzer.class.getSimpleName()); - /** * Default value for the enabled state of {@link TokenStream} caching. */ @@ -71,13 +65,8 @@ public class ComboAnalyzer extends Analyzer { private boolean deduplication = DEDUPLICATION_ENABLED_DEFAULT; - private CloseableThreadLocal lastTokenStreams = new CloseableThreadLocal(); - private CloseableThreadLocal tempTokenStreams = new CloseableThreadLocal(); - private CloseableThreadLocal lastComboTokenStream = new CloseableThreadLocal(); - - public ComboAnalyzer(Version version, Analyzer... subAnalyzers) { - super(new GlobalReuseStrategy()); - + public ComboAnalyzer(Analyzer... subAnalyzers) { + super(); this.subAnalyzers = subAnalyzers; // Detect duplicates in analyzers @@ -168,113 +157,109 @@ public ComboAnalyzer disableDeduplication() { return this; } - protected ReaderCloneFactory.ReaderCloner cloneReader(Reader originalReader) { - ReaderCloneFactory.ReaderCloner rtn; + private static Tokenizer DUMMY_TOKENIZER = new Tokenizer(){ + @Override + public boolean incrementToken() throws IOException { + return false; + } + }; - // Duplication of the original reader, to feed all sub-analyzers - if (subAnalyzers.length <= 1) { + @Override + protected TokenStreamComponents createComponents(String fieldName) { + return new CombiningTokenStreamComponents(fieldName); + } - // Can reuse the only reader we have, there will be no need of duplication - // Usage of the AtomicReference ensures that the same reader won't be duplicated. - ReaderCloneFactory.ReaderCloner useOnceReaderCloner = new ReaderCloneFactory.ReaderCloner() { - private AtomicReference singleUsageReference = null; - public void init(Reader originalReader) throws IOException { - singleUsageReference = new AtomicReference(originalReader); - } - public Reader giveAClone() { - return singleUsageReference.getAndSet(null); - } - }; - try { - useOnceReaderCloner.init(originalReader); - } catch (Throwable fail) { - useOnceReaderCloner = null; - } - rtn = useOnceReaderCloner; + @Override public void close() { + super.close(); + } - } else { + private class CombiningTokenStreamComponents extends TokenStreamComponents { - rtn = ReaderCloneFactory.getCloner(originalReader); // internally uses the default "should always work" implementation + private final Map duplicateAnalyzers = new HashMap(); + private final String field; + private Reader reader; + public CombiningTokenStreamComponents(String field) { + super(DUMMY_TOKENIZER); + this.field = field; } - if (rtn == null) { - throw new IllegalArgumentException("Could not duplicate the original reader to feed multiple sub-readers"); + @Override + public void setReader(Reader reader) throws IOException { + duplicateAnalyzers.clear(); + this.reader = reader; } - return rtn; - } - @Override - protected TokenStreamComponents createComponents(String fieldName, Reader originalReader) { - // Duplication of the original reader, to feed all sub-analyzers - ReaderCloneFactory.ReaderCloner readerCloner = cloneReader(originalReader); - - // We remember last used TokenStreams because many times Analyzers can provide a reusable TokenStream - // Detecting that all sub-TokenStreams are reusable permits to reuse our ComboTokenStream as well. - if (tempTokenStreams.get() == null) tempTokenStreams.set(new TokenStream[subAnalyzers.length]); // each time non reusability has been detected - if (lastTokenStreams.get() == null) lastTokenStreams.set(new TokenStream[subAnalyzers.length]); // only at first run - TokenStream[] tempTokenStreams_local = tempTokenStreams.get(); - TokenStream[] lastTokenStreams_local = lastTokenStreams.get(); - ReusableTokenStreamComponents lastComboTokenStream_local = lastComboTokenStream.get(); - if (lastComboTokenStream_local == null) - lastComboTokenStream_local = new ReusableTokenStreamComponents(fieldName, this); + @Override + public TokenStream getTokenStream() { + TokenStream ret = createTokenStreams(); + return deduplication ? new UniqueTokenFilter(ret): ret; + } - // Get sub-TokenStreams from sub-analyzers - for (int i = subAnalyzers.length-1 ; i >= 0 ; --i) { + private TokenStream createTokenStreams() { + if(subAnalyzers.length == 1){ + return createTokenStream(subAnalyzers[0], field, reader); + } + else{ + ReaderCloneFactory.ReaderCloner cloner = ReaderCloneFactory.getCloner(reader); + TokenStream[] streams = new TokenStream[subAnalyzers.length]; + for (int i = 0; i < subAnalyzers.length; i++) { + streams[i] = createTokenStream(subAnalyzers[i], field, cloner.giveAClone()); + } + return new ComboTokenStream(streams); + } + } - // Feed the troll - Reader reader = readerCloner.giveAClone(); - tempTokenStreams_local[i] = null; + private TokenStream createTokenStream(Analyzer analyzer, String field, Reader reader) { try { - tempTokenStreams_local[i] = subAnalyzers[i].tokenStream(fieldName, reader); - } catch (IOException ignored) { - logger.debug("Ignoring {}th analyzer [{}]. Could not get a TokenStream.", ignored, i, subAnalyzers[i]); - } - // Use caching if asked or if required in case of duplicated analyzers - if (cacheTokenStreams || hasDuplicatedAnalyzers && duplicatedAnalyzers.contains(subAnalyzers[i])) { - CachingTokenStream cache = new CachingTokenStream(tempTokenStreams_local[i]); - try { - tempTokenStreams_local[i].reset(); - cache.fillCache(); - } catch (IOException ignored) { - logger.debug("Got an error when caching TokenStream from the {}th analyzer [{}]", i, subAnalyzers[i]); + if(hasDuplicatedAnalyzers && duplicatedAnalyzers.contains(analyzer)) { + return createCachedCopies(analyzer, field, reader); + } + else if(cacheTokenStreams){ + return loadAndClose(analyzer.tokenStream(field, reader)); } - try { - // Close original stream, all tokens are buffered - tempTokenStreams_local[i].close(); - } catch (IOException ignored) { - logger.debug("Got an error when closing TokenStream from the {}th analyzer [{}]", i, subAnalyzers[i]); + else{ + return analyzer.tokenStream(field, reader); } - tempTokenStreams_local[i] = cache; + } catch (IOException e) { + throw new RuntimeException(e); } - // Detect non reusability - if (tempTokenStreams_local[i] != lastTokenStreams_local[i]) { - lastComboTokenStream_local.setTokenStream(null); + } + + private TokenStream createCachedCopies(Analyzer analyzer, String field ,Reader reader) throws IOException { + //First time we see this analyzer, means that we have to cache the content + if(!duplicateAnalyzers.containsKey(analyzer)){ + CachingTokenStream caching = loadAndClose(analyzer.tokenStream(field, reader)); + duplicateAnalyzers.put(analyzer, caching); + return caching; + } + else{ + //Already seen, can just create a new copy of the cached + return loadAsCaching(duplicateAnalyzers.get(analyzer)); } } - // If last ComboTokenStream is not available create a new one - // This happens in the first call and in case of non reusability - if (lastComboTokenStream_local.getTokenStream() == null) { - // Clear old invalid references (preferred over allocating a new array) - Arrays.fill(lastTokenStreams_local, null); - // Swap temporary and last (non reusable) TokenStream references - lastTokenStreams.set(tempTokenStreams_local); - tempTokenStreams.set(lastTokenStreams_local); - // New ComboTokenStream to use - lastComboTokenStream_local.setTokenStream(new ComboTokenStream(tempTokenStreams_local)); - if (deduplication) - lastComboTokenStream_local.setTokenStream(new UniqueTokenFilter(lastComboTokenStream_local.getTokenStream(), true)); - lastComboTokenStream.set(lastComboTokenStream_local); + private CachingTokenStream loadAndClose(TokenStream tokenStream) { + CachingTokenStream cache = loadAsCaching(tokenStream); + try{ + tokenStream.close(); + } + catch (IOException e){ + throw new RuntimeException(e); + } + return cache; } - return lastComboTokenStream_local; - } - @Override public void close() { - super.close(); - lastTokenStreams.close(); - tempTokenStreams.close(); - lastComboTokenStream.close(); + private CachingTokenStream loadAsCaching(TokenStream tokenStream) { + try{ + CachingTokenStream cachingTokenStream = new CachingTokenStream(tokenStream); + tokenStream.reset(); + cachingTokenStream.fillCache(); + return cachingTokenStream; + } + catch (Exception e){ + throw new RuntimeException(e); + } + } } - } diff --git a/src/main/java/org/apache/lucene/analysis/ComboAnalyzerWrapper.java b/src/main/java/org/apache/lucene/analysis/ComboAnalyzerWrapper.java index 7fe2400..725ce11 100644 --- a/src/main/java/org/apache/lucene/analysis/ComboAnalyzerWrapper.java +++ b/src/main/java/org/apache/lucene/analysis/ComboAnalyzerWrapper.java @@ -20,7 +20,6 @@ package org.apache.lucene.analysis; import org.apache.lucene.util.Version; -import org.elasticsearch.ElasticsearchIllegalArgumentException; import org.elasticsearch.common.inject.Injector; import org.elasticsearch.common.logging.ESLogger; import org.elasticsearch.common.logging.ESLoggerFactory; @@ -72,7 +71,7 @@ protected void init() { String[] sub = settings.getAsArray("sub_analyzers"); ArrayList subAnalyzers = new ArrayList(); if (sub == null) { - throw new ElasticsearchIllegalArgumentException("Analyzer ["+name+"] analyzer of type ["+NAME+"], must have a \"sub_analyzers\" list property"); + throw new IllegalArgumentException("Analyzer ["+name+"] analyzer of type ["+NAME+"], must have a \"sub_analyzers\" list property"); } for (String subname : sub) { @@ -84,7 +83,7 @@ protected void init() { } } - this.analyzer = new org.apache.lucene.analysis.ComboAnalyzer(version, subAnalyzers.toArray(new Analyzer[subAnalyzers.size()])); + this.analyzer = new org.apache.lucene.analysis.ComboAnalyzer(subAnalyzers.toArray(new Analyzer[subAnalyzers.size()])); Boolean tokenstreamCaching = settings.getAsBoolean("tokenstream_caching", null); if (tokenstreamCaching != null) @@ -96,9 +95,9 @@ protected void init() { } @Override - protected TokenStreamComponents createComponents(String fieldName, Reader reader) { + protected TokenStreamComponents createComponents(String fieldName) { if (analyzer == null) init(); - return this.analyzer.createComponents(fieldName, reader); + return this.analyzer.createComponents(fieldName); } @Override public void close() { diff --git a/src/main/java/org/apache/lucene/analysis/ReusableTokenStreamComponents.java b/src/main/java/org/apache/lucene/analysis/ReusableTokenStreamComponents.java deleted file mode 100644 index 7ee38b4..0000000 --- a/src/main/java/org/apache/lucene/analysis/ReusableTokenStreamComponents.java +++ /dev/null @@ -1,68 +0,0 @@ -package org.apache.lucene.analysis; - -import java.io.IOException; -import java.io.Reader; - -public class ReusableTokenStreamComponents extends Analyzer.TokenStreamComponents { - - protected TokenStream sink; - protected final String fieldName; - protected final ComboAnalyzer analyzer; - - public ReusableTokenStreamComponents(String fieldName, ComboAnalyzer analyzer) { - super(DummyTokenizer.INSTANCE); - this.fieldName = fieldName; - this.analyzer = analyzer; - } - - public void setTokenStream(TokenStream sink) { - this.sink = sink; - } - - @Override - protected void setReader(Reader reader) throws IOException { - // This ReusableTokenStreamComponents comes from a ReuseStrategy, - // which uses a ThreadLocal, hence the ComboAnalyzer will reuse - // this instance and make it ready. - analyzer.createComponents(fieldName, reader); - } - - @Override - public TokenStream getTokenStream() { - return sink; - } - - protected static final class DummyTokenizer extends Tokenizer { - - public static final DummyTokenizer INSTANCE = new DummyTokenizer(); - - public DummyTokenizer() { - super(DummyReader.INSTANCE); - } - - @Override - public boolean incrementToken() throws IOException { - return false; - } - - } - - protected static class DummyReader extends Reader { - - public static final DummyReader INSTANCE = new DummyReader(); - - public DummyReader() { - } - - @Override - public int read(char[] cbuf, int off, int len) throws IOException { - return 0; - } - - @Override - public void close() throws IOException { - } - - } - -} diff --git a/src/main/java/org/apache/lucene/util/ReaderCloneFactory.java b/src/main/java/org/apache/lucene/util/ReaderCloneFactory.java index 5869ee6..1a1d6f5 100644 --- a/src/main/java/org/apache/lucene/util/ReaderCloneFactory.java +++ b/src/main/java/org/apache/lucene/util/ReaderCloneFactory.java @@ -17,11 +17,9 @@ package org.apache.lucene.util; -import org.apache.lucene.analysis.ReusableStringReaderCloner; import org.elasticsearch.common.logging.ESLogger; import org.elasticsearch.common.logging.ESLoggerFactory; -import javax.io.StringReaderCloner; import java.io.BufferedReader; import java.io.CharArrayReader; import java.io.FilterReader; @@ -43,7 +41,7 @@ * that merely reads all the available content, and creates a String out of it. * * Therefore you should understand the importance of having a proper implementation for - * any optimizable {@link Reader}. For instance, {@link javax.io.StringReaderCloner} gains access + * any optimizable {@link Reader}. For instance, {@link StringReaderCloner} gains access * to the underlying String in order to avoid copies. A generic BufferedReader */ public class ReaderCloneFactory { diff --git a/src/main/java/org/apache/lucene/analysis/ReusableStringReaderCloner.java b/src/main/java/org/apache/lucene/util/ReusableStringReaderCloner.java similarity index 81% rename from src/main/java/org/apache/lucene/analysis/ReusableStringReaderCloner.java rename to src/main/java/org/apache/lucene/util/ReusableStringReaderCloner.java index e7dfaf0..c67afff 100644 --- a/src/main/java/org/apache/lucene/analysis/ReusableStringReaderCloner.java +++ b/src/main/java/org/apache/lucene/util/ReusableStringReaderCloner.java @@ -17,9 +17,8 @@ * under the License. */ -package org.apache.lucene.analysis; +package org.apache.lucene.util; -import org.apache.lucene.util.ReaderCloneFactory; import java.io.IOException; import java.io.Reader; @@ -35,16 +34,18 @@ * private field {@code String s}, storing the original content. * It is therefore sensitive to Lucene implementation changes. */ -public class ReusableStringReaderCloner implements ReaderCloneFactory.ReaderCloner { +public class ReusableStringReaderCloner implements ReaderCloneFactory.ReaderCloner { private static java.lang.reflect.Field internalField; + private static Class reusableStringReader; - private ReusableStringReader original; + private Reader original; private String originalContent; static { try { - internalField = ReusableStringReader.class.getDeclaredField("s"); + reusableStringReader = (Class) ReusableStringReaderCloner.class.getClassLoader().loadClass("org.apache.lucene.analysis.ReusableStringReader"); + internalField = reusableStringReader.getDeclaredField("s"); internalField.setAccessible(true); } catch (Exception ex) { throw new IllegalArgumentException("Could not give accessibility to private \"str\" field of the given StringReader", ex); @@ -52,17 +53,17 @@ public class ReusableStringReaderCloner implements ReaderCloneFactory.ReaderClon } /** - * Binds this ReaderCloner with the package-private {@link ReusableStringReader} class + * Binds this ReaderCloner with the package-private ReusableStringReader class * into the {@link ReaderCloneFactory}, without giving access to the hidden class. */ public static void registerCloner() { - ReaderCloneFactory.bindCloner(ReusableStringReader.class, ReusableStringReaderCloner.class); + ReaderCloneFactory.bindCloner(reusableStringReader, ReusableStringReaderCloner.class); } /** * @param originalReader Must pass the canHandleReader(Reader) test, otherwise an IllegalArgumentException will be thrown. */ - public void init(ReusableStringReader originalReader) throws IOException { + public void init(Reader originalReader) throws IOException { this.original = originalReader; this.originalContent = null; try { diff --git a/src/main/java/javax/io/StringReaderCloner.java b/src/main/java/org/apache/lucene/util/StringReaderCloner.java similarity index 97% rename from src/main/java/javax/io/StringReaderCloner.java rename to src/main/java/org/apache/lucene/util/StringReaderCloner.java index 1f9d0fa..a7e6a04 100644 --- a/src/main/java/javax/io/StringReaderCloner.java +++ b/src/main/java/org/apache/lucene/util/StringReaderCloner.java @@ -18,9 +18,7 @@ */ // Using javax instead of java because of JVM security measures! -package javax.io; - -import org.apache.lucene.util.ReaderCloneFactory; +package org.apache.lucene.util; import java.io.IOException; import java.io.Reader; diff --git a/src/main/java/org/elasticsearch/index/analysis/ComboAnalysisBinderProcessor.java b/src/main/java/org/elasticsearch/index/analysis/ComboAnalysisBinderProcessor.java deleted file mode 100644 index 7859f60..0000000 --- a/src/main/java/org/elasticsearch/index/analysis/ComboAnalysisBinderProcessor.java +++ /dev/null @@ -1,30 +0,0 @@ -/* - * Licensed to Elastic Search and Shay Banon under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. Elastic Search licenses this - * file to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.elasticsearch.index.analysis; - -import org.apache.lucene.analysis.ComboAnalyzerWrapper; - -public class ComboAnalysisBinderProcessor extends AnalysisModule.AnalysisBinderProcessor { - - @Override public void processAnalyzers(AnalyzersBindings analyzersBindings) { - analyzersBindings.processAnalyzer(ComboAnalyzerWrapper.NAME, ComboAnalyzerProvider.class); - } - -} diff --git a/src/main/java/org/elasticsearch/plugin/analysis/combo/AnalysisComboPlugin.java b/src/main/java/org/elasticsearch/plugin/analysis/combo/AnalysisComboPlugin.java index 9e30ec9..ff51bab 100644 --- a/src/main/java/org/elasticsearch/plugin/analysis/combo/AnalysisComboPlugin.java +++ b/src/main/java/org/elasticsearch/plugin/analysis/combo/AnalysisComboPlugin.java @@ -19,12 +19,12 @@ package org.elasticsearch.plugin.analysis.combo; -import org.elasticsearch.common.inject.Module; +import org.apache.lucene.analysis.ComboAnalyzerWrapper; import org.elasticsearch.index.analysis.AnalysisModule; -import org.elasticsearch.index.analysis.ComboAnalysisBinderProcessor; -import org.elasticsearch.plugins.AbstractPlugin; +import org.elasticsearch.index.analysis.ComboAnalyzerProvider; +import org.elasticsearch.plugins.Plugin; -public class AnalysisComboPlugin extends AbstractPlugin { +public class AnalysisComboPlugin extends Plugin { @Override public String name() { return "analysis-combo"; @@ -34,10 +34,7 @@ public class AnalysisComboPlugin extends AbstractPlugin { return "Analyser that can multiplex multiple terms from different analyzers"; } - @Override public void processModule(Module module) { - if (module instanceof AnalysisModule) { - AnalysisModule analysisModule = (AnalysisModule) module; - analysisModule.addProcessor(new ComboAnalysisBinderProcessor()); - } + public void onModule(AnalysisModule module) { + module.addAnalyzer(ComboAnalyzerWrapper.NAME, ComboAnalyzerProvider.class); } } diff --git a/src/main/resources/es-plugin.properties b/src/main/resources/es-plugin.properties deleted file mode 100644 index bee2698..0000000 --- a/src/main/resources/es-plugin.properties +++ /dev/null @@ -1,2 +0,0 @@ -plugin=org.elasticsearch.plugin.analysis.combo.AnalysisComboPlugin -version=${version} diff --git a/src/main/resources/plugin-descriptor.properties b/src/main/resources/plugin-descriptor.properties new file mode 100644 index 0000000..f52083c --- /dev/null +++ b/src/main/resources/plugin-descriptor.properties @@ -0,0 +1,34 @@ +### mandatory elements for all plugins: +# +# 'description': simple summary of the plugin +description=Analyser that can multiplex multiple terms from different analyzers +# +# 'version': plugin's version +version=${project.version} +# +# 'name': the plugin name +name=analysis-combo + +### mandatory elements for jvm plugins : +# +# 'jvm': true if the 'classname' class should be loaded +# from jar files in the root directory of the plugin. +# Note that only jar files in the root directory are +# added to the classpath for the plugin! If you need +# other resources, package them into a resources jar. +jvm=true +# +# 'classname': the name of the class to load, fully-qualified. +classname=org.elasticsearch.plugin.analysis.combo.AnalysisComboPlugin +# +# 'java.version' version of java the code is built against +java.version=${mvn.java.version} +# +# 'elasticsearch.version' version of elasticsearch compiled against +# You will have to release a new version of the plugin for each new +# elasticsearch release. This version is checked when the plugin +# is loaded so Elasticsearch will refuse to start in the presence of +# plugins with the incorrect elasticsearch.version. +elasticsearch.version=${elasticsearch.version} + +isolated=false diff --git a/src/test/java/javax/io/StringReaderClonerTests.java b/src/test/java/javax/io/StringReaderClonerTests.java index 1fccc1f..ec8e3b6 100644 --- a/src/test/java/javax/io/StringReaderClonerTests.java +++ b/src/test/java/javax/io/StringReaderClonerTests.java @@ -1,5 +1,6 @@ package javax.io; +import org.apache.lucene.util.StringReaderCloner; import org.junit.Test; import javax.util.ReaderContent; diff --git a/src/test/java/org/apache/lucene/analysis/ReusableStringReaderClonerTests.java b/src/test/java/org/apache/lucene/analysis/ReusableStringReaderClonerTests.java index df4aa5b..4160d88 100644 --- a/src/test/java/org/apache/lucene/analysis/ReusableStringReaderClonerTests.java +++ b/src/test/java/org/apache/lucene/analysis/ReusableStringReaderClonerTests.java @@ -1,5 +1,6 @@ package org.apache.lucene.analysis; +import org.apache.lucene.util.ReusableStringReaderCloner; import org.junit.Test; import javax.util.ReaderContent; diff --git a/src/test/java/org/apache/lucene/analysis/TestComboAnalyzer.java b/src/test/java/org/apache/lucene/analysis/TestComboAnalyzer.java index 68cf0c1..ff8542a 100644 --- a/src/test/java/org/apache/lucene/analysis/TestComboAnalyzer.java +++ b/src/test/java/org/apache/lucene/analysis/TestComboAnalyzer.java @@ -41,7 +41,7 @@ public class TestComboAnalyzer extends BaseTokenStreamTestCase { @Test public void testSingleAnalyzer() throws IOException { - ComboAnalyzer cb = new ComboAnalyzer(TEST_VERSION_CURRENT, new WhitespaceAnalyzer(TEST_VERSION_CURRENT)); + ComboAnalyzer cb = new ComboAnalyzer(new WhitespaceAnalyzer()); for (int i = 0 ; i < 3 ; i++) assertTokenStreamContents(cb.tokenStream("field", new StringReader("just a little test "+i)), new String[]{"just", "a", "little", "test", Integer.toString(i)}, @@ -52,9 +52,9 @@ public void testSingleAnalyzer() throws IOException { @Test public void testMultipleAnalyzers() throws IOException { - ComboAnalyzer cb = new ComboAnalyzer(TEST_VERSION_CURRENT, - new WhitespaceAnalyzer(TEST_VERSION_CURRENT), - new StandardAnalyzer(TEST_VERSION_CURRENT), + ComboAnalyzer cb = new ComboAnalyzer( + new WhitespaceAnalyzer(), + new StandardAnalyzer(), new KeywordAnalyzer() ); for (int i = 0 ; i < 3 ; i++) @@ -67,9 +67,9 @@ public void testMultipleAnalyzers() throws IOException { @Test public void testMultipleAnalyzersDeduplication() throws IOException { - ComboAnalyzer cb = new ComboAnalyzer(TEST_VERSION_CURRENT, - new WhitespaceAnalyzer(TEST_VERSION_CURRENT), - new StandardAnalyzer(TEST_VERSION_CURRENT), + ComboAnalyzer cb = new ComboAnalyzer( + new WhitespaceAnalyzer(), + new StandardAnalyzer(), new KeywordAnalyzer() ); cb.enableDeduplication(); @@ -81,10 +81,32 @@ public void testMultipleAnalyzersDeduplication() throws IOException { new int[]{ 1, 0, 1, 1, 1, 1}); } + @Test + public void testCanReuseForDifferentReader() throws IOException { + Analyzer analyzer = new WhitespaceAnalyzer(); + ComboAnalyzer cb = new ComboAnalyzer( + analyzer, + analyzer, + analyzer + ); + for (int i = 0 ; i < 3 ; i++) { + assertTokenStreamContents(cb.tokenStream("field", new StringReader("just a little test " + i)), + new String[]{"just", "just", "just", "a", "a", "a", "little", "little", "little", "test", "test", "test", Integer.toString(i), Integer.toString(i), Integer.toString(i)}, + new int[]{0, 0, 0, 5, 5, 5, 7, 7, 7, 14, 14, 14, 19, 19, 19}, + new int[]{4, 4, 4, 6, 6, 6, 13, 13, 13, 18, 18, 18, 20, 20, 20}, + new int[]{1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0}); + assertTokenStreamContents(cb.tokenStream("field", new StringReader("another little test " + i)), + new String[]{"another", "another", "another", "little", "little", "little", "test", "test", "test", Integer.toString(i), Integer.toString(i), Integer.toString(i)}, + new int[]{0, 0, 0, 8, 8, 8, 15, 15, 15, 20, 20, 20}, + new int[]{7, 7, 7, 14, 14, 14, 19, 19, 19, 21, 21, 21}, + new int[]{1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0}); + } + } + @Test public void testThreeTimesTheSameAnalyzerInstance() throws IOException { - Analyzer analyzer = new WhitespaceAnalyzer(TEST_VERSION_CURRENT); - ComboAnalyzer cb = new ComboAnalyzer(TEST_VERSION_CURRENT, + Analyzer analyzer = new WhitespaceAnalyzer(); + ComboAnalyzer cb = new ComboAnalyzer( analyzer, analyzer, analyzer @@ -99,12 +121,12 @@ public void testThreeTimesTheSameAnalyzerInstance() throws IOException { @Test public void testCascadeCombo() throws IOException { - ComboAnalyzer cb = new ComboAnalyzer(TEST_VERSION_CURRENT, - new ComboAnalyzer(TEST_VERSION_CURRENT, - new WhitespaceAnalyzer(TEST_VERSION_CURRENT), + ComboAnalyzer cb = new ComboAnalyzer( + new ComboAnalyzer( + new WhitespaceAnalyzer(), new KeywordAnalyzer() ), - new StandardAnalyzer(TEST_VERSION_CURRENT), + new StandardAnalyzer(), new KeywordAnalyzer() ); for (int i = 0 ; i < 3 ; i++) @@ -118,12 +140,12 @@ public void testCascadeCombo() throws IOException { @Test public void testCascadeComboTwiceSameInstanceSolvedByCaching() throws IOException { Analyzer analyzer = new KeywordAnalyzer(); - ComboAnalyzer cb = new ComboAnalyzer(TEST_VERSION_CURRENT, - new ComboAnalyzer(TEST_VERSION_CURRENT, - new WhitespaceAnalyzer(TEST_VERSION_CURRENT), + ComboAnalyzer cb = new ComboAnalyzer( + new ComboAnalyzer( + new WhitespaceAnalyzer(), analyzer ).enableTokenStreamCaching(), - new StandardAnalyzer(TEST_VERSION_CURRENT), + new StandardAnalyzer(), analyzer ).enableTokenStreamCaching(); for (int i = 0 ; i < 3 ; i++) @@ -136,7 +158,7 @@ public void testCascadeComboTwiceSameInstanceSolvedByCaching() throws IOExceptio @Test public void testCanUseFromNamedAnalyzer() throws IOException { - ComboAnalyzer cb = new ComboAnalyzer(TEST_VERSION_CURRENT, new WhitespaceAnalyzer(TEST_VERSION_CURRENT)); + ComboAnalyzer cb = new ComboAnalyzer(new WhitespaceAnalyzer()); NamedAnalyzer namedAnalyzer = new NamedAnalyzer("name", AnalyzerScope.INDEX, cb); for (int i = 0 ; i < 3 ; i++) assertTokenStreamContents(namedAnalyzer.tokenStream("field", new StringReader("just a little test " + i)), @@ -149,7 +171,7 @@ public void testCanUseFromNamedAnalyzer() throws IOException { @Test public void testReuseSequentialMultithreading() throws IOException, InterruptedException { // Create the analyzer - final ComboAnalyzer cb = new ComboAnalyzer(TEST_VERSION_CURRENT, new WhitespaceAnalyzer(TEST_VERSION_CURRENT)); + final ComboAnalyzer cb = new ComboAnalyzer(new WhitespaceAnalyzer()); final NamedAnalyzer namedAnalyzer = new NamedAnalyzer("name", AnalyzerScope.INDEX, cb); // Use N threads, each running M times Thread[] threads = new Thread[4]; @@ -222,7 +244,7 @@ public void run() { @Test public void testReuseConcurrentMultithreading() throws IOException, InterruptedException { // Create the analyzer - final ComboAnalyzer cb = new ComboAnalyzer(TEST_VERSION_CURRENT, new WhitespaceAnalyzer(TEST_VERSION_CURRENT)); + final ComboAnalyzer cb = new ComboAnalyzer(new WhitespaceAnalyzer()); final NamedAnalyzer namedAnalyzer = new NamedAnalyzer("name", AnalyzerScope.INDEX, cb); // Use N threads, each running M times Thread[] threads = new Thread[4]; diff --git a/src/test/java/org/apache/lucene/analysis/TestReusableStringReaderCloner.java b/src/test/java/org/apache/lucene/analysis/TestReusableStringReaderCloner.java index 0c24785..5aecd80 100644 --- a/src/test/java/org/apache/lucene/analysis/TestReusableStringReaderCloner.java +++ b/src/test/java/org/apache/lucene/analysis/TestReusableStringReaderCloner.java @@ -18,6 +18,7 @@ package org.apache.lucene.analysis; import org.apache.lucene.util.ReaderCloneFactory; +import org.apache.lucene.util.ReusableStringReaderCloner; import org.junit.Test; import java.io.IOException; @@ -26,7 +27,7 @@ import static javax.util.ReaderContent.assertReaderContent; /** - * Testcase for {@link org.apache.lucene.analysis.ReusableStringReaderCloner} + * Testcase for {@link ReusableStringReaderCloner} */ public class TestReusableStringReaderCloner extends BaseTokenStreamTestCase { diff --git a/src/test/java/org/apache/lucene/util/TestReaderCloneFactory.java b/src/test/java/org/apache/lucene/util/TestReaderCloneFactory.java index ae88870..06df8dd 100644 --- a/src/test/java/org/apache/lucene/util/TestReaderCloneFactory.java +++ b/src/test/java/org/apache/lucene/util/TestReaderCloneFactory.java @@ -19,7 +19,6 @@ import org.junit.Test; -import javax.io.StringReaderCloner; import java.io.BufferedReader; import java.io.CharArrayReader; import java.io.FilterReader; diff --git a/src/test/java/org/elasticsearch/index/analysis/TestIntegration.java b/src/test/java/org/elasticsearch/index/analysis/TestIntegration.java index de805ac..f5bf660 100644 --- a/src/test/java/org/elasticsearch/index/analysis/TestIntegration.java +++ b/src/test/java/org/elasticsearch/index/analysis/TestIntegration.java @@ -2,9 +2,12 @@ import org.elasticsearch.action.admin.indices.analyze.AnalyzeRequest; import org.elasticsearch.action.admin.indices.analyze.AnalyzeResponse; +import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.xcontent.ToXContent; import org.elasticsearch.common.xcontent.XContentFactory; -import org.elasticsearch.test.ElasticsearchIntegrationTest; +import org.elasticsearch.plugin.analysis.combo.AnalysisComboPlugin; +import org.elasticsearch.plugins.Plugin; +import org.elasticsearch.test.ESIntegTestCase; import org.junit.Test; import java.io.IOException; @@ -16,16 +19,23 @@ import static org.hamcrest.CoreMatchers.equalTo; import static org.hamcrest.CoreMatchers.notNullValue; -public class TestIntegration extends ElasticsearchIntegrationTest { +public class TestIntegration extends ESIntegTestCase { protected static final String INDEX = "some_index"; protected static final String TYPE = "some_type"; public static final String ANALYZER = "configured_analyzer"; - protected void assertAnalyzesTo(String analyzer, String input, String[] output, int startOffsets[], int endOffsets[], String types[], int posIncrements[]) { + protected Settings nodeSettings(int nodeOrdinal) { + Settings.Builder settings = Settings.builder() + .put(super.nodeSettings(nodeOrdinal)) + .put("plugin.types", AnalysisComboPlugin.class.getName()); + return settings.build(); + } + + protected void assertAnalyzesTo(String analyzer, String input, String[] output, int startOffsets[], int endOffsets[], String types[], int position[]) { assertThat(output, notNullValue()); - AnalyzeResponse response = client().admin().indices().analyze(new AnalyzeRequest(INDEX, input).analyzer(analyzer)).actionGet(); + AnalyzeResponse response = client().admin().indices().analyze(new AnalyzeRequest(INDEX).text(input).analyzer(analyzer)).actionGet(); if (VERBOSE) { try { Map params = new HashMap(); @@ -36,7 +46,6 @@ protected void assertAnalyzesTo(String analyzer, String input, String[] output, } } Iterator tokens = response.iterator(); - int pos = 0; for (int i = 0; i < output.length; i++) { assertTrue("token "+i+" does not exist", tokens.hasNext()); AnalyzeResponse.AnalyzeToken token = tokens.next(); @@ -47,9 +56,8 @@ protected void assertAnalyzesTo(String analyzer, String input, String[] output, assertThat("endOffset "+i, token.getEndOffset(), equalTo(endOffsets[i])); if (types != null) assertThat("type "+i, token.getType(), equalTo(types[i])); - if (posIncrements != null) { - pos += posIncrements[i]; - assertThat("position "+i, token.getPosition(), equalTo(pos)); + if (position != null) { + assertThat("position "+i, token.getPosition(), equalTo(position[i])); } } } @@ -84,7 +92,7 @@ public void testAnalysis() throws IOException { new int[]{ 0, 0, 0, 5, 7, 7, 14, 14}, new int[]{ 4, 4, 18, 6, 13, 13, 18, 18}, null, - new int[]{ 1, 0, 0, 1, 1, 0, 1, 0}); + new int[]{ 0, 0, 0, 1, 2, 2, 3, 3}); } } From f3d4d365881416355e935afb966386a40325a53c Mon Sep 17 00:00:00 2001 From: Pelle Berglund Date: Tue, 29 Dec 2015 11:49:08 +0100 Subject: [PATCH 2/2] Upgrading to elasticsearch 2.1.1 --- pom.xml | 6 +++--- .../index/analysis/ComboAnalyzerProvider.java | 8 +++++--- src/main/resources/plugin-descriptor.properties | 2 -- .../elasticsearch/index/analysis/TestIntegration.java | 9 +++++++-- 4 files changed, 15 insertions(+), 10 deletions(-) diff --git a/pom.xml b/pom.xml index 0f336a8..329a767 100644 --- a/pom.xml +++ b/pom.xml @@ -6,7 +6,7 @@ 4.0.0 com.yakaz.elasticsearch.plugins elasticsearch-analysis-combo - 2.0.0-SNAPSHOT + 2.1.1-SNAPSHOT jar 2011 @@ -38,8 +38,8 @@ - 2.0.0 - 5.2.1 + 2.1.1 + 5.3.1 1.7 diff --git a/src/main/java/org/elasticsearch/index/analysis/ComboAnalyzerProvider.java b/src/main/java/org/elasticsearch/index/analysis/ComboAnalyzerProvider.java index 927946d..5d8e9ff 100644 --- a/src/main/java/org/elasticsearch/index/analysis/ComboAnalyzerProvider.java +++ b/src/main/java/org/elasticsearch/index/analysis/ComboAnalyzerProvider.java @@ -24,8 +24,9 @@ import org.elasticsearch.common.inject.Injector; import org.elasticsearch.common.inject.assistedinject.Assisted; import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.env.Environment; import org.elasticsearch.index.Index; -import org.elasticsearch.index.settings.IndexSettings; +import org.elasticsearch.index.settings.IndexSettingsService; public class ComboAnalyzerProvider extends AbstractIndexAnalyzerProvider { @@ -33,8 +34,9 @@ public class ComboAnalyzerProvider extends AbstractIndexAnalyzerProvider> nodePlugins() { + return pluginList(AnalysisComboPlugin.class); + } + protected Settings nodeSettings(int nodeOrdinal) { Settings.Builder settings = Settings.builder() - .put(super.nodeSettings(nodeOrdinal)) - .put("plugin.types", AnalysisComboPlugin.class.getName()); + .put(super.nodeSettings(nodeOrdinal)); return settings.build(); }