diff --git a/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/StemmerTokenFilterFactory.java b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/StemmerTokenFilterFactory.java index 559bc9b3ff3d..6b622444980d 100644 --- a/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/StemmerTokenFilterFactory.java +++ b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/StemmerTokenFilterFactory.java @@ -9,6 +9,7 @@ package org.elasticsearch.analysis.common; +import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.ar.ArabicStemFilter; import org.apache.lucene.analysis.bg.BulgarianStemFilter; @@ -86,27 +87,13 @@ public class StemmerTokenFilterFactory extends AbstractTokenFilterFactory { private static final TokenStream EMPTY_TOKEN_STREAM = new EmptyTokenStream(); - private String language; + private final String language; StemmerTokenFilterFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) throws IOException { super(name, settings); this.language = Strings.capitalize(settings.get("language", settings.get("name", "porter"))); // check that we have a valid language by trying to create a TokenStream create(EMPTY_TOKEN_STREAM).close(); - if ("lovins".equalsIgnoreCase(language)) { - deprecationLogger.critical( - DeprecationCategory.ANALYSIS, - "lovins_deprecation", - "The [lovins] stemmer is deprecated and will be removed in a future version." - ); - } - if ("dutch_kp".equalsIgnoreCase(language) || "dutchKp".equalsIgnoreCase(language) || "kp".equalsIgnoreCase(language)) { - deprecationLogger.critical( - DeprecationCategory.ANALYSIS, - "dutch_kp_deprecation", - "The [dutch_kp] stemmer is deprecated and will be removed in a future version." - ); - } } @Override @@ -134,8 +121,17 @@ public TokenStream create(TokenStream tokenStream) { } else if ("dutch".equalsIgnoreCase(language)) { return new SnowballFilter(tokenStream, new DutchStemmer()); } else if ("dutch_kp".equalsIgnoreCase(language) || "dutchKp".equalsIgnoreCase(language) || "kp".equalsIgnoreCase(language)) { - // TODO Lucene 10 upgrade: KPStemmer has been removed, what is the migration path for users relying on it? - throw new UnsupportedOperationException(); + deprecationLogger.critical( + DeprecationCategory.ANALYSIS, + "dutch_kp_deprecation", + "The [dutch_kp] stemmer is deprecated and will be removed in a future version." + ); + return new TokenFilter(tokenStream) { + @Override + public boolean incrementToken() { + return false; + } + }; // English stemmers } else if ("english".equalsIgnoreCase(language)) { return new PorterStemFilter(tokenStream); @@ -144,8 +140,17 @@ public TokenStream create(TokenStream tokenStream) { || "kstem".equalsIgnoreCase(language)) { return new KStemFilter(tokenStream); } else if ("lovins".equalsIgnoreCase(language)) { - // TODO Lucene 10 upgrade: LovinsStemmer has been removed, what is the migration path for users relying on it? - throw new UnsupportedOperationException(); + deprecationLogger.critical( + DeprecationCategory.ANALYSIS, + "lovins_deprecation", + "The [lovins] stemmer is deprecated and will be removed in a future version." + ); + return new TokenFilter(tokenStream) { + @Override + public boolean incrementToken() { + return false; + } + }; } else if ("porter".equalsIgnoreCase(language)) { return new PorterStemFilter(tokenStream); } else if ("porter2".equalsIgnoreCase(language)) {