Skip to content

Commit

Permalink
Make dutch_kp and lovins no op token filters
Browse files Browse the repository at this point in the history
  • Loading branch information
javanna committed Sep 30, 2024
1 parent a969b1d commit 79d3d6a
Showing 1 changed file with 24 additions and 19 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@

package org.elasticsearch.analysis.common;

import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.ar.ArabicStemFilter;
import org.apache.lucene.analysis.bg.BulgarianStemFilter;
Expand Down Expand Up @@ -86,27 +87,13 @@ public class StemmerTokenFilterFactory extends AbstractTokenFilterFactory {

private static final TokenStream EMPTY_TOKEN_STREAM = new EmptyTokenStream();

private String language;
private final String language;

/**
 * Builds a stemmer token filter factory from the filter's settings.
 *
 * The stemmer language is taken from the "language" setting, falling back to the "name" setting and
 * finally to "porter"; the resolved value is stored capitalized. A throwaway filter is then created
 * over an empty token stream so that a bad language surfaces at construction time.
 *
 * @param indexSettings not read by this constructor body
 * @param environment not read by this constructor body
 * @param name filter name, forwarded to the superclass constructor
 * @param settings filter settings; source of "language" / "name" and forwarded to the superclass
 * @throws IOException declared on the signature; presumably raised while closing the probe stream
 */
StemmerTokenFilterFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) throws IOException {
super(name, settings);
// "language" wins over "name"; "porter" is the default when neither is set.
this.language = Strings.capitalize(settings.get("language", settings.get("name", "porter")));
// check that we have a valid language by trying to create a TokenStream
create(EMPTY_TOKEN_STREAM).close();
// NOTE(review): the dutch_kp and lovins branches of create() visible in this file emit these same
// deprecation warnings (same keys and messages), and create() has just been called above, so the
// two checks below look redundant - confirm whether they are meant to remain.
if ("lovins".equalsIgnoreCase(language)) {
deprecationLogger.critical(
DeprecationCategory.ANALYSIS,
"lovins_deprecation",
"The [lovins] stemmer is deprecated and will be removed in a future version."
);
}
// All three spellings select the same stemmer and share a single deprecation key.
if ("dutch_kp".equalsIgnoreCase(language) || "dutchKp".equalsIgnoreCase(language) || "kp".equalsIgnoreCase(language)) {
deprecationLogger.critical(
DeprecationCategory.ANALYSIS,
"dutch_kp_deprecation",
"The [dutch_kp] stemmer is deprecated and will be removed in a future version."
);
}
}

@Override
Expand Down Expand Up @@ -134,8 +121,17 @@ public TokenStream create(TokenStream tokenStream) {
} else if ("dutch".equalsIgnoreCase(language)) {
return new SnowballFilter(tokenStream, new DutchStemmer());
} else if ("dutch_kp".equalsIgnoreCase(language) || "dutchKp".equalsIgnoreCase(language) || "kp".equalsIgnoreCase(language)) {
// TODO Lucene 10 upgrade: KPStemmer has been removed, what is the migration path for users relying on it?
throw new UnsupportedOperationException();
deprecationLogger.critical(
DeprecationCategory.ANALYSIS,
"dutch_kp_deprecation",
"The [dutch_kp] stemmer is deprecated and will be removed in a future version."
);
return new TokenFilter(tokenStream) {
/**
 * Passes tokens through unchanged now that the dutch_kp stemmer is no longer available.
 *
 * Returning {@code false} unconditionally would end the stream before the first token and
 * silently drop all text; a no-op filter must instead advance the wrapped stream.
 *
 * @return whatever the wrapped stream reports: {@code true} while it still has tokens
 * @throws IOException if the wrapped stream fails while advancing
 */
@Override
public boolean incrementToken() throws IOException {
    return input.incrementToken();
}
};
// English stemmers
} else if ("english".equalsIgnoreCase(language)) {
return new PorterStemFilter(tokenStream);
Expand All @@ -144,8 +140,17 @@ public TokenStream create(TokenStream tokenStream) {
|| "kstem".equalsIgnoreCase(language)) {
return new KStemFilter(tokenStream);
} else if ("lovins".equalsIgnoreCase(language)) {
// TODO Lucene 10 upgrade: LovinsStemmer has been removed, what is the migration path for users relying on it?
throw new UnsupportedOperationException();
deprecationLogger.critical(
DeprecationCategory.ANALYSIS,
"lovins_deprecation",
"The [lovins] stemmer is deprecated and will be removed in a future version."
);
return new TokenFilter(tokenStream) {
/**
 * Passes tokens through unchanged now that the lovins stemmer is no longer available.
 *
 * Returning {@code false} unconditionally would end the stream before the first token and
 * silently drop all text; a no-op filter must instead advance the wrapped stream.
 *
 * @return whatever the wrapped stream reports: {@code true} while it still has tokens
 * @throws IOException if the wrapped stream fails while advancing
 */
@Override
public boolean incrementToken() throws IOException {
    return input.incrementToken();
}
};
} else if ("porter".equalsIgnoreCase(language)) {
return new PorterStemFilter(tokenStream);
} else if ("porter2".equalsIgnoreCase(language)) {
Expand Down

0 comments on commit 79d3d6a

Please sign in to comment.