Skip to content

Commit

Permalink
Deprecate dutch_kp and lovins stemmer as they are removed in Lucene 10 (
Browse files Browse the repository at this point in the history
#113143)

Lucene 10 has upgraded its Snowball stemming support. As part of those
upgrades, two stemmers that are no longer supported were removed: `KpStemmer`
and `LovinsStemmer`. These correspond to `dutch_kp` and `lovins`, respectively.

We will deprecate them in 8.16 and remove support for them in a future
version.
  • Loading branch information
benwtrent committed Sep 30, 2024
1 parent b26d81c commit 5c840f7
Show file tree
Hide file tree
Showing 5 changed files with 59 additions and 4 deletions.
10 changes: 10 additions & 0 deletions docs/changelog/113143.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
pr: 113143
summary: Deprecate dutch_kp and lovins stemmer as they are removed in Lucene 10
area: Analysis
type: deprecation
issues: []
deprecation:
title: Deprecate dutch_kp and lovins stemmer as they are removed in Lucene 10
area: Analysis
details: kp, dutch_kp, dutchKp and lovins stemmers are deprecated and will be removed.
impact: These stemmers will be removed and will no longer be supported.
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@ values: `Arabic`, `Armenian`, `Basque`, `Catalan`, `Danish`, `Dutch`, `English`,
`Lithuanian`, `Lovins`, `Norwegian`, `Porter`, `Portuguese`, `Romanian`,
`Russian`, `Serbian`, `Spanish`, `Swedish`, `Turkish`.

deprecated:[8.16.0, `Kp` and `Lovins` support will be removed in a future version]

For example:

[source,console]
Expand All @@ -28,7 +30,7 @@ PUT /my-index-000001
"filter": {
"my_snow": {
"type": "snowball",
"language": "Lovins"
"language": "English"
}
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -144,12 +144,12 @@ https://snowballstem.org/algorithms/danish/stemmer.html[*`danish`*]
Dutch::
https://snowballstem.org/algorithms/dutch/stemmer.html[*`dutch`*],
https://snowballstem.org/algorithms/kraaij_pohlmann/stemmer.html[`dutch_kp`]
https://snowballstem.org/algorithms/kraaij_pohlmann/stemmer.html[`dutch_kp`] deprecated:[8.16.0, `dutch_kp` will be removed in a future version]
English::
https://snowballstem.org/algorithms/porter/stemmer.html[*`english`*],
https://ciir.cs.umass.edu/pubfiles/ir-35.pdf[`light_english`],
https://snowballstem.org/algorithms/lovins/stemmer.html[`lovins`],
https://snowballstem.org/algorithms/lovins/stemmer.html[`lovins`] deprecated:[8.16.0, `lovins` will be removed in a future version],
https://www.researchgate.net/publication/220433848_How_effective_is_suffixing[`minimal_english`],
https://snowballstem.org/algorithms/english/stemmer.html[`porter2`],
{lucene-analysis-docs}/en/EnglishPossessiveFilter.html[`possessive_english`]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,8 @@
import org.apache.lucene.analysis.snowball.SnowballFilter;
import org.apache.lucene.analysis.sv.SwedishLightStemFilter;
import org.elasticsearch.common.Strings;
import org.elasticsearch.common.logging.DeprecationCategory;
import org.elasticsearch.common.logging.DeprecationLogger;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.env.Environment;
import org.elasticsearch.index.IndexSettings;
Expand Down Expand Up @@ -81,6 +83,8 @@

public class StemmerTokenFilterFactory extends AbstractTokenFilterFactory {

private static final DeprecationLogger deprecationLogger = DeprecationLogger.getLogger(StemmerTokenFilterFactory.class);

private static final TokenStream EMPTY_TOKEN_STREAM = new EmptyTokenStream();

private String language;
Expand All @@ -90,6 +94,20 @@ public class StemmerTokenFilterFactory extends AbstractTokenFilterFactory {
this.language = Strings.capitalize(settings.get("language", settings.get("name", "porter")));
// check that we have a valid language by trying to create a TokenStream
create(EMPTY_TOKEN_STREAM).close();
if ("lovins".equalsIgnoreCase(language)) {
deprecationLogger.critical(
DeprecationCategory.ANALYSIS,
"lovins_deprecation",
"The [lovins] stemmer is deprecated and will be removed in a future version."
);
}
if ("dutch_kp".equalsIgnoreCase(language) || "dutchKp".equalsIgnoreCase(language) || "kp".equalsIgnoreCase(language)) {
deprecationLogger.critical(
DeprecationCategory.ANALYSIS,
"dutch_kp_deprecation",
"The [dutch_kp] stemmer is deprecated and will be removed in a future version."
);
}
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,6 @@
import static org.hamcrest.Matchers.instanceOf;

public class StemmerTokenFilterFactoryTests extends ESTokenStreamTestCase {

private static final CommonAnalysisPlugin PLUGIN = new CommonAnalysisPlugin();

public void testEnglishFilterFactory() throws IOException {
Expand Down Expand Up @@ -103,4 +102,30 @@ public void testMultipleLanguagesThrowsException() throws IOException {
);
assertEquals("Invalid stemmer class specified: [english, light_english]", e.getMessage());
}

/**
 * Builds a test analysis configuration whose stemmer filter uses the {@code kp}
 * language and checks that the {@code dutch_kp} deprecation message is reported
 * as a critical warning.
 */
public void testKpDeprecation() throws IOException {
    final IndexVersion createdVersion = IndexVersionUtils.randomVersion(random());

    final Settings.Builder indexSettings = Settings.builder();
    indexSettings.put("index.analysis.filter.my_kp.type", "stemmer");
    indexSettings.put("index.analysis.filter.my_kp.language", "kp");
    indexSettings.put(SETTING_VERSION_CREATED, createdVersion);
    indexSettings.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString());

    // Creating the analysis registry instantiates the filter factory, which is
    // where the deprecation warning is expected to be emitted.
    AnalysisTestsHelper.createTestAnalysisFromSettings(indexSettings.build(), PLUGIN);
    assertCriticalWarnings("The [dutch_kp] stemmer is deprecated and will be removed in a future version.");
}

/**
 * Builds a test analysis configuration whose stemmer filter uses the
 * {@code lovins} language and checks that the {@code lovins} deprecation
 * message is reported as a critical warning.
 */
public void testLovinsDeprecation() throws IOException {
    final IndexVersion createdVersion = IndexVersionUtils.randomVersion(random());

    final Settings.Builder indexSettings = Settings.builder();
    indexSettings.put("index.analysis.filter.my_lovins.type", "stemmer");
    indexSettings.put("index.analysis.filter.my_lovins.language", "lovins");
    indexSettings.put(SETTING_VERSION_CREATED, createdVersion);
    indexSettings.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString());

    // Creating the analysis registry instantiates the filter factory, which is
    // where the deprecation warning is expected to be emitted.
    AnalysisTestsHelper.createTestAnalysisFromSettings(indexSettings.build(), PLUGIN);
    assertCriticalWarnings("The [lovins] stemmer is deprecated and will be removed in a future version.");
}
}

0 comments on commit 5c840f7

Please sign in to comment.