Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[8.x] Deprecate dutch_kp and lovins stemmer as they are removed in Lucene 10 (#113143) #113815

Open
wants to merge 1 commit into
base: 8.x
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions docs/changelog/113143.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
pr: 113143
summary: Deprecate dutch_kp and lovins stemmer as they are removed in Lucene 10
area: Analysis
type: deprecation
issues: []
deprecation:
title: Deprecate dutch_kp and lovins stemmer as they are removed in Lucene 10
area: Analysis
details: kp, dutch_kp, dutchKp and lovins stemmers are deprecated and will be removed.
impact: These stemmers will be removed and will be no longer supported.
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@ values: `Arabic`, `Armenian`, `Basque`, `Catalan`, `Danish`, `Dutch`, `English`,
`Lithuanian`, `Lovins`, `Norwegian`, `Porter`, `Portuguese`, `Romanian`,
`Russian`, `Serbian`, `Spanish`, `Swedish`, `Turkish`.

deprecated:[8.16.0, `Kp` and `Lovins` support will be removed in a future version]

For example:

[source,console]
Expand All @@ -28,7 +30,7 @@ PUT /my-index-000001
"filter": {
"my_snow": {
"type": "snowball",
"language": "Lovins"
"language": "English"
}
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -144,12 +144,12 @@ https://snowballstem.org/algorithms/danish/stemmer.html[*`danish`*]
Dutch::
https://snowballstem.org/algorithms/dutch/stemmer.html[*`dutch`*],
https://snowballstem.org/algorithms/kraaij_pohlmann/stemmer.html[`dutch_kp`]
https://snowballstem.org/algorithms/kraaij_pohlmann/stemmer.html[`dutch_kp`] deprecated:[8.16.0, `dutch_kp` will be removed in a future version]
English::
https://snowballstem.org/algorithms/porter/stemmer.html[*`english`*],
https://ciir.cs.umass.edu/pubfiles/ir-35.pdf[`light_english`],
https://snowballstem.org/algorithms/lovins/stemmer.html[`lovins`],
https://snowballstem.org/algorithms/lovins/stemmer.html[`lovins`] deprecated:[8.16.0, `lovins` will be removed in a future version],
https://www.researchgate.net/publication/220433848_How_effective_is_suffixing[`minimal_english`],
https://snowballstem.org/algorithms/english/stemmer.html[`porter2`],
{lucene-analysis-docs}/en/EnglishPossessiveFilter.html[`possessive_english`]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,8 @@
import org.apache.lucene.analysis.snowball.SnowballFilter;
import org.apache.lucene.analysis.sv.SwedishLightStemFilter;
import org.elasticsearch.common.Strings;
import org.elasticsearch.common.logging.DeprecationCategory;
import org.elasticsearch.common.logging.DeprecationLogger;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.env.Environment;
import org.elasticsearch.index.IndexSettings;
Expand Down Expand Up @@ -81,6 +83,8 @@

public class StemmerTokenFilterFactory extends AbstractTokenFilterFactory {

private static final DeprecationLogger deprecationLogger = DeprecationLogger.getLogger(StemmerTokenFilterFactory.class);

private static final TokenStream EMPTY_TOKEN_STREAM = new EmptyTokenStream();

private String language;
Expand All @@ -90,6 +94,20 @@ public class StemmerTokenFilterFactory extends AbstractTokenFilterFactory {
this.language = Strings.capitalize(settings.get("language", settings.get("name", "porter")));
// check that we have a valid language by trying to create a TokenStream
create(EMPTY_TOKEN_STREAM).close();
if ("lovins".equalsIgnoreCase(language)) {
deprecationLogger.critical(
DeprecationCategory.ANALYSIS,
"lovins_deprecation",
"The [lovins] stemmer is deprecated and will be removed in a future version."
);
}
if ("dutch_kp".equalsIgnoreCase(language) || "dutchKp".equalsIgnoreCase(language) || "kp".equalsIgnoreCase(language)) {
deprecationLogger.critical(
DeprecationCategory.ANALYSIS,
"dutch_kp_deprecation",
"The [dutch_kp] stemmer is deprecated and will be removed in a future version."
);
}
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,6 @@
import static org.hamcrest.Matchers.instanceOf;

public class StemmerTokenFilterFactoryTests extends ESTokenStreamTestCase {

private static final CommonAnalysisPlugin PLUGIN = new CommonAnalysisPlugin();

public void testEnglishFilterFactory() throws IOException {
Expand Down Expand Up @@ -103,4 +102,30 @@ public void testMultipleLanguagesThrowsException() throws IOException {
);
assertEquals("Invalid stemmer class specified: [english, light_english]", e.getMessage());
}

public void testKpDeprecation() throws IOException {
IndexVersion v = IndexVersionUtils.randomVersion(random());
Settings settings = Settings.builder()
.put("index.analysis.filter.my_kp.type", "stemmer")
.put("index.analysis.filter.my_kp.language", "kp")
.put(SETTING_VERSION_CREATED, v)
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
.build();

AnalysisTestsHelper.createTestAnalysisFromSettings(settings, PLUGIN);
assertCriticalWarnings("The [dutch_kp] stemmer is deprecated and will be removed in a future version.");
}

public void testLovinsDeprecation() throws IOException {
IndexVersion v = IndexVersionUtils.randomVersion(random());
Settings settings = Settings.builder()
.put("index.analysis.filter.my_lovins.type", "stemmer")
.put("index.analysis.filter.my_lovins.language", "lovins")
.put(SETTING_VERSION_CREATED, v)
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
.build();

AnalysisTestsHelper.createTestAnalysisFromSettings(settings, PLUGIN);
assertCriticalWarnings("The [lovins] stemmer is deprecated and will be removed in a future version.");
}
}
Loading