elastic · javanna · Aug 30, 2024 · Aug 30, 2024 · Aug 30, 2024 · Aug 30, 2024
@@ -1,8 +1,10 @@
 steps:
   - trigger: apache-lucene-build-snapshot
-    label: Trigger pipeline to build lucene snapshot
+    label: Trigger pipeline to build lucene 10 snapshot
     key: lucene-build
-    if: build.env("LUCENE_BUILD_ID") == null || build.env("LUCENE_BUILD_ID") == ""
+    if: (build.env("LUCENE_BUILD_ID") == null || build.env("LUCENE_BUILD_ID") == "")
+    build:
+      branch: branch_10_0
   - wait
   - label: Upload and update lucene snapshot
     command: .buildkite/scripts/lucene-snapshot/upload-snapshot.sh

@@ -62,7 +62,6 @@ steps:
         matrix:
           setup:
             BWC_VERSION:
-              - 7.17.13
               - 8.9.1
               - 8.10.0
         agents:

diff --git a/benchmarks/src/main/java/org/elasticsearch/benchmark/vector/VectorScorerBenchmark.java b/benchmarks/src/main/java/org/elasticsearch/benchmark/vector/VectorScorerBenchmark.java
@@ -19,7 +19,7 @@
 import org.apache.lucene.store.MMapDirectory;
 import org.apache.lucene.util.hnsw.RandomVectorScorer;
 import org.apache.lucene.util.hnsw.RandomVectorScorerSupplier;
-import org.apache.lucene.util.quantization.RandomAccessQuantizedByteVectorValues;
+import org.apache.lucene.util.quantization.QuantizedByteVectorValues;
 import org.apache.lucene.util.quantization.ScalarQuantizer;
 import org.elasticsearch.common.logging.LogConfigurator;
 import org.elasticsearch.core.IOUtils;
@@ -217,19 +217,17 @@ public float squareDistanceScalar() {
         return 1 / (1f + adjustedDistance);
     }
 
-    RandomAccessQuantizedByteVectorValues vectorValues(int dims, int size, IndexInput in, VectorSimilarityFunction sim) throws IOException {
+    QuantizedByteVectorValues vectorValues(int dims, int size, IndexInput in, VectorSimilarityFunction sim) throws IOException {
         var sq = new ScalarQuantizer(0.1f, 0.9f, (byte) 7);
         var slice = in.slice("values", 0, in.length());
         return new OffHeapQuantizedByteVectorValues.DenseOffHeapVectorValues(dims, size, sq, false, sim, null, slice);
     }
 
-    RandomVectorScorerSupplier luceneScoreSupplier(RandomAccessQuantizedByteVectorValues values, VectorSimilarityFunction sim)
-        throws IOException {
+    RandomVectorScorerSupplier luceneScoreSupplier(QuantizedByteVectorValues values, VectorSimilarityFunction sim) throws IOException {
         return new Lucene99ScalarQuantizedVectorScorer(null).getRandomVectorScorerSupplier(sim, values);
     }
 
-    RandomVectorScorer luceneScorer(RandomAccessQuantizedByteVectorValues values, VectorSimilarityFunction sim, float[] queryVec)
-        throws IOException {
+    RandomVectorScorer luceneScorer(QuantizedByteVectorValues values, VectorSimilarityFunction sim, float[] queryVec) throws IOException {
         return new Lucene99ScalarQuantizedVectorScorer(null).getRandomVectorScorer(sim, values, queryVec);
     }
 

@@ -59,10 +59,6 @@ org.apache.lucene.util.Version#parseLeniently(java.lang.String)
 
 org.apache.lucene.index.NoMergePolicy#INSTANCE @ explicit use of NoMergePolicy risks forgetting to configure NoMergeScheduler; use org.elasticsearch.common.lucene.Lucene#indexWriterConfigWithNoMerging() instead.
 
-@defaultMessage Spawns a new thread which is solely under lucenes control use ThreadPool#relativeTimeInMillis instead
-org.apache.lucene.search.TimeLimitingCollector#getGlobalTimerThread()
-org.apache.lucene.search.TimeLimitingCollector#getGlobalCounter()
-
 @defaultMessage Don't interrupt threads use FutureUtils#cancel(Future<T>) instead
 java.util.concurrent.Future#cancel(boolean)
 

@@ -1,5 +1,5 @@
 elasticsearch     = 9.0.0
-lucene            = 9.11.1
+lucene            = 10.0.0-snapshot-22ac47c07ad
 
 bundled_jdk_vendor = openjdk
 bundled_jdk = 22.0.1+8@c7ec1332f7bb44aeba2eb341ae18aca4

diff --git a/docs/Versions.asciidoc b/docs/Versions.asciidoc
@@ -1,8 +1,8 @@
 
 include::{docs-root}/shared/versions/stack/{source_branch}.asciidoc[]
 
-:lucene_version:        9.11.1
-:lucene_version_path:   9_11_1
+:lucene_version:        10.0.0
+:lucene_version_path:   10_0_0
 :jdk:                   11.0.2
 :jdk_major:             11
 :build_type:            tar

diff --git a/docs/changelog/111465.yaml b/docs/changelog/111465.yaml
@@ -0,0 +1,5 @@
+pr: 111465
+summary: Add range and regexp Intervals
+area: Search
+type: enhancement
+issues: []
diff --git a/docs/changelog/112826.yaml b/docs/changelog/112826.yaml
@@ -0,0 +1,6 @@
+pr: 112826
+summary: "Multi term intervals: increase max_expansions"
+area: Search
+type: enhancement
+issues:
+  - 110491
diff --git a/docs/changelog/113333.yaml b/docs/changelog/113333.yaml
@@ -0,0 +1,5 @@
+pr: 113333
+summary: Upgrade to Lucene 9.12
+area: Search
+type: upgrade
+issues: []
diff --git a/docs/plugins/analysis-nori.asciidoc b/docs/plugins/analysis-nori.asciidoc
@@ -244,11 +244,11 @@ Which responds with:
           "end_offset": 3,
           "type": "word",
           "position": 1,
-          "leftPOS": "J(Ending Particle)",
+          "leftPOS": "JKS(Subject case marker)",
           "morphemes": null,
           "posType": "MORPHEME",
           "reading": null,
-          "rightPOS": "J(Ending Particle)"
+          "rightPOS": "JKS(Subject case marker)"
         },
         {
           "token": "깊",
@@ -268,11 +268,11 @@ Which responds with:
           "end_offset": 6,
           "type": "word",
           "position": 3,
-          "leftPOS": "E(Verbal endings)",
+          "leftPOS": "ETM(Adnominal form transformative ending)",
           "morphemes": null,
           "posType": "MORPHEME",
           "reading": null,
-          "rightPOS": "E(Verbal endings)"
+          "rightPOS": "ETM(Adnominal form transformative ending)"
         },
         {
           "token": "나무",
@@ -292,11 +292,11 @@ Which responds with:
           "end_offset": 10,
           "type": "word",
           "position": 5,
-          "leftPOS": "J(Ending Particle)",
+          "leftPOS": "JX(Auxiliary postpositional particle)",
           "morphemes": null,
           "posType": "MORPHEME",
           "reading": null,
-          "rightPOS": "J(Ending Particle)"
+          "rightPOS": "JX(Auxiliary postpositional particle)"
         }
       ]
     },

diff --git a/docs/reference/analysis/analyzers/lang-analyzer.asciidoc b/docs/reference/analysis/analyzers/lang-analyzer.asciidoc
@@ -1430,7 +1430,8 @@ PUT /persian_example
             "decimal_digit",
             "arabic_normalization",
             "persian_normalization",
-            "persian_stop"
+            "persian_stop",
+            "persian_stem"
           ]
         }
       }

diff --git a/docs/reference/analysis/tokenizers/pathhierarchy-tokenizer.asciidoc b/docs/reference/analysis/tokenizers/pathhierarchy-tokenizer.asciidoc
@@ -40,14 +40,14 @@ POST _analyze
       "start_offset": 0,
       "end_offset": 8,
       "type": "word",
-      "position": 0
+      "position": 1
     },
     {
       "token": "/one/two/three",
       "start_offset": 0,
       "end_offset": 14,
       "type": "word",
-      "position": 0
+      "position": 2
     }
   ]
 }
@@ -144,14 +144,14 @@ POST my-index-000001/_analyze
       "start_offset": 7,
       "end_offset": 18,
       "type": "word",
-      "position": 0
+      "position": 1
     },
     {
       "token": "/three/four/five",
       "start_offset": 7,
       "end_offset": 23,
       "type": "word",
-      "position": 0
+      "position": 2
     }
   ]
 }
@@ -178,14 +178,14 @@ If we were to set `reverse` to `true`, it would produce the following:
 [[analysis-pathhierarchy-tokenizer-detailed-examples]]
 === Detailed examples
 
-A common use-case for the `path_hierarchy` tokenizer is filtering results by 
-file paths. If indexing a file path along with the data, the use of the 
-`path_hierarchy` tokenizer to analyze the path allows filtering the results 
+A common use-case for the `path_hierarchy` tokenizer is filtering results by
+file paths. If indexing a file path along with the data, the use of the
+`path_hierarchy` tokenizer to analyze the path allows filtering the results
 by different parts of the file path string.
 
 
 This example configures an index to have two custom analyzers and applies
-those analyzers to multifields of the `file_path` text field that will 
+those analyzers to multifields of the `file_path` text field that will
 store filenames. One of the two analyzers uses reverse tokenization.
 Some sample documents are then indexed to represent some file paths
 for photos inside photo folders of two different users.
@@ -264,8 +264,8 @@ POST file-path-test/_doc/5
 --------------------------------------------------
 
 
-A search for a particular file path string against the text field matches all 
-the example documents, with Bob's documents ranking highest due to `bob` also 
+A search for a particular file path string against the text field matches all
+the example documents, with Bob's documents ranking highest due to `bob` also
 being one of the terms created by the standard analyzer boosting relevance for
 Bob's documents.
 
@@ -301,7 +301,7 @@ GET file-path-test/_search
 With the reverse parameter for this tokenizer, it's also possible to match
 from the other end of the file path, such as individual file names or a deep
 level subdirectory. The following example shows a search for all files named
-`my_photo1.jpg` within any directory via the `file_path.tree_reversed` field 
+`my_photo1.jpg` within any directory via the `file_path.tree_reversed` field
 configured to use the reverse parameter in the mapping.
 
 
@@ -342,7 +342,7 @@ POST file-path-test/_analyze
 
 
 It's also useful to be able to filter with file paths when combined with other
-types of searches, such as this example looking for any files paths with `16` 
+types of searches, such as this example looking for any files paths with `16`
 that also must be in Alice's photo directory.
 
 [source,console]

diff --git a/docs/reference/modules/threadpool.asciidoc b/docs/reference/modules/threadpool.asciidoc
@@ -13,16 +13,10 @@ There are several thread pools, but the important ones include:
 
 [[search-threadpool]]
 `search`::
-    For coordination of count/search operations at the shard level whose computation
-    is offloaded to the search_worker thread pool. Used also by fetch and other search
+    For count/search operations at the shard level. Used also by fetch and other search
     related operations  Thread pool type is `fixed` with a size of `int((`<<node.processors,
     `# of allocated processors`>>`pass:[ * ]3) / 2) + 1`, and queue_size of `1000`.
 
-`search_worker`::
-    For the heavy workload of count/search operations that may be executed concurrently
-    across segments within the same shard when possible. Thread pool type is `fixed`
-    with a size of `int((`<<node.processors, `# of allocated processors`>>`pass:[ * ]3) / 2) + 1`, and unbounded queue_size .
-
 [[search-throttled]]`search_throttled`::
     For count/search/suggest/get operations on `search_throttled indices`.
     Thread pool type is `fixed` with a size of `1`, and queue_size of `100`.

diff --git a/docs/reference/query-dsl/intervals-query.asciidoc b/docs/reference/query-dsl/intervals-query.asciidoc
@@ -73,7 +73,9 @@ Valid rules include:
 * <<intervals-match,`match`>>
 * <<intervals-prefix,`prefix`>>
 * <<intervals-wildcard,`wildcard`>>
+* <<intervals-regexp,`regexp`>>
 * <<intervals-fuzzy,`fuzzy`>>
+* <<intervals-range,`range`>>
 * <<intervals-all_of,`all_of`>>
 * <<intervals-any_of,`any_of`>>
 --
@@ -122,8 +124,9 @@ unstemmed ones.
 ==== `prefix` rule parameters
 
 The `prefix` rule matches terms that start with a specified set of characters.
-This prefix can expand to match at most 128 terms. If the prefix matches more
-than 128 terms, {es} returns an error. You can use the
+This prefix can expand to match at most `indices.query.bool.max_clause_count`
+<<search-settings,search setting>> terms. If the prefix matches more terms,
+{es} returns an error. You can use the
 <<index-prefixes,`index-prefixes`>> option in the field mapping to avoid this
 limit.
 
@@ -149,7 +152,8 @@ separate `analyzer` is specified.
 ==== `wildcard` rule parameters
 
 The `wildcard` rule matches terms using a wildcard pattern. This pattern can
-expand to match at most 128 terms. If the pattern matches more than 128 terms,
+expand to match at most `indices.query.bool.max_clause_count`
+<<search-settings,search setting>> terms. If the pattern matches more terms,
 {es} returns an error.
 
 `pattern`::
@@ -178,12 +182,45 @@ The `pattern` is normalized using the search analyzer from this field, unless
 `analyzer` is specified separately.
 --
 
+[[intervals-regexp]]
+==== `regexp` rule parameters
+
+The `regexp` rule matches terms using a regular expression pattern.
+This pattern can expand to match at most `indices.query.bool.max_clause_count`
+<<search-settings,search setting>> terms.
+If the pattern matches more terms, {es} returns an error.
+
+`pattern`::
+(Required, string) Regexp pattern used to find matching terms.
+For a list of operators supported by the
+`regexp` pattern, see <<regexp-syntax, Regular expression syntax>>.
+
+WARNING: Avoid using wildcard patterns, such as `.*` or `.*?+`. This can
+increase the iterations needed to find matching terms and slow search
+performance.
+--
+`analyzer`::
+(Optional, string) <<analysis, analyzer>> used to normalize the `pattern`.
+Defaults to the top-level `<field>`'s analyzer.
+
+--
+`use_field`::
++
+--
+(Optional, string) If specified, match intervals from this field rather than the
+top-level `<field>`.
+
+The `pattern` is normalized using the search analyzer from this field, unless
+`analyzer` is specified separately.
+--
+
 [[intervals-fuzzy]]
 ==== `fuzzy` rule parameters
 
 The `fuzzy` rule matches terms that are similar to the provided term, within an
 edit distance defined by <<fuzziness>>. If the fuzzy expansion matches more than
-128 terms, {es} returns an error.
+`indices.query.bool.max_clause_count`
+<<search-settings,search setting>> terms, {es} returns an error.
 
 `term`::
 (Required, string) The term to match
@@ -214,6 +251,41 @@ The `term` is normalized using the search analyzer from this field, unless
 `analyzer` is specified separately.
 --
 
+[[intervals-range]]
+==== `range` rule parameters
+
+The `range` rule matches terms contained within a provided range.
+This range can expand to match at most `indices.query.bool.max_clause_count`
+<<search-settings,search setting>> terms.
+If the range matches more terms, {es} returns an error.
+
+`gt`::
+(Optional, string) Greater than: match terms greater than the provided term.
+
+`gte`::
+(Optional, string) Greater than or equal to: match terms greater than or
+equal to the provided term.
+
+`lt`::
+(Optional, string) Less than: match terms less than the provided term.
+
+`lte`::
+(Optional, string) Less than or equal to: match terms less than or
+equal to the provided term.
+
+NOTE: You must provide a lower bound (either `gt` or `gte`) and
+an upper bound (either `lt` or `lte`).
+
+
+`analyzer`::
+(Optional, string) <<analysis, analyzer>> used to normalize the provided range terms.
+Defaults to the top-level `<field>`'s analyzer.
+
+`use_field`::
+(Optional, string) If specified, match intervals from this field rather than the
+top-level `<field>`.
+
+
 [[intervals-all_of]]
 ==== `all_of` rule parameters
 

diff --git a/docs/reference/search/profile.asciidoc b/docs/reference/search/profile.asciidoc
@@ -1298,7 +1298,7 @@ One of the `dfs.knn` sections for a shard looks like the following:
         "query" : [
             {
                 "type" : "DocAndScoreQuery",
-                "description" : "DocAndScore[100]",
+                "description" : "DocAndScoreQuery[0,...][0.008961825,...],0.008961825",
                 "time_in_nanos" : 444414,
                 "breakdown" : {
                   "set_min_competitive_score_count" : 0,