Merge branch 'main' into lucene_snapshot

elastic · Sep 29, 2024 · db54b81 · db54b81
2 parents 4a79e51 + e0a2d61
commit db54b81
Show file tree

Hide file tree

Showing 72 changed files with 2,653 additions and 574 deletions.
diff --git a/.buildkite/pull-requests.json b/.buildkite/pull-requests.json
@@ -14,21 +14,6 @@
       "trigger_comment_regex": "(run\\W+elasticsearch-ci.+)|(^\\s*((buildkite|@elastic(search)?machine)\\s*)?test\\s+this(\\s+please)?)",
       "cancel_intermediate_builds": true,
       "cancel_intermediate_builds_on_comment": false
-    },
-    {
-      "enabled": true,
-      "pipeline_slug": "elasticsearch-pull-request-check-serverless-submodule",
-      "allow_org_users": true,
-      "allowed_repo_permissions": [
-        "admin",
-        "write"
-      ],
-      "set_commit_status": false,
-      "build_on_commit": true,
-      "build_on_comment": false,
-      "labels": [
-        "test-update-serverless"
-      ]
     }
   ]
 }
diff --git a/build-tools-internal/gradle/wrapper/gradle-wrapper.properties b/build-tools-internal/gradle/wrapper/gradle-wrapper.properties
@@ -1,7 +1,7 @@
 distributionBase=GRADLE_USER_HOME
 distributionPath=wrapper/dists
-distributionSha256Sum=fdfca5dbc2834f0ece5020465737538e5ba679deeff5ab6c09621d67f8bb1a15
-distributionUrl=https\://services.gradle.org/distributions/gradle-8.10.1-all.zip
+distributionSha256Sum=2ab88d6de2c23e6adae7363ae6e29cbdd2a709e992929b48b6530fd0c7133bd6
+distributionUrl=https\://services.gradle.org/distributions/gradle-8.10.2-all.zip
 networkTimeout=10000
 validateDistributionUrl=true
 zipStoreBase=GRADLE_USER_HOME

diff --git a/build-tools-internal/src/main/groovy/elasticsearch.ide.gradle b/build-tools-internal/src/main/groovy/elasticsearch.ide.gradle
@@ -169,6 +169,7 @@ if (providers.systemProperty('idea.active').getOrNull() == 'true') {
               '-ea',
               '-Djava.security.manager=allow',
               '-Djava.locale.providers=CLDR',
+              '-Dtests.testfeatures.enabled=true',
               '-Des.nativelibs.path="' + testLibraryPath + '"',
               // TODO: only open these for mockito when it is modularized
               '--add-opens=java.base/java.security.cert=ALL-UNNAMED',

diff --git a/...internal/src/main/java/org/elasticsearch/gradle/internal/ElasticsearchTestBasePlugin.java b/...internal/src/main/java/org/elasticsearch/gradle/internal/ElasticsearchTestBasePlugin.java
@@ -108,6 +108,7 @@ public void execute(Task t) {
                 "-Xmx" + System.getProperty("tests.heap.size", "512m"),
                 "-Xms" + System.getProperty("tests.heap.size", "512m"),
                 "-Djava.security.manager=allow",
+                "-Dtests.testfeatures.enabled=true",
                 "--add-opens=java.base/java.util=ALL-UNNAMED",
                 // TODO: only open these for mockito when it is modularized
                 "--add-opens=java.base/java.security.cert=ALL-UNNAMED",

diff --git a/build-tools-internal/src/main/resources/minimumGradleVersion b/build-tools-internal/src/main/resources/minimumGradleVersion
@@ -1 +1 @@
-8.10.1
+8.10.2
diff --git a/docs/changelog/111834.yaml b/docs/changelog/111834.yaml
@@ -0,0 +1,5 @@
+pr: 111834
+summary: Add inner hits support to semantic query
+area: Search
+type: enhancement
+issues: []
diff --git a/docs/changelog/113413.yaml b/docs/changelog/113413.yaml
@@ -0,0 +1,6 @@
+pr: 113413
+summary: Fix a `NullPointerException` in the `_capabilities` API when the `path` parameter is null
+area: Infra/REST API
+type: bug
+issues:
+  - 113413
diff --git a/docs/changelog/113552.yaml b/docs/changelog/113552.yaml
@@ -0,0 +1,5 @@
+pr: 113552
+summary: Tag redacted document in ingest metadata
+area: Ingest Node
+type: enhancement
+issues: []
diff --git a/docs/changelog/113699.yaml b/docs/changelog/113699.yaml
@@ -0,0 +1,5 @@
+pr: 113699
+summary: "[ESQL] Fix init value in max float aggregation"
+area: ES|QL
+type: bug
+issues: []
diff --git a/docs/reference/ingest/processors/redact.asciidoc b/docs/reference/ingest/processors/redact.asciidoc
@@ -39,6 +39,7 @@ patterns. Legacy Grok patterns are not supported.
 | `ignore_missing`       | no        | `true`              | If `true` and `field` does not exist or is `null`, the processor quietly exits without modifying the document
 include::common-options.asciidoc[]
 | `skip_if_unlicensed`   | no        | `false`             | If `true` and the current license does not support running redact processors, then the processor quietly exits without modifying the document
+| `trace_redact`         | no        | `false`             | If `true`, the ingest metadata field `_ingest._redact._is_redacted` is set to `true` when the document has been redacted
 |======
 
 In this example the predefined `IP` Grok pattern is used to match

diff --git a/docs/reference/query-dsl/semantic-query.asciidoc b/docs/reference/query-dsl/semantic-query.asciidoc
@@ -25,7 +25,7 @@ GET my-index-000001/_search
   }
 }
 ------------------------------------------------------------
-// TEST[skip:TBD]
+// TEST[skip: Requires inference endpoints]
 
 
 [discrete]
@@ -40,9 +40,209 @@ The `semantic_text` field to perform the query on.
 (Required, string)
 The query text to be searched for on the field.
 
+`inner_hits`::
+(Optional, object)
+Retrieves the specific passages that match the query.
+See <<semantic-query-passage-ranking, passage ranking with the `semantic` query>> for more information.
++
+.Properties of `inner_hits`
+[%collapsible%open]
+====
+`from`::
+(Optional, integer)
+The offset from the first matching passage to fetch.
+Used to paginate through the passages.
+Defaults to `0`.
+
+`size`::
+(Optional, integer)
+The maximum number of matching passages to return.
+Defaults to `3`.
+====
 
 Refer to <<semantic-search-semantic-text,this tutorial>> to learn more about semantic search using `semantic_text` and `semantic` query.
 
+[discrete]
+[[semantic-query-passage-ranking]]
+==== Passage ranking with the `semantic` query
+The `inner_hits` parameter can be used for _passage ranking_, which allows you to determine which passages in the document best match the query.
+For example, suppose you have a document that covers several topics:
+
+[source,console]
+------------------------------------------------------------
+POST my-index/_doc/lake_tahoe
+{
+  "inference_field": [
+    "Lake Tahoe is the largest alpine lake in North America",
+    "When hiking in the area, please be on alert for bears"
+  ]
+}
+------------------------------------------------------------
+// TEST[skip: Requires inference endpoints]
+
+You can use passage ranking to find the passage that best matches your query:
+
+[source,console]
+------------------------------------------------------------
+GET my-index/_search
+{
+  "query": {
+    "semantic": {
+      "field": "inference_field",
+      "query": "mountain lake",
+      "inner_hits": { }
+    }
+  }
+}
+------------------------------------------------------------
+// TEST[skip: Requires inference endpoints]
+
+[source,console-result]
+------------------------------------------------------------
+{
+    "took": 67,
+    "timed_out": false,
+    "_shards": {
+        "total": 1,
+        "successful": 1,
+        "skipped": 0,
+        "failed": 0
+    },
+    "hits": {
+        "total": {
+            "value": 1,
+            "relation": "eq"
+        },
+        "max_score": 10.844536,
+        "hits": [
+            {
+                "_index": "my-index",
+                "_id": "lake_tahoe",
+                "_score": 10.844536,
+                "_source": {
+                    ...
+                },
+                "inner_hits": { <1>
+                    "inference_field": {
+                        "hits": {
+                            "total": {
+                                "value": 2,
+                                "relation": "eq"
+                            },
+                            "max_score": 10.844536,
+                            "hits": [
+                                {
+                                    "_index": "my-index",
+                                    "_id": "lake_tahoe",
+                                    "_nested": {
+                                        "field": "inference_field.inference.chunks",
+                                        "offset": 0
+                                    },
+                                    "_score": 10.844536,
+                                    "_source": {
+                                        "text": "Lake Tahoe is the largest alpine lake in North America"
+                                    }
+                                },
+                                {
+                                    "_index": "my-index",
+                                    "_id": "lake_tahoe",
+                                    "_nested": {
+                                        "field": "inference_field.inference.chunks",
+                                        "offset": 1
+                                    },
+                                    "_score": 3.2726858,
+                                    "_source": {
+                                        "text": "When hiking in the area, please be on alert for bears"
+                                    }
+                                }
+                            ]
+                        }
+                    }
+                }
+            }
+        ]
+    }
+}
+------------------------------------------------------------
+<1> Ranked passages will be returned using the <<inner-hits,`inner_hits` response format>>, with `<inner_hits_name>` set to the `semantic_text` field name.
+
+By default, the top three matching passages will be returned.
+You can use the `size` parameter to control the number of passages returned and the `from` parameter to page through the matching passages:
+
+[source,console]
+------------------------------------------------------------
+GET my-index/_search
+{
+  "query": {
+    "semantic": {
+      "field": "inference_field",
+      "query": "mountain lake",
+      "inner_hits": {
+        "from": 1,
+        "size": 1
+      }
+    }
+  }
+}
+------------------------------------------------------------
+// TEST[skip: Requires inference endpoints]
+
+[source,console-result]
+------------------------------------------------------------
+{
+    "took": 42,
+    "timed_out": false,
+    "_shards": {
+        "total": 1,
+        "successful": 1,
+        "skipped": 0,
+        "failed": 0
+    },
+    "hits": {
+        "total": {
+            "value": 1,
+            "relation": "eq"
+        },
+        "max_score": 10.844536,
+        "hits": [
+            {
+                "_index": "my-index",
+                "_id": "lake_tahoe",
+                "_score": 10.844536,
+                "_source": {
+                    ...
+                },
+                "inner_hits": {
+                    "inference_field": {
+                        "hits": {
+                            "total": {
+                                "value": 2,
+                                "relation": "eq"
+                            },
+                            "max_score": 10.844536,
+                            "hits": [
+                                {
+                                    "_index": "my-index",
+                                    "_id": "lake_tahoe",
+                                    "_nested": {
+                                        "field": "inference_field.inference.chunks",
+                                        "offset": 1
+                                    },
+                                    "_score": 3.2726858,
+                                    "_source": {
+                                        "text": "When hiking in the area, please be on alert for bears"
+                                    }
+                                }
+                            ]
+                        }
+                    }
+                }
+            }
+        ]
+    }
+}
+------------------------------------------------------------
+
 [discrete]
 [[hybrid-search-semantic]]
 ==== Hybrid search with the `semantic` query
@@ -79,7 +279,7 @@ POST my-index/_search
   }
 }
 ------------------------------------------------------------
-// TEST[skip:TBD]
+// TEST[skip: Requires inference endpoints]
 
 You can also use semantic_text as part of <<rrf,Reciprocal Rank Fusion>> to make ranking relevant results easier:
 
@@ -116,12 +316,12 @@ GET my-index/_search
   }
 }
 ------------------------------------------------------------
-// TEST[skip:TBD]
+// TEST[skip: Requires inference endpoints]
 
 
 [discrete]
 [[advanced-search]]
-=== Advanced search on `semantic_text` fields
+==== Advanced search on `semantic_text` fields
 
 The `semantic` query uses default settings for searching on `semantic_text` fields for ease of use.
 If you want to fine-tune a search on a `semantic_text` field, you need to know the task type used by the `inference_id` configured in `semantic_text`.
@@ -135,7 +335,7 @@ on a `semantic_text` field, it is not supported to use the `semantic_query` on a
 
 [discrete]
 [[search-sparse-inference]]
-==== Search with `sparse_embedding` inference
+===== Search with `sparse_embedding` inference
 
 When the {infer} endpoint uses a `sparse_embedding` model, you can use a <<query-dsl-sparse-vector-query,`sparse_vector` query>> on a <<semantic-text,`semantic_text`>> field in the following way:
 
@@ -157,14 +357,14 @@ GET test-index/_search
   }
 }
 ------------------------------------------------------------
-// TEST[skip:TBD]
+// TEST[skip: Requires inference endpoints]
 
 You can customize the `sparse_vector` query to include specific settings, like <<sparse-vector-query-with-pruning-config-and-rescore-example,pruning configuration>>.
 
 
 [discrete]
 [[search-text-inferece]]
-==== Search with `text_embedding` inference
+===== Search with `text_embedding` inference
 
 When the {infer} endpoint uses a `text_embedding` model, you can use a <<query-dsl-knn-query,`knn` query>> on a `semantic_text` field in the following way:
 
@@ -190,6 +390,6 @@ GET test-index/_search
   }
 }
 ------------------------------------------------------------
-// TEST[skip:TBD]
+// TEST[skip: Requires inference endpoints]
 
 You can customize the `knn` query to include specific settings, like `num_candidates` and `k`.
diff --git a/gradle/wrapper/gradle-wrapper.properties b/gradle/wrapper/gradle-wrapper.properties
@@ -1,7 +1,7 @@
 distributionBase=GRADLE_USER_HOME
 distributionPath=wrapper/dists
-distributionSha256Sum=fdfca5dbc2834f0ece5020465737538e5ba679deeff5ab6c09621d67f8bb1a15
-distributionUrl=https\://services.gradle.org/distributions/gradle-8.10.1-all.zip
+distributionSha256Sum=2ab88d6de2c23e6adae7363ae6e29cbdd2a709e992929b48b6530fd0c7133bd6
+distributionUrl=https\://services.gradle.org/distributions/gradle-8.10.2-all.zip
 networkTimeout=10000
 validateDistributionUrl=true
 zipStoreBase=GRADLE_USER_HOME