From d00f205968f50dd170e4d1f90cf9e8972470c75c Mon Sep 17 00:00:00 2001 From: Shuyou Date: Tue, 3 Sep 2024 19:02:29 +0800 Subject: [PATCH] fix: zh docs --- localization/v2.4.x/site/zh/about/overview.md | 5 +- .../site/zh/getstarted/install-overview.md | 2 +- .../zh/integrations/integrate_with_bentoml.md | 2 +- .../zh/integrations/integrate_with_spark.md | 2 +- .../langchain/basic_usage_langchain.md | 97 +++++---- .../v2.4.x/site/zh/menuStructure/zh.json | 8 +- localization/v2.4.x/site/zh/migrate/es2m.md | 2 +- .../v2.4.x/site/zh/reference/boolean.md | 3 +- .../v2.4.x/site/zh/reference/glossary.md | 6 +- .../v2.4.x/site/zh/reference/multi_tenancy.md | 5 +- .../v2.4.x/site/zh/reference/schema.json | 32 ++- .../v2.4.x/site/zh/reference/schema.md | 67 +++--- .../reference/sys_config/configure_proxy.md | 5 +- localization/v2.4.x/site/zh/release_notes.md | 12 +- .../zh/tutorials/build-rag-with-milvus.md | 4 +- .../zh/userGuide/clustering-compaction.md | 21 +- .../data-import/prepare-source-data.md | 96 +++++---- .../zh/userGuide/enable-dynamic-field.json | 48 ++++- .../site/zh/userGuide/enable-dynamic-field.md | 165 +++++++------- .../site/zh/userGuide/insert-update-delete.md | 107 +++++----- .../site/zh/userGuide/manage-collections.md | 201 ++++++++++-------- .../site/zh/userGuide/manage-partitions.md | 77 ++++--- .../site/zh/userGuide/tools/cli_commands.md | 4 +- .../site/zh/userGuide/use-partition-key.json | 64 +++++- .../site/zh/userGuide/use-partition-key.md | 159 +++++++------- tools/glossary.json | 4 +- tools/utils.js | 2 +- v2.0.x/site/zh-CN/reference/glossary.md | 2 +- 28 files changed, 709 insertions(+), 493 deletions(-) diff --git a/localization/v2.4.x/site/zh/about/overview.md b/localization/v2.4.x/site/zh/about/overview.md index 797936ef6..6d0f0e035 100644 --- a/localization/v2.4.x/site/zh/about/overview.md +++ b/localization/v2.4.x/site/zh/about/overview.md @@ -6,6 +6,7 @@ summary: >- Milvus 
是一个高性能、高度可扩展的向量数据库,可在从笔记本电脑到大型分布式系统等各种环境中高效运行。它既可以开源软件的形式提供,也可以云服务的形式提供。 --- +

Milvus 是什么?

使用 Milvus Lite 创建集合包括两个步骤:首先是定义模式,其次是定义索引。在本节中,我们需要一个模块:DataType 告诉我们字段中的数据类型。我们还需要使用两个函数来创建模式和添加字段:create_schema():创建一个集合模式;add_field():在集合模式中添加一个字段。

+

使用 Milvus Lite 创建集合包括两个步骤:首先是定义模式,其次是定义索引。在本节中,我们需要一个模块:DataType 告诉我们字段中的数据类型。我们还需要使用两个函数来创建模式和添加字段:create_schema():创建一个Collection Schema ;add_field():在Collection Schema 中添加一个字段。

from pymilvus import MilvusClient, DataType, Collection
 
 # Create schema
diff --git a/localization/v2.4.x/site/zh/integrations/integrate_with_spark.md b/localization/v2.4.x/site/zh/integrations/integrate_with_spark.md
index 7953eb2e0..ca1aa0d0b 100644
--- a/localization/v2.4.x/site/zh/integrations/integrate_with_spark.md
+++ b/localization/v2.4.x/site/zh/integrations/integrate_with_spark.md
@@ -179,7 +179,7 @@ object Hello extends milvusbinlog:用于读取 Milvus 内置 binlog 数据的 Milvus 数据格式。
 
  • mjson:用于向 Milvus 批量插入数据的 Milvus JSON 格式。
  • -

    Milvus

    快速入门中,我们使用milvus格式将样本数据写入 Milvus 集群。milvus格式是一种新的数据格式,支持将 Spark DataFrame 数据无缝写入 Milvus 集合。这是通过批量调用 Milvus SDK 的插入 API 来实现的。如果 Milvus 中不存在集合,则会根据数据帧的模式创建新的集合。不过,自动创建的集合可能不支持集合模式的所有功能。因此,建议先通过 SDK 创建一个集合,然后再使用 spark-milvus 进行编写。有关详细信息,请参阅演示

    +

    Milvus

    快速入门中,我们使用milvus格式将样本数据写入 Milvus 集群。milvus格式是一种新的数据格式,支持将 Spark DataFrame 数据无缝写入 Milvus 集合。这是通过批量调用 Milvus SDK 的插入 API 来实现的。如果 Milvus 中不存在集合,则会根据数据帧的模式创建新的集合。不过,自动创建的集合可能不支持Collection Schema 的所有功能。因此,建议先通过 SDK 创建一个集合,然后再使用 spark-milvus 进行编写。有关详细信息,请参阅演示

    milvusbinlog

    新数据格式milvusbinlog用于读取 Milvus 内置的 binlog 数据。Binlog 是 Milvus 基于 parquet 的内部数据存储格式。除非你熟悉Milvus内部存储的细节,否则不建议直接使用milvusbinlog。我们建议使用下一节将介绍的MilvusUtils函数。

    val df = spark.read
       .format("milvusbinlog")
    diff --git a/localization/v2.4.x/site/zh/integrations/langchain/basic_usage_langchain.md b/localization/v2.4.x/site/zh/integrations/langchain/basic_usage_langchain.md
    index a57820bfa..527c28038 100644
    --- a/localization/v2.4.x/site/zh/integrations/langchain/basic_usage_langchain.md
    +++ b/localization/v2.4.x/site/zh/integrations/langchain/basic_usage_langchain.md
    @@ -3,6 +3,7 @@ id: basic_usage_langchain.md
     summary: 本笔记本介绍如何使用与 Milvus 向量数据库相关的功能。
     title: 将 Milvus 用作向量存储库
     ---
    +
     

    将 Milvus 用作向量存储库

    +
    # | output: false
     # | echo: false
     from langchain_openai import OpenAIEmbeddings
    @@ -82,13 +84,14 @@ vector_store = Milvus(
     

    以下是创建新集合的方法

    from langchain_core.documents import Document
     
    -vector_store_saved = Milvus.from_documents(
    -    [Document(page_content="foo!")],
    -    embeddings,
    -    collection_name="langchain_example",
    -    connection_args={"uri": URI},
+vector_store_saved = Milvus.from_documents(
+[Document(page_content="foo!")],
    +embeddings,
    +collection_name="langchain_example",
    +connection_args={"uri": URI},
     )
     
    +

    以下是如何检索存储的集合

    vector_store_loaded = Milvus(
         embeddings,
    @@ -118,71 +121,72 @@ vector_store_saved = Milvus.from langchain_core.documents import Document
     
     document_1 = Document(
    -    page_content="I had chocalate chip pancakes and scrambled eggs for breakfast this morning.",
    -    metadata={"source": "tweet"},
    +page_content="I had chocalate chip pancakes and scrambled eggs for breakfast this morning.",
    +metadata={"source": "tweet"},
     )
     
     document_2 = Document(
    -    page_content="The weather forecast for tomorrow is cloudy and overcast, with a high of 62 degrees.",
    -    metadata={"source": "news"},
    +page_content="The weather forecast for tomorrow is cloudy and overcast, with a high of 62 degrees.",
    +metadata={"source": "news"},
     )
     
     document_3 = Document(
    -    page_content="Building an exciting new project with LangChain - come check it out!",
    -    metadata={"source": "tweet"},
    +page_content="Building an exciting new project with LangChain - come check it out!",
    +metadata={"source": "tweet"},
     )
     
     document_4 = Document(
    -    page_content="Robbers broke into the city bank and stole $1 million in cash.",
    -    metadata={"source": "news"},
    +page_content="Robbers broke into the city bank and stole $1 million in cash.",
    +metadata={"source": "news"},
     )
     
     document_5 = Document(
    -    page_content="Wow! That was an amazing movie. I can't wait to see it again.",
    -    metadata={"source": "tweet"},
    +page_content="Wow! That was an amazing movie. I can't wait to see it again.",
    +metadata={"source": "tweet"},
     )
     
     document_6 = Document(
    -    page_content="Is the new iPhone worth the price? Read this review to find out.",
    -    metadata={"source": "website"},
    +page_content="Is the new iPhone worth the price? Read this review to find out.",
    +metadata={"source": "website"},
     )
     
     document_7 = Document(
    -    page_content="The top 10 soccer players in the world right now.",
    -    metadata={"source": "website"},
    +page_content="The top 10 soccer players in the world right now.",
    +metadata={"source": "website"},
     )
     
     document_8 = Document(
    -    page_content="LangGraph is the best framework for building stateful, agentic applications!",
    -    metadata={"source": "tweet"},
    +page_content="LangGraph is the best framework for building stateful, agentic applications!",
    +metadata={"source": "tweet"},
     )
     
     document_9 = Document(
    -    page_content="The stock market is down 500 points today due to fears of a recession.",
    -    metadata={"source": "news"},
    +page_content="The stock market is down 500 points today due to fears of a recession.",
    +metadata={"source": "news"},
     )
     
     document_10 = Document(
    -    page_content="I have a bad feeling I am going to get deleted :(",
    -    metadata={"source": "tweet"},
    +page_content="I have a bad feeling I am going to get deleted :(",
    +metadata={"source": "tweet"},
     )
     
     documents = [
    -    document_1,
    -    document_2,
    -    document_3,
    -    document_4,
    -    document_5,
    -    document_6,
    -    document_7,
    -    document_8,
    -    document_9,
    -    document_10,
    +document_1,
    +document_2,
    +document_3,
    +document_4,
    +document_5,
    +document_6,
    +document_7,
    +document_8,
    +document_9,
    +document_10,
     ]
     uuids = [str(uuid4()) for _ in range(len(documents))]
     
     vector_store.add_documents(documents=documents, ids=uuids)
     
    +
    ['b0248595-2a41-4f6b-9c25-3a24c1278bb3',
      'fa642726-5329-4495-a072-187e948dd71f',
      '9905001c-a4a3-455e-ab94-72d0ed11b476',
    @@ -266,27 +270,28 @@ retriever.invoke("Stealing from the bank is a cri
     

    按用户检索

    在构建检索应用程序时,您通常需要考虑到多个用户。这意味着您可能不仅要为一个用户存储数据,还要为许多不同的用户存储数据,而且这些用户不能查看彼此的数据。

    Milvus 建议使用partition_key来实现多租户,下面是一个例子。

    -

    Milvus Lite 目前不提供分区密钥功能,如果要使用该功能,需要从docker 或 kubernetes 启动 Milvus 服务器。

    +

    Milvus Lite 目前不提供Partition Key功能,如果要使用该功能,需要从docker 或 kubernetes 启动 Milvus 服务器。

    from langchain_core.documents import Document
     
     docs = [
    -    Document(page_content="i worked at kensho", metadata={"namespace": "harrison"}),
    -    Document(page_content="i worked at facebook", metadata={"namespace": "ankush"}),
    +Document(page_content="i worked at kensho", metadata={"namespace": "harrison"}),
    +Document(page_content="i worked at facebook", metadata={"namespace": "ankush"}),
     ]
     vectorstore = Milvus.from_documents(
    -    docs,
    -    embeddings,
    -    connection_args={"uri": URI},
    -    drop_old=True,
    -    partition_key_field="namespace",  # Use the "namespace" field as the partition key
    +docs,
    +embeddings,
    +connection_args={"uri": URI},
    +drop_old=True,
    +partition_key_field="namespace", # Use the "namespace" field as the partition key
     )
     
    -

    要使用分区密钥进行搜索,应在搜索请求的布尔表达式中包含以下任一内容:

    + +

    要使用Partition Key进行搜索,应在搜索请求的布尔表达式中包含以下任一内容:

    search_kwargs={"expr": '<partition_key> == "xxxx"'}

search_kwargs={"expr": '<partition_key> in ["xxx", "xxx"]'}

    -

    <partition_key> 替换为指定为分区密钥的字段名称。

    -

    Milvus 会根据指定的分区密钥更改分区,根据分区密钥过滤实体,并在过滤后的实体中进行搜索。

    +

    <partition_key> 替换为指定为Partition Key的字段名称。

    +

    Milvus 会根据指定的Partition Key更改分区,根据Partition Key过滤实体,并在过滤后的实体中进行搜索。

    # This will only get documents for Ankush
     vectorstore.as_retriever(search_kwargs={"expr": 'namespace == "ankush"'}).invoke(
         "where did i work?"
    diff --git a/localization/v2.4.x/site/zh/menuStructure/zh.json b/localization/v2.4.x/site/zh/menuStructure/zh.json
    index 6c12b96e7..d6049eb5d 100644
    --- a/localization/v2.4.x/site/zh/menuStructure/zh.json
    +++ b/localization/v2.4.x/site/zh/menuStructure/zh.json
    @@ -337,13 +337,13 @@
     				"children": []
     			},
     			{
    -				"label": "管理模式",
    +				"label": "管理 Schema",
     				"id": "schema.md",
     				"order": 1,
     				"children": []
     			},
     			{
    -				"label": "管理收藏",
    +				"label": "管理 Collection",
     				"id": "manage-collections.md",
     				"order": 2,
     				"children": []
    @@ -445,13 +445,13 @@
     				"isMenu": true,
     				"children": [
     					{
    -						"label": "动态领域",
    +						"label": "Dynamic Field",
     						"id": "enable-dynamic-field.md",
     						"order": 0,
     						"children": []
     					},
     					{
    -						"label": "分区密钥",
    +						"label": "Partition Key",
     						"id": "use-partition-key.md",
     						"order": 1,
     						"children": []
    diff --git a/localization/v2.4.x/site/zh/migrate/es2m.md b/localization/v2.4.x/site/zh/migrate/es2m.md
    index 40f9b0534..d2f63949c 100644
    --- a/localization/v2.4.x/site/zh/migrate/es2m.md
    +++ b/localization/v2.4.x/site/zh/migrate/es2m.md
    @@ -146,7 +146,7 @@ target:
     meta.fields.dims向量字段的尺寸。只有在meta.fields.typedense_vector 时才需要此参数。
     meta.milvus在 Milvus 2.x 中创建集合的特定配置。
     meta.milvus.collectionMilvus 集合的名称。如果未指定,默认为 Elasticsearch 索引名称。
    -meta.milvus.closeDynamicField指定是否禁用集合中的动态字段。默认为false 。有关动态字段的更多信息,请参阅启用动态字段。
    +meta.milvus.closeDynamicField指定是否禁用集合中的Dynamic Field。默认为false 。有关Dynamic Field的更多信息,请参阅启用Dynamic Fieldmeta.milvus.shardNum要在集合中创建的分片数量。有关分片的更多信息,请参阅术语meta.milvus.consistencyLevel集合在 Milvus 中的一致性级别。有关更多信息,请参阅一致性。
     
    diff --git a/localization/v2.4.x/site/zh/reference/boolean.md b/localization/v2.4.x/site/zh/reference/boolean.md
    index 9173e88af..2a1365d86 100644
    --- a/localization/v2.4.x/site/zh/reference/boolean.md
    +++ b/localization/v2.4.x/site/zh/reference/boolean.md
    @@ -3,6 +3,7 @@ id: boolean.md
     summary: 了解 Milvus 中的布尔表达规则。
     title: 标量过滤规则
     ---
    +
     

    标量过滤规则

    动态模式允许您在不修改现有模式的情况下,向集合中插入带有新字段的实体。这意味着您可以在不知道集合完整模式的情况下插入数据,并包含尚未定义的字段。在创建数据集时,可以通过启用动态字段来启用这种无模式功能。有关详细信息,请参阅启用动态字段

    +

    动态模式允许您在不修改现有模式的情况下,向集合中插入带有新字段的实体。这意味着您可以在不知道集合完整模式的情况下插入数据,并包含尚未定义的字段。在创建数据集时,可以通过启用Dynamic Field来启用这种无模式功能。有关详细信息,请参阅启用Dynamic Field

    嵌入

    字段的分区键属性可根据实体的分区键值将实体划分为不同的分区。这种分组可确保将共享相同键值的实体存储在一起,从而在使用分区键字段过滤查询时允许系统绕过不相关的分区,从而加快搜索操作。更多信息,请参阅使用分区密钥

    +

    字段的分区键属性可根据实体的分区键值将实体划分为不同的分区。这种分组可确保将共享相同键值的实体存储在一起,从而在使用分区键字段过滤查询时允许系统绕过不相关的分区,从而加快搜索操作。更多信息,请参阅使用Partition Key

    PC 通道

    模式是定义数据类型和数据属性的元信息。每个集合都有自己的集合模式,定义了集合的所有字段、自动 ID(主键)分配启用和集合描述。字段模式也包含在集合模式中,它定义了字段的名称、数据类型和其他属性。有关详细信息,请参阅管理模式

    +

    模式是定义数据类型和数据属性的元信息。每个集合都有自己的Collection Schema ,定义了集合的所有字段、自动 ID(主键)分配启用和集合描述。字段 Schema 也包含在Collection Schema 中,它定义了字段的名称、数据类型和其他属性。有关详细信息,请参阅管理 Schema

    实现面向分区的多租户也有两种可能的方法:

    每个租户一个分区

    管理单个集合比管理多个集合容易得多。与其创建多个集合,不如考虑为每个租户分配一个分区,以实现灵活的数据隔离和内存管理。面向分区的多租户搜索性能比面向集合的多租户要好得多。但需要注意的是,集合的租户数量不应超过集合可容纳分区的最大数量。

    -

    基于分区密钥的多租户

    Milvus 2.2.9 引入了名为分区密钥的新功能。创建集合时,指定一个租户字段并将其作为分区密钥字段。Milvus 将根据分区键字段中的值在分区中存储实体。在进行 ANN 搜索时,Milvus 会根据指定的分区键切换到一个分区,根据分区键过滤实体,并在过滤后的实体中进行搜索。

    +

    基于Partition Key的多租户

    Milvus 2.2.9 引入了名为Partition Key的新功能。创建集合时,指定一个租户字段并将其作为Partition Key字段。Milvus 将根据分区键字段中的值在分区中存储实体。在进行 ANN 搜索时,Milvus 会根据指定的分区键切换到一个分区,根据分区键过滤实体,并在过滤后的实体中进行搜索。

    这种策略取消了 Milvus 集合可支持的最大租户数限制,并大大简化了资源管理,因为 Milvus 会自动为你管理分区。

    总而言之,你可以使用上述任一或某些多租户策略来形成自己的解决方案。下表从数据隔离、搜索性能和最大租户数等方面对这些策略进行了比较。

    @@ -85,7 +86,7 @@ title: 多租户战略 一个集合适用于所有项目弱中等不适用适用于资源有限且对数据隔离不敏感的企业。 每个租户一个集合强强少于 10,000适用于每个群集拥有少于 10,000 个租户的情况。 每个租户一个分区中强4,096适用于每个集群租户少于 4 096 个的情况。 -基于分区密钥中强10,000,000+适用于预测租户数量会迅速增加到数百万的用户。 +基于Partition Key中强10,000,000+适用于预测租户数量会迅速增加到数百万的用户。

    下一步计划

    本主题介绍 Milvus 中的模式。模式用于定义集合的属性和其中的字段。

    -

    字段模式

    本主题介绍 Milvus 中的 Schema。Schema 用于定义集合的属性和其中的字段。

    +

    字段 Schema

    字段模式是字段的逻辑定义。在定义集合模式管理集合之前,首先需要定义它。

    +

    字段 Schema 是字段的逻辑定义。在定义Collection Schema管理 Collection之前,首先需要定义它。

    Milvus 只支持在一个集合中使用一个主键字段。

    -

    字段模式属性

    +

    字段 Schema 属性

    @@ -84,8 +85,8 @@ title: 管理模式
    属性说明 备注
    -

    创建字段模式

    为降低数据插入的复杂性,Milvus 允许在创建字段模式时为每个标量字段指定默认值,主键字段除外。这表明,如果在插入数据时某个字段为空,则您为该字段指定的默认值将适用。

    -

    创建常规字段模式:

    +

    创建字段 Schema

    为降低数据插入的复杂性,Milvus 允许在创建字段 Schema 时为每个标量字段指定默认值,主键字段除外。这表明,如果在插入数据时某个字段为空,则您为该字段指定的默认值将适用。

    +

    创建常规字段 Schema :

    from pymilvus import FieldSchema
     id_field = FieldSchema(name="id", dtype=DataType.INT64, is_primary=True, description="primary id")
     age_field = FieldSchema(name="age", dtype=DataType.INT64, description="age")
    @@ -94,16 +95,18 @@ embedding_field = FieldSchema(name="embedding&quo
     # The following creates a field and use it as the partition key
     position_field = FieldSchema(name="position", dtype=DataType.VARCHAR, max_length=256, is_partition_key=True)
     
    -

    创建带有默认字段值的字段模式:

    + +

    创建带有默认字段值的字段 Schema :

    from pymilvus import FieldSchema
     
     fields = [
    -  FieldSchema(name="id", dtype=DataType.INT64, is_primary=True),
    -  # configure default value `25` for field `age`
    -  FieldSchema(name="age", dtype=DataType.INT64, default_value=25, description="age"),
    -  embedding_field = FieldSchema(name="embedding", dtype=DataType.FLOAT_VECTOR, dim=128, description="vector")
    +FieldSchema(name="id", dtype=DataType.INT64, is_primary=True),
    +# configure default value `25` for field `age`
    +FieldSchema(name="age", dtype=DataType.INT64, default_value=25, description="age"),
    +embedding_field = FieldSchema(name="embedding", dtype=DataType.FLOAT_VECTOR, dim=128, description="vector")
     ]
     
    +

    支持的数据类型

    DataType 定义字段包含的数据类型。不同的字段支持不同的数据类型。

    • 主键字段支持

      @@ -135,7 +138,7 @@ fields = [

    Milvus 支持在一个集合中包含多个向量场。更多信息,请参阅混合搜索

    -

    集合模式

    集合模式是集合的逻辑定义。通常,在定义集合模式和管理集合之前,需要先定义字段模式。

    -

    集合模式属性

    +

    Collection Schema 是集合的逻辑定义。通常,在定义Collection Schema 和管理集合之前,需要先定义字段Schema。

    +

    Collection Schema 属性

    @@ -174,13 +177,13 @@ fields = [ - - + +
    属性说明 备注
    enable_dynamic_field是否启用动态模式数据类型:true false
    可选,默认为 。 有关动态模式的详细信息,请参阅动态False
    模式和管理集合的用户指南。
    是否启用 Dynamic Field数据类型:true false
    可选,默认为 。 有关 Dynamic Field 的详细信息,请参阅动态False
    Schema和管理集合的用户指南。
    -

    创建集合模式

    - 在定义集合模式之前,先定义字段模式。
    +

    创建Collection Schema

    + 在定义Collection Schema 之前,先定义字段 Schema 。
    from pymilvus import FieldSchema, CollectionSchema
     id_field = FieldSchema(name="id", dtype=DataType.INT64, is_primary=True, description="primary id")
     age_field = FieldSchema(name="age", dtype=DataType.INT64, description="age")
    @@ -192,7 +195,8 @@ position_field = FieldSchema(name="position"
     # Set enable_dynamic_field to True if you need to use dynamic fields. 
     schema = CollectionSchema(fields=[id_field, age_field, embedding_field], auto_id=False, enable_dynamic_field=True, description="desc of a collection")
     
    -

    使用指定的模式创建集合:

    + +

    使用指定的 Schema 创建 Collection:

    from pymilvus import Collection
     collection_name1 = "tutorial_1"
     collection1 = Collection(name=collection_name1, schema=schema, using='default', shards_num=2)
    @@ -202,11 +206,11 @@ collection1 = Collection(name=collection_
     
  • 可以使用shards_num 定义分区编号。
  • 通过在using 中指定别名,可以定义要创建集合的 Milvus 服务器。
  • 如果需要实施基于分区密钥的多租户,可以通过将is_partition_key 设置为True 来启用字段的分区密钥功能。
  • -
  • 如果需要启用动态字段,可以通过在集合模式中将enable_dynamic_field 设置为True 来启用动态模式。
  • +
  • 如果需要启用Dynamic Field,可以通过在Collection Schema 中将enable_dynamic_field 设置为True 来启用动态Schema。

  • -您还可以使用Collection.construct_from_dataframe 创建一个集合,它会自动从 DataFrame 生成集合模式并创建一个集合。

    +您还可以使用Collection.construct_from_dataframe 创建一个集合,它会自动从 DataFrame 生成Collection Schema 并创建一个集合。

    import pandas as pd
     df = pd.DataFrame({
         "id": [i for i in range(nb)],
    @@ -216,12 +220,13 @@ df = pd.DataFrame({
     })
     
     collection, ins_res = Collection.construct_from_dataframe(
    -    'my_collection',
    -    df,
    -    primary_field='id',
    -    auto_id=False
    -    )
    +'my_collection',
    +df,
    +primary_field='id',
    +auto_id=False
    +)
     
    +

    下一步

    diff --git a/localization/v2.4.x/site/zh/reference/sys_config/configure_proxy.md b/localization/v2.4.x/site/zh/reference/sys_config/configure_proxy.md index a3d1b5c85..0dd73ee51 100644 --- a/localization/v2.4.x/site/zh/reference/sys_config/configure_proxy.md +++ b/localization/v2.4.x/site/zh/reference/sys_config/configure_proxy.md @@ -3,8 +3,9 @@ id: configure_proxy.md related_key: configure group: system_configuration.md summary: 了解如何为 Milvus 配置代理。 -title: '' +title: "" --- +

    代理相关配置

    准备数据

    我们使用Milvus Documentation 2.4.x中的常见问题页面作为 RAG 中的私有知识,这对于简单的 RAG 管道来说是一个很好的数据源。

    @@ -134,7 +134,7 @@ collection_name = "my_rag_collection" )

    插入数据

    遍历文本行,创建嵌入,然后将数据插入 Milvus。

    -

    这里有一个新字段text ,它是集合模式中的一个未定义字段。它将被自动添加到保留的 JSON 动态字段中,在高层次上可被视为普通字段。

    +

    这里有一个新字段text ,它是Collection Schema 中的一个未定义字段。它将被自动添加到保留的 JSON Dynamic Field中,在高层次上可被视为普通字段。

    from tqdm import tqdm
     
     data = []
    diff --git a/localization/v2.4.x/site/zh/userGuide/clustering-compaction.md b/localization/v2.4.x/site/zh/userGuide/clustering-compaction.md
    index 703ed9cbd..4780843c5 100644
    --- a/localization/v2.4.x/site/zh/userGuide/clustering-compaction.md
    +++ b/localization/v2.4.x/site/zh/userGuide/clustering-compaction.md
    @@ -1,9 +1,10 @@
     ---
     id: clustering-compaction.md
     title: 聚类压缩
    -related_key: 'clustering, compaction'
    +related_key: "clustering, compaction"
     summary: 聚类压缩旨在提高搜索性能并降低大型集合的成本。本指南将帮助您了解聚类压缩以及该功能如何提高搜索性能。
     ---
    +
     

    聚类压缩

    +
    • dataCoord.compaction.clustering

      @@ -129,7 +131,7 @@ common: - +
      配置项配置项默认值
      usePartitionKeyAsClusteringKey指定是否将集合中的分区密钥用作聚类密钥。
      将其设置为true 表示将分区密钥用作聚类密钥。
      您可以在集合中通过显式设置聚类密钥来覆盖此设置。
      false
      usePartitionKeyAsClusteringKey指定是否将集合中的Partition Key用作聚类密钥。
      将其设置为true 表示将Partition Key用作聚类密钥。
      您可以在集合中通过显式设置聚类密钥来覆盖此设置。
      false
    • @@ -159,12 +161,13 @@ common: ] default_schema = CollectionSchema( - fields=default_fields, - description="test clustering-key collection" +fields=default_fields, +description="test clustering-key collection" ) coll1 = Collection(name="clustering_test", schema=default_schema)
    +

    可以使用以下数据类型的标量字段作为聚类键:Int8,Int16,Int32,Int64,Float,Double, 和VarChar

    diff --git a/localization/v2.4.x/site/zh/userGuide/data-import/prepare-source-data.md b/localization/v2.4.x/site/zh/userGuide/data-import/prepare-source-data.md index 7dd19f9a0..f6c78a1ba 100644 --- a/localization/v2.4.x/site/zh/userGuide/data-import/prepare-source-data.md +++ b/localization/v2.4.x/site/zh/userGuide/data-import/prepare-source-data.md @@ -4,6 +4,7 @@ order: 0 title: 准备源数据 summary: 本页将讨论在开始将数据批量插入数据集之前应该考虑的问题。 --- +

    准备源数据

    +
    import io.milvus.grpc.DataType;
     import io.milvus.param.collection.CollectionSchemaParam;
     import io.milvus.param.collection.FieldType;
    @@ -139,12 +141,13 @@ schema.verify()
     # when you use pymilvus earlier than 2.4.2 
     
     writer = LocalBulkWriter(
    -    schema=schema,
    -    local_path='.',
    -    segment_size=512 * 1024 * 1024, # Default value
    -    file_type=BulkFileType.PARQUET
    +schema=schema,
+local_path='.',
+segment_size=512 * 1024 * 1024, # Default value
    +file_type=BulkFileType.PARQUET
     )
     
    +
    import io.milvus.bulkwriter.LocalBulkWriter;
     import io.milvus.bulkwriter.LocalBulkWriterParam;
     import io.milvus.bulkwriter.common.clientenum.BulkFileType;
    @@ -192,13 +195,14 @@ BUCKET_NAME="milvus-bucket"
     
     # Connections parameters to access the remote bucket
     conn = RemoteBulkWriter.S3ConnectParam(
    -    endpoint="localhost:9000", # the default MinIO service started along with Milvus
    -    access_key=ACCESS_KEY,
    -    secret_key=SECRET_KEY,
    -    bucket_name=BUCKET_NAME,
    -    secure=False
    +endpoint="localhost:9000", # the default MinIO service started along with Milvus
    +access_key=ACCESS_KEY,
    +secret_key=SECRET_KEY,
    +bucket_name=BUCKET_NAME,
    +secure=False
     )
     
    +
    import io.milvus.bulkwriter.common.clientenum.BulkFileType;
     import io.milvus.bulkwriter.connect.S3ConnectParam;
     import io.milvus.bulkwriter.connect.StorageConnectParam;
    @@ -222,12 +226,13 @@ conn = RemoteBulkWriter.S3ConnectParam(
     # when you use pymilvus earlier than 2.4.2 
     
     writer = RemoteBulkWriter(
    -    schema=schema,
    -    remote_path="/",
    -    connect_param=conn,
    -    file_type=BulkFileType.PARQUET
    +schema=schema,
    +remote_path="/",
    +connect_param=conn,
    +file_type=BulkFileType.PARQUET
     )
     
    +
    import io.milvus.bulkwriter.RemoteBulkWriter;
     import io.milvus.bulkwriter.RemoteBulkWriterParam;
     
    @@ -275,21 +280,22 @@ writer = RemoteBulkWriter(
     import string
     
     def generate_random_str(length=5):
    -    letters = string.ascii_uppercase
    -    digits = string.digits
    -    
    +letters = string.ascii_uppercase
    +digits = string.digits
    +
         return ''.join(random.choices(letters + digits, k=length))
     
     for i in range(10000):
    -    writer.append_row({
    -        "id": i, 
    -        "vector": [random.uniform(-1, 1) for _ in range(768)],
    -        "scalar_1": generate_random_str(random.randint(1, 20)),
    -        "scalar_2": random.randint(0, 100)
    -    })
    -    
+writer.append_row({
+"id": i,
+"vector": [random.uniform(-1, 1) for _ in range(768)],
    +"scalar_1": generate_random_str(random.randint(1, 20)),
    +"scalar_2": random.randint(0, 100)
    +})
    +
     writer.commit()
     
    +
    import com.alibaba.fastjson.JSONObject;
     
     for (int i = 0; i < 10000; i++) {
    @@ -306,30 +312,31 @@ writer.commit()
     // localBulkWriter.commit(false);
     remoteBulkWriter.commit(false);
     
    -

    由于定义的模式允许动态字段,因此您也可以在要插入的数据中包含非模式定义的字段,如下所示。

    +

    由于定义的模式允许Dynamic Field,因此您也可以在要插入的数据中包含非模式定义的字段,如下所示。

    import random
     import string
     
     def generate_random_string(length=5):
    -    letters = string.ascii_uppercase
    -    digits = string.digits
    -    
    +letters = string.ascii_uppercase
    +digits = string.digits
    +
         return ''.join(random.choices(letters + digits, k=length))
     
     for i in range(10000):
    -    writer.append_row({
    -        "id": i, 
    -        "vector":[random.uniform(-1, 1) for _ in range(768)],
    -        "scalar_1": generate_random_string(),
    -        "scalar_2": random.randint(0, 100),
    -        "dynamic_field_1": random.choice([True, False]),
    -        "dynamic_field_2": random.randint(0, 100)
    -    })
    -    
+writer.append_row({
+"id": i,
+"vector":[random.uniform(-1, 1) for _ in range(768)],
    +"scalar_1": generate_random_string(),
    +"scalar_2": random.randint(0, 100),
    +"dynamic_field_1": random.choice([True, False]),
    +"dynamic_field_2": random.randint(0, 100)
    +})
    +
     writer.commit()
     
    +
    for (int i = 0; i < 10000; i++) {
         JSONObject json = new JSONObject();
         json.put("id", i);
    @@ -374,6 +381,7 @@ remoteBulkWriter.commit(# [['d4220a9e-45be-4ccb-8cb5-bf09304b9f23/1.parquet'],
     #  ['d4220a9e-45be-4ccb-8cb5-bf09304b9f23/2.parquet']]
     
    +
    // localBulkWriter.getBatchFiles();
     remoteBulkWriter.getBatchFiles();
     
    @@ -393,10 +401,10 @@ remoteBulkWriter.getBatchFiles();
     
    # JSON
     ├── folder
     │   └── 45ae1139-1d87-4aff-85f5-0039111f9e6b
    -│       └── 1.json 
    +│       └── 1.json
     
     # Parquet
     ├── folder
    -│   └── 45ae1139-1d87-4aff-85f5-0039111f9e6b
    -│       └── 1.parquet 
    +│ └── 45ae1139-1d87-4aff-85f5-0039111f9e6b
    +│ └── 1.parquet
     
    diff --git a/localization/v2.4.x/site/zh/userGuide/enable-dynamic-field.json b/localization/v2.4.x/site/zh/userGuide/enable-dynamic-field.json index 2c09acfa8..31eb17d6f 100644 --- a/localization/v2.4.x/site/zh/userGuide/enable-dynamic-field.json +++ b/localization/v2.4.x/site/zh/userGuide/enable-dynamic-field.json @@ -1 +1,47 @@ -{"codeList":["import random, time\nfrom pymilvus import connections, MilvusClient, DataType\n\nSERVER_ADDR = \"http://localhost:19530\"\n\n# 1. Set up a Milvus client\nclient = MilvusClient(\n uri=SERVER_ADDR\n)\n\n# 2. Create a collection\nschema = MilvusClient.create_schema(\n auto_id=False,\n enable_dynamic_field=True,\n)\n\nschema.add_field(field_name=\"id\", datatype=DataType.INT64, is_primary=True)\nschema.add_field(field_name=\"vector\", datatype=DataType.FLOAT_VECTOR, dim=5)\n\nindex_params = MilvusClient.prepare_index_params()\n\nindex_params.add_index(\n field_name=\"id\",\n index_type=\"STL_SORT\"\n)\n\nindex_params.add_index(\n field_name=\"vector\",\n index_type=\"IVF_FLAT\",\n metric_type=\"L2\",\n params={\"nlist\": 1024}\n)\n\nclient.create_collection(\n collection_name=\"test_collection\",\n schema=schema,\n index_params=index_params\n)\n\nres = client.get_load_state(\n collection_name=\"test_collection\"\n)\n\nprint(res)\n\n# Output\n#\n# {\n# \"state\": \"\"\n# }\n","import io.milvus.v2.client.ConnectConfig;\nimport io.milvus.v2.client.MilvusClientV2;\nimport io.milvus.v2.common.DataType;\nimport io.milvus.v2.common.IndexParam;\nimport io.milvus.v2.service.collection.request.AddFieldReq;\nimport io.milvus.v2.service.collection.request.CreateCollectionReq;\nimport io.milvus.v2.service.collection.request.GetLoadStateReq;\n\nString CLUSTER_ENDPOINT = \"http://localhost:19530\";\n\n// 1. Connect to Milvus server\nConnectConfig connectConfig = ConnectConfig.builder()\n .uri(CLUSTER_ENDPOINT)\n .build();\n\nMilvusClientV2 client = new MilvusClientV2(connectConfig);\n\n// 2. 
Create a collection in customized setup mode\n\n// 2.1 Create schema\nCreateCollectionReq.CollectionSchema schema = client.createSchema();\n\n// 2.2 Add fields to schema\nschema.addField(AddFieldReq.builder().fieldName(\"id\").dataType(DataType.Int64).isPrimaryKey(true).autoID(false).build());\nschema.addField(AddFieldReq.builder().fieldName(\"vector\").dataType(DataType.FloatVector).dimension(5).build());\n\n// 2.3 Prepare index parameters\nIndexParam indexParamForIdField = IndexParam.builder()\n .fieldName(\"id\")\n .indexType(IndexParam.IndexType.STL_SORT)\n .build();\n\nIndexParam indexParamForVectorField = IndexParam.builder()\n .fieldName(\"vector\")\n .indexType(IndexParam.IndexType.IVF_FLAT)\n .metricType(IndexParam.MetricType.IP)\n .extraParams(Map.of(\"nlist\", 1024))\n .build();\n\nList indexParams = new ArrayList<>();\nindexParams.add(indexParamForIdField);\nindexParams.add(indexParamForVectorField);\n\n// 2.4 Create a collection with schema and index parameters\nCreateCollectionReq customizedSetupReq = CreateCollectionReq.builder()\n .collectionName(\"customized_setup\")\n .collectionSchema(schema)\n .indexParams(indexParams)\n .enableDynamicField(true)\n .build();\n\nclient.createCollection(customizedSetupReq);\n\nThread.sleep(5000);\n\n// 2.5 Get load state of the collection\nGetLoadStateReq customSetupLoadStateReq1 = GetLoadStateReq.builder()\n .collectionName(\"customized_setup\")\n .build();\n\nboolean res = client.getLoadState(customSetupLoadStateReq1);\n\nSystem.out.println(res);\n\n// Output:\n// true\n","const { MilvusClient, DataType, sleep } = require(\"@zilliz/milvus2-sdk-node\")\n\nconst address = \"http://localhost:19530\"\n\nasync function main() {\n// 1. Set up a Milvus Client\nclient = new MilvusClient({address}); \n\n// 2. 
Create a collection\n// 2.1 Define fields\nconst fields = [\n {\n name: \"id\",\n data_type: DataType.Int64,\n is_primary_key: true,\n auto_id: false\n },\n {\n name: \"vector\",\n data_type: DataType.FloatVector,\n dim: 5\n },\n]\n\n// 2.2 Prepare index parameters\nconst index_params = [{\n field_name: \"id\",\n index_type: \"STL_SORT\"\n},{\n field_name: \"vector\",\n index_type: \"IVF_FLAT\",\n metric_type: \"IP\",\n params: { nlist: 1024}\n}]\n\n// 2.3 Create a collection with fields and index parameters\nres = await client.createCollection({\n collection_name: \"test_collection\",\n fields: fields, \n index_params: index_params,\n enable_dynamic_field: true\n})\n\nconsole.log(res.error_code)\n\n// Output\n// \n// Success\n// \n\nres = await client.getLoadState({\n collection_name: \"test_collection\",\n}) \n\nconsole.log(res.state)\n\n// Output\n// \n// LoadStateLoaded\n// \n","colors = [\"green\", \"blue\", \"yellow\", \"red\", \"black\", \"white\", \"purple\", \"pink\", \"orange\", \"brown\", \"grey\"]\ndata = []\n\nfor i in range(1000):\n current_color = random.choice(colors)\n current_tag = random.randint(1000, 9999)\n data.append({\n \"id\": i,\n \"vector\": [ random.uniform(-1, 1) for _ in range(5) ],\n \"color\": current_color,\n \"tag\": current_tag,\n \"color_tag\": f\"{current_color}_{str(current_tag)}\"\n })\n\nprint(data[0])\n","List colors = Arrays.asList(\"green\", \"blue\", \"yellow\", \"red\", \"black\", \"white\", \"purple\", \"pink\", \"orange\", \"brown\", \"grey\");\nList data = new ArrayList<>();\n\nfor (int i=0; i<1000; i++) {\n Random rand = new Random();\n String current_color = colors.get(rand.nextInt(colors.size()-1));\n int current_tag = rand.nextInt(8999) + 1000;\n JSONObject row = new JSONObject();\n row.put(\"id\", Long.valueOf(i));\n row.put(\"vector\", Arrays.asList(rand.nextFloat(), rand.nextFloat(), rand.nextFloat(), rand.nextFloat(), rand.nextFloat()));\n row.put(\"color\", current_color);\n row.put(\"tag\", current_tag);\n 
row.put(\"color_tag\", current_color + \"_\" + String.valueOf(rand.nextInt(8999) + 1000));\n data.add(row);\n}\n\nSystem.out.println(JSONObject.toJSON(data.get(0)));\n","const colors = [\"green\", \"blue\", \"yellow\", \"red\", \"black\", \"white\", \"purple\", \"pink\", \"orange\", \"brown\", \"grey\"]\nvar data = []\n\nfor (let i = 0; i < 1000; i++) {\n const current_color = colors[Math.floor(Math.random() * colors.length)]\n const current_tag = Math.floor(Math.random() * 8999 + 1000)\n data.push({\n id: i,\n vector: [Math.random(), Math.random(), Math.random(), Math.random(), Math.random()],\n color: current_color,\n tag: current_tag,\n color_tag: `${current_color}_${current_tag}`\n })\n}\n\nconsole.log(data[0])\n","{\n id: 0,\n vector: [\n 0.1275656405044483,\n 0.47417858592773277,\n 0.13858264437643286,\n 0.2390904907020377,\n 0.8447862593689635\n ],\n color: 'blue',\n tag: 2064,\n color_tag: 'blue_2064'\n}\n","res = client.insert(\n collection_name=\"test_collection\",\n data=data,\n)\n\nprint(res)\n\n# Output\n#\n# {\n# \"insert_count\": 1000,\n# \"ids\": [\n# 0,\n# 1,\n# 2,\n# 3,\n# 4,\n# 5,\n# 6,\n# 7,\n# 8,\n# 9,\n# \"(990 more items hidden)\"\n# ]\n# }\n\ntime.sleep(5)\n","// 3.1 Insert data into the collection\nInsertReq insertReq = InsertReq.builder()\n .collectionName(\"customized_setup\")\n .data(data)\n .build();\n\nInsertResp insertResp = client.insert(insertReq);\n\nSystem.out.println(JSONObject.toJSON(insertResp));\n\n// Output:\n// {\"insertCnt\": 1000}\n\nThread.sleep(5000);\n","res = await client.insert({\n collection_name: \"test_collection\",\n data: data,\n})\n\nconsole.log(res.insert_cnt)\n\n// Output\n// \n// 1000\n// \n\nawait sleep(5000)\n","# 4. 
Search with dynamic fields\nquery_vectors = [[0.3580376395471989, -0.6023495712049978, 0.18414012509913835, -0.26286205330961354, 0.9029438446296592]]\n\nres = client.search(\n collection_name=\"test_collection\",\n data=query_vectors,\n filter=\"color in [\\\"red\\\", \\\"green\\\"]\",\n search_params={\"metric_type\": \"L2\", \"params\": {\"nprobe\": 10}},\n limit=3\n)\n\nprint(res)\n\n# Output\n#\n# [\n# [\n# {\n# \"id\": 863,\n# \"distance\": 0.188413605093956,\n# \"entity\": {\n# \"id\": 863,\n# \"color_tag\": \"red_2371\"\n# }\n# },\n# {\n# \"id\": 799,\n# \"distance\": 0.29188022017478943,\n# \"entity\": {\n# \"id\": 799,\n# \"color_tag\": \"red_2235\"\n# }\n# },\n# {\n# \"id\": 564,\n# \"distance\": 0.3492690920829773,\n# \"entity\": {\n# \"id\": 564,\n# \"color_tag\": \"red_9186\"\n# }\n# }\n# ]\n# ]\n","// 4. Search with non-schema-defined fields\nList> queryVectors = Arrays.asList(Arrays.asList(0.3580376395471989f, -0.6023495712049978f, 0.18414012509913835f, -0.26286205330961354f, 0.9029438446296592f));\n\nSearchReq searchReq = SearchReq.builder()\n .collectionName(\"customized_setup\")\n .data(queryVectors)\n .filter(\"$meta[\\\"color\\\"] in [\\\"red\\\", \\\"green\\\"]\")\n .outputFields(List.of(\"id\", \"color_tag\"))\n .topK(3)\n .build();\n\nSearchResp searchResp = client.search(searchReq);\n\nSystem.out.println(JSONObject.toJSON(searchResp));\n\n// Output:\n// {\"searchResults\": [[\n// {\n// \"distance\": 1.3159835,\n// \"id\": 979,\n// \"entity\": {\n// \"color_tag\": \"red_7155\",\n// \"id\": 979\n// }\n// },\n// {\n// \"distance\": 1.0744804,\n// \"id\": 44,\n// \"entity\": {\n// \"color_tag\": \"green_8006\",\n// \"id\": 44\n// }\n// },\n// {\n// \"distance\": 1.0060014,\n// \"id\": 617,\n// \"entity\": {\n// \"color_tag\": \"red_4056\",\n// \"id\": 617\n// }\n// }\n// ]]}\n","// 4. 
Search with non-schema-defined fields\nconst query_vectors = [[0.1, 0.2, 0.3, 0.4, 0.5]]\n\nres = await client.search({\n collection_name: \"test_collection\",\n data: query_vectors,\n filter: \"color in [\\\"red\\\", \\\"green\\\"]\",\n output_fields: [\"color_tag\"],\n limit: 3\n})\n\nconsole.log(res.results)\n\n// Output\n// \n// [\n// { score: 1.2284551858901978, id: '301', color_tag: 'red_1270' },\n// { score: 1.2195171117782593, id: '205', color_tag: 'red_2780' },\n// { score: 1.2055039405822754, id: '487', color_tag: 'red_6653' }\n// ]\n// \n","... \nfilter='$meta[\"$key\"] in [\"a\", \"b\", \"c\"]', \noutput_fields='$meta[\"$key\"]' \n...\n"],"headingContent":"","anchorList":[{"label":"启用动态字段","href":"Enable-Dynamic-Field","type":1,"isActive":false},{"label":"概述","href":"Overview","type":2,"isActive":false},{"label":"启用动态字段","href":"Enable-dynamic-field","type":2,"isActive":false},{"label":"插入动态数据","href":"Insert-dynamic-data","type":2,"isActive":false},{"label":"使用动态字段搜索","href":"Search-with-dynamic-fields","type":2,"isActive":false},{"label":"重述","href":"Recaps","type":2,"isActive":false}]} \ No newline at end of file +{ + "codeList": [ + "import random, time\nfrom pymilvus import connections, MilvusClient, DataType\n\nSERVER_ADDR = \"http://localhost:19530\"\n\n# 1. Set up a Milvus client\nclient = MilvusClient(\n uri=SERVER_ADDR\n)\n\n# 2. 
Create a collection\nschema = MilvusClient.create_schema(\n auto_id=False,\n enable_dynamic_field=True,\n)\n\nschema.add_field(field_name=\"id\", datatype=DataType.INT64, is_primary=True)\nschema.add_field(field_name=\"vector\", datatype=DataType.FLOAT_VECTOR, dim=5)\n\nindex_params = MilvusClient.prepare_index_params()\n\nindex_params.add_index(\n field_name=\"id\",\n index_type=\"STL_SORT\"\n)\n\nindex_params.add_index(\n field_name=\"vector\",\n index_type=\"IVF_FLAT\",\n metric_type=\"L2\",\n params={\"nlist\": 1024}\n)\n\nclient.create_collection(\n collection_name=\"test_collection\",\n schema=schema,\n index_params=index_params\n)\n\nres = client.get_load_state(\n collection_name=\"test_collection\"\n)\n\nprint(res)\n\n# Output\n#\n# {\n# \"state\": \"\"\n# }\n", + "import io.milvus.v2.client.ConnectConfig;\nimport io.milvus.v2.client.MilvusClientV2;\nimport io.milvus.v2.common.DataType;\nimport io.milvus.v2.common.IndexParam;\nimport io.milvus.v2.service.collection.request.AddFieldReq;\nimport io.milvus.v2.service.collection.request.CreateCollectionReq;\nimport io.milvus.v2.service.collection.request.GetLoadStateReq;\n\nString CLUSTER_ENDPOINT = \"http://localhost:19530\";\n\n// 1. Connect to Milvus server\nConnectConfig connectConfig = ConnectConfig.builder()\n .uri(CLUSTER_ENDPOINT)\n .build();\n\nMilvusClientV2 client = new MilvusClientV2(connectConfig);\n\n// 2. 
Create a collection in customized setup mode\n\n// 2.1 Create schema\nCreateCollectionReq.CollectionSchema schema = client.createSchema();\n\n// 2.2 Add fields to schema\nschema.addField(AddFieldReq.builder().fieldName(\"id\").dataType(DataType.Int64).isPrimaryKey(true).autoID(false).build());\nschema.addField(AddFieldReq.builder().fieldName(\"vector\").dataType(DataType.FloatVector).dimension(5).build());\n\n// 2.3 Prepare index parameters\nIndexParam indexParamForIdField = IndexParam.builder()\n .fieldName(\"id\")\n .indexType(IndexParam.IndexType.STL_SORT)\n .build();\n\nIndexParam indexParamForVectorField = IndexParam.builder()\n .fieldName(\"vector\")\n .indexType(IndexParam.IndexType.IVF_FLAT)\n .metricType(IndexParam.MetricType.IP)\n .extraParams(Map.of(\"nlist\", 1024))\n .build();\n\nList indexParams = new ArrayList<>();\nindexParams.add(indexParamForIdField);\nindexParams.add(indexParamForVectorField);\n\n// 2.4 Create a collection with schema and index parameters\nCreateCollectionReq customizedSetupReq = CreateCollectionReq.builder()\n .collectionName(\"customized_setup\")\n .collectionSchema(schema)\n .indexParams(indexParams)\n .enableDynamicField(true)\n .build();\n\nclient.createCollection(customizedSetupReq);\n\nThread.sleep(5000);\n\n// 2.5 Get load state of the collection\nGetLoadStateReq customSetupLoadStateReq1 = GetLoadStateReq.builder()\n .collectionName(\"customized_setup\")\n .build();\n\nboolean res = client.getLoadState(customSetupLoadStateReq1);\n\nSystem.out.println(res);\n\n// Output:\n// true\n", + "const { MilvusClient, DataType, sleep } = require(\"@zilliz/milvus2-sdk-node\")\n\nconst address = \"http://localhost:19530\"\n\nasync function main() {\n// 1. Set up a Milvus Client\nclient = new MilvusClient({address}); \n\n// 2. 
Create a collection\n// 2.1 Define fields\nconst fields = [\n {\n name: \"id\",\n data_type: DataType.Int64,\n is_primary_key: true,\n auto_id: false\n },\n {\n name: \"vector\",\n data_type: DataType.FloatVector,\n dim: 5\n },\n]\n\n// 2.2 Prepare index parameters\nconst index_params = [{\n field_name: \"id\",\n index_type: \"STL_SORT\"\n},{\n field_name: \"vector\",\n index_type: \"IVF_FLAT\",\n metric_type: \"IP\",\n params: { nlist: 1024}\n}]\n\n// 2.3 Create a collection with fields and index parameters\nres = await client.createCollection({\n collection_name: \"test_collection\",\n fields: fields, \n index_params: index_params,\n enable_dynamic_field: true\n})\n\nconsole.log(res.error_code)\n\n// Output\n// \n// Success\n// \n\nres = await client.getLoadState({\n collection_name: \"test_collection\",\n}) \n\nconsole.log(res.state)\n\n// Output\n// \n// LoadStateLoaded\n// \n", + "colors = [\"green\", \"blue\", \"yellow\", \"red\", \"black\", \"white\", \"purple\", \"pink\", \"orange\", \"brown\", \"grey\"]\ndata = []\n\nfor i in range(1000):\n current_color = random.choice(colors)\n current_tag = random.randint(1000, 9999)\n data.append({\n \"id\": i,\n \"vector\": [ random.uniform(-1, 1) for _ in range(5) ],\n \"color\": current_color,\n \"tag\": current_tag,\n \"color_tag\": f\"{current_color}_{str(current_tag)}\"\n })\n\nprint(data[0])\n", + "List colors = Arrays.asList(\"green\", \"blue\", \"yellow\", \"red\", \"black\", \"white\", \"purple\", \"pink\", \"orange\", \"brown\", \"grey\");\nList data = new ArrayList<>();\n\nfor (int i=0; i<1000; i++) {\n Random rand = new Random();\n String current_color = colors.get(rand.nextInt(colors.size()-1));\n int current_tag = rand.nextInt(8999) + 1000;\n JSONObject row = new JSONObject();\n row.put(\"id\", Long.valueOf(i));\n row.put(\"vector\", Arrays.asList(rand.nextFloat(), rand.nextFloat(), rand.nextFloat(), rand.nextFloat(), rand.nextFloat()));\n row.put(\"color\", current_color);\n row.put(\"tag\", 
current_tag);\n row.put(\"color_tag\", current_color + \"_\" + String.valueOf(rand.nextInt(8999) + 1000));\n data.add(row);\n}\n\nSystem.out.println(JSONObject.toJSON(data.get(0)));\n", + "const colors = [\"green\", \"blue\", \"yellow\", \"red\", \"black\", \"white\", \"purple\", \"pink\", \"orange\", \"brown\", \"grey\"]\nvar data = []\n\nfor (let i = 0; i < 1000; i++) {\n const current_color = colors[Math.floor(Math.random() * colors.length)]\n const current_tag = Math.floor(Math.random() * 8999 + 1000)\n data.push({\n id: i,\n vector: [Math.random(), Math.random(), Math.random(), Math.random(), Math.random()],\n color: current_color,\n tag: current_tag,\n color_tag: `${current_color}_${current_tag}`\n })\n}\n\nconsole.log(data[0])\n", + "{\n id: 0,\n vector: [\n 0.1275656405044483,\n 0.47417858592773277,\n 0.13858264437643286,\n 0.2390904907020377,\n 0.8447862593689635\n ],\n color: 'blue',\n tag: 2064,\n color_tag: 'blue_2064'\n}\n", + "res = client.insert(\n collection_name=\"test_collection\",\n data=data,\n)\n\nprint(res)\n\n# Output\n#\n# {\n# \"insert_count\": 1000,\n# \"ids\": [\n# 0,\n# 1,\n# 2,\n# 3,\n# 4,\n# 5,\n# 6,\n# 7,\n# 8,\n# 9,\n# \"(990 more items hidden)\"\n# ]\n# }\n\ntime.sleep(5)\n", + "// 3.1 Insert data into the collection\nInsertReq insertReq = InsertReq.builder()\n .collectionName(\"customized_setup\")\n .data(data)\n .build();\n\nInsertResp insertResp = client.insert(insertReq);\n\nSystem.out.println(JSONObject.toJSON(insertResp));\n\n// Output:\n// {\"insertCnt\": 1000}\n\nThread.sleep(5000);\n", + "res = await client.insert({\n collection_name: \"test_collection\",\n data: data,\n})\n\nconsole.log(res.insert_cnt)\n\n// Output\n// \n// 1000\n// \n\nawait sleep(5000)\n", + "# 4. 
Search with dynamic fields\nquery_vectors = [[0.3580376395471989, -0.6023495712049978, 0.18414012509913835, -0.26286205330961354, 0.9029438446296592]]\n\nres = client.search(\n collection_name=\"test_collection\",\n data=query_vectors,\n filter=\"color in [\\\"red\\\", \\\"green\\\"]\",\n search_params={\"metric_type\": \"L2\", \"params\": {\"nprobe\": 10}},\n limit=3\n)\n\nprint(res)\n\n# Output\n#\n# [\n# [\n# {\n# \"id\": 863,\n# \"distance\": 0.188413605093956,\n# \"entity\": {\n# \"id\": 863,\n# \"color_tag\": \"red_2371\"\n# }\n# },\n# {\n# \"id\": 799,\n# \"distance\": 0.29188022017478943,\n# \"entity\": {\n# \"id\": 799,\n# \"color_tag\": \"red_2235\"\n# }\n# },\n# {\n# \"id\": 564,\n# \"distance\": 0.3492690920829773,\n# \"entity\": {\n# \"id\": 564,\n# \"color_tag\": \"red_9186\"\n# }\n# }\n# ]\n# ]\n", + "// 4. Search with non-schema-defined fields\nList> queryVectors = Arrays.asList(Arrays.asList(0.3580376395471989f, -0.6023495712049978f, 0.18414012509913835f, -0.26286205330961354f, 0.9029438446296592f));\n\nSearchReq searchReq = SearchReq.builder()\n .collectionName(\"customized_setup\")\n .data(queryVectors)\n .filter(\"$meta[\\\"color\\\"] in [\\\"red\\\", \\\"green\\\"]\")\n .outputFields(List.of(\"id\", \"color_tag\"))\n .topK(3)\n .build();\n\nSearchResp searchResp = client.search(searchReq);\n\nSystem.out.println(JSONObject.toJSON(searchResp));\n\n// Output:\n// {\"searchResults\": [[\n// {\n// \"distance\": 1.3159835,\n// \"id\": 979,\n// \"entity\": {\n// \"color_tag\": \"red_7155\",\n// \"id\": 979\n// }\n// },\n// {\n// \"distance\": 1.0744804,\n// \"id\": 44,\n// \"entity\": {\n// \"color_tag\": \"green_8006\",\n// \"id\": 44\n// }\n// },\n// {\n// \"distance\": 1.0060014,\n// \"id\": 617,\n// \"entity\": {\n// \"color_tag\": \"red_4056\",\n// \"id\": 617\n// }\n// }\n// ]]}\n", + "// 4. 
Search with non-schema-defined fields\nconst query_vectors = [[0.1, 0.2, 0.3, 0.4, 0.5]]\n\nres = await client.search({\n collection_name: \"test_collection\",\n data: query_vectors,\n filter: \"color in [\\\"red\\\", \\\"green\\\"]\",\n output_fields: [\"color_tag\"],\n limit: 3\n})\n\nconsole.log(res.results)\n\n// Output\n// \n// [\n// { score: 1.2284551858901978, id: '301', color_tag: 'red_1270' },\n// { score: 1.2195171117782593, id: '205', color_tag: 'red_2780' },\n// { score: 1.2055039405822754, id: '487', color_tag: 'red_6653' }\n// ]\n// \n", + "... \nfilter='$meta[\"$key\"] in [\"a\", \"b\", \"c\"]', \noutput_fields='$meta[\"$key\"]' \n...\n" + ], + "headingContent": "", + "anchorList": [ + { + "label": "启用Dynamic Field", + "href": "Enable-Dynamic-Field", + "type": 1, + "isActive": false + }, + { "label": "概述", "href": "Overview", "type": 2, "isActive": false }, + { + "label": "启用Dynamic Field", + "href": "Enable-dynamic-field", + "type": 2, + "isActive": false + }, + { + "label": "插入动态数据", + "href": "Insert-dynamic-data", + "type": 2, + "isActive": false + }, + { + "label": "使用Dynamic Field搜索", + "href": "Search-with-dynamic-fields", + "type": 2, + "isActive": false + }, + { "label": "重述", "href": "Recaps", "type": 2, "isActive": false } + ] +} diff --git a/localization/v2.4.x/site/zh/userGuide/enable-dynamic-field.md b/localization/v2.4.x/site/zh/userGuide/enable-dynamic-field.md index 28ee10ea8..f6e61882b 100644 --- a/localization/v2.4.x/site/zh/userGuide/enable-dynamic-field.md +++ b/localization/v2.4.x/site/zh/userGuide/enable-dynamic-field.md @@ -1,9 +1,10 @@ --- id: enable-dynamic-field.md -title: 启用动态字段 -summary: '' +title: 启用Dynamic Field +summary: "" --- -

    启用动态字段

    本页介绍如何在集合中使用动态字段,以灵活插入和检索数据。

    +

    本页介绍如何在集合中使用Dynamic Field,以灵活插入和检索数据。

    概述

    Milvus 允许你通过设置每个特定字段的名称和数据类型来定义集合的模式,以便在这些字段中创建索引,提高搜索性能。

    -

    一旦定义了字段,您就需要在插入数据时包含该字段。如果某些字段并不总是出现在所有数据条目中,该怎么办?这就是动态字段的用武之地。

    -

    集合中的动态字段是一个名为 $meta 的保留 JSON 字段。它可以将非模式定义的字段及其值作为键值对保存。使用动态字段,您可以搜索和查询模式定义的字段以及它们可能包含的任何非模式定义的字段。

    -

    启用动态字段

    在为集合定义模式时,可将enable_dynamic_field 设置为True 以启用预留动态字段,这表示以后插入的任何非模式定义字段及其值都将作为键值对保存在预留动态字段中。

    -

    下面的代码段创建了一个带有两个模式定义字段(即 id 和向量)的集合,并启用了动态字段。

    +

    在为集合定义模式时,可将enable_dynamic_field 设置为True 以启用预留Dynamic Field,这表示以后插入的任何非模式定义字段及其值都将作为键值对保存在预留Dynamic Field中。

    +

    下面的代码段创建了一个带有两个模式定义字段(即 id 和向量)的集合,并启用了Dynamic Field。

    有关参数的更多信息,请参阅 create_collection()有关参数的更多信息,请参阅 SDK 参考资料中的

    @@ -72,13 +73,13 @@ SERVER_ADDR = "http://localhost:19530"# 1. Set up a Milvus client client = MilvusClient( - uri=SERVER_ADDR +uri=SERVER_ADDR ) # 2. Create a collection schema = MilvusClient.create_schema( - auto_id=False, - enable_dynamic_field=True, +auto_id=False, +enable_dynamic_field=True, ) schema.add_field(field_name="id", datatype=DataType.INT64, is_primary=True) @@ -87,25 +88,25 @@ schema.add_field(field_name="vector", index_params = MilvusClient.prepare_index_params() index_params.add_index( - field_name="id", - index_type="STL_SORT" +field_name="id", +index_type="STL_SORT" ) index_params.add_index( - field_name="vector", - index_type="IVF_FLAT", - metric_type="L2", - params={"nlist": 1024} +field_name="vector", +index_type="IVF_FLAT", +metric_type="L2", +params={"nlist": 1024} ) client.create_collection( - collection_name="test_collection", - schema=schema, - index_params=index_params +collection_name="test_collection", +schema=schema, +index_params=index_params ) res = client.get_load_state( - collection_name="test_collection" +collection_name="test_collection" ) print(res) @@ -113,9 +114,10 @@ res = client.get_load_state( # Output # # { -# "state": "<LoadState: Loaded>" +# "state": "<LoadState: Loaded>" # }
    +
    import io.milvus.v2.client.ConnectConfig;
     import io.milvus.v2.client.MilvusClientV2;
     import io.milvus.v2.common.DataType;
    @@ -267,18 +269,19 @@ res = await client.for i in range(1000):
    -    current_color = random.choice(colors)
    -    current_tag = random.randint(1000, 9999)
    -    data.append({
    -        "id": i,
    -        "vector": [ random.uniform(-1, 1) for _ in range(5) ],
    -        "color": current_color,
    -        "tag": current_tag,
    -        "color_tag": f"{current_color}_{str(current_tag)}"
    -    })
    +current_color = random.choice(colors)
    +current_tag = random.randint(1000, 9999)
    +data.append({
    +"id": i,
    +"vector": [ random.uniform(-1, 1) for _ in range(5) ],
    +"color": current_color,
    +"tag": current_tag,
    +"color_tag": f"{current_color}_{str(current_tag)}"
    +})
     
     print(data[0])
     
    +
    List<String> colors = Arrays.asList("green", "blue", "yellow", "red", "black", "white", "purple", "pink", "orange", "brown", "grey");
     List<JSONObject> data = new ArrayList<>();
     
    @@ -351,24 +354,25 @@ data = []
     # Output
     #
     # {
    -#     "insert_count": 1000,
    -#     "ids": [
    -#         0,
    -#         1,
    -#         2,
    -#         3,
    -#         4,
    -#         5,
    -#         6,
    -#         7,
    -#         8,
    -#         9,
    -#         "(990 more items hidden)"
    -#     ]
    +# "insert_count": 1000,
    +# "ids": [
    +# 0,
    +# 1,
    +# 2,
    +# 3,
    +# 4,
    +# 5,
    +# 6,
    +# 7,
    +# 8,
    +# 9,
    +# "(990 more items hidden)"
    +# ]
     # }
     
     time.sleep(5)
     
    +
    // 3.1 Insert data into the collection
     InsertReq insertReq = InsertReq.builder()
         .collectionName("customized_setup")
    @@ -398,7 +402,7 @@ time.sleep(5)
     
     await sleep(5000)
     
    -

    使用动态字段搜索

    如果在创建集合时启用了动态字段并插入了非模式定义的字段,则可以在搜索或查询的筛选表达式中使用这些字段,如下所示。

    +

    如果在创建集合时启用了Dynamic Field并插入了非模式定义的字段,则可以在搜索或查询的筛选表达式中使用这些字段,如下所示。

    有关参数的更多信息,请参阅 search()有关参数的更多信息,请参阅 SDK 参考资料中的

    @@ -429,11 +433,11 @@ time.sleep(5) query_vectors = [[0.3580376395471989, -0.6023495712049978, 0.18414012509913835, -0.26286205330961354, 0.9029438446296592]] res = client.search( - collection_name="test_collection", - data=query_vectors, - filter="color in [\"red\", \"green\"]", - search_params={"metric_type": "L2", "params": {"nprobe": 10}}, - limit=3 +collection_name="test_collection", +data=query_vectors, +filter="color in [\"red\", \"green\"]", +search_params={"metric_type": "L2", "params": {"nprobe": 10}}, +limit=3 ) print(res) @@ -441,34 +445,35 @@ res = client.search( # Output # # [ -# [ -# { -# "id": 863, -# "distance": 0.188413605093956, -# "entity": { -# "id": 863, -# "color_tag": "red_2371" -# } -# }, -# { -# "id": 799, -# "distance": 0.29188022017478943, -# "entity": { -# "id": 799, -# "color_tag": "red_2235" -# } -# }, -# { -# "id": 564, -# "distance": 0.3492690920829773, -# "entity": { -# "id": 564, -# "color_tag": "red_9186" -# } -# } -# ] +# [ +# { +# "id": 863, +# "distance": 0.188413605093956, +# "entity": { +# "id": 863, +# "color_tag": "red_2371" +# } +# }, +# { +# "id": 799, +# "distance": 0.29188022017478943, +# "entity": { +# "id": 799, +# "color_tag": "red_2235" +# } +# }, +# { +# "id": 564, +# "distance": 0.3492690920829773, +# "entity": { +# "id": 564, +# "color_tag": "red_9186" +# } +# } +# ] # ]
    +
    // 4. Search with non-schema-defined fields
     List<List<Float>> queryVectors = Arrays.asList(Arrays.asList(0.3580376395471989f, -0.6023495712049978f, 0.18414012509913835f, -0.26286205330961354f, 0.9029438446296592f));
     
    @@ -549,7 +554,7 @@ res = await client.
           
    -    

    值得注意的是,colortagcolor_tag在定义集合模式时并不存在,但您可以在进行搜索和查询时将它们用作模式定义的字段。

    +

    值得注意的是,colortagcolor_tag在定义Collection Schema 时并不存在,但您可以在进行搜索和查询时将它们用作模式定义的字段。

    如果非模式定义字段的名称包含除数字、字母和下划线以外的字符,如加号 (+)、星号 (*) 或美元符号 ($),则在布尔表达式中使用该字段或将其包含在输出字段中时,必须将该键包含在$meta[]中,如以下代码片段所示。

    ... 
     filter='$meta["$key"] in ["a", "b", "c"]', 
    diff --git a/localization/v2.4.x/site/zh/userGuide/insert-update-delete.md b/localization/v2.4.x/site/zh/userGuide/insert-update-delete.md
    index 4a9c072b6..9f30d93c2 100644
    --- a/localization/v2.4.x/site/zh/userGuide/insert-update-delete.md
    +++ b/localization/v2.4.x/site/zh/userGuide/insert-update-delete.md
    @@ -3,6 +3,7 @@ id: insert-update-delete.md
     summary: 本指南将引导您了解集合中的数据操作,包括插入、向上插入和删除。
     title: 插入、倒置和删除
     ---
    +
     

    插入、上插和删除

    在 Milvus 数据集中,实体是一个数据集中可识别的单一实例。它代表特定类别中的一个独特成员,无论是图书馆中的一本书、基因组中的一个基因,还是任何其他可识别的实体。

    集合中的实体共享一组共同的属性,称为模式,概述了每个实体必须遵守的结构,包括字段名称、数据类型和任何其他限制。

    -

    要将实体成功插入到集合中,所提供的数据必须包含目标集合的所有模式定义字段。此外,只有启用了动态字段,才能包含非模式定义的字段。有关详细信息,请参阅启用动态字段

    +

    要将实体成功插入到集合中,所提供的数据必须包含目标集合的所有模式定义字段。此外,只有启用了Dynamic Field,才能包含非模式定义的字段。有关详细信息,请参阅启用Dynamic Field

    准备工作

    +
    import io.milvus.v2.client.ConnectConfig;
     import io.milvus.v2.client.MilvusClientV2;
     import io.milvus.v2.service.collection.request.CreateCollectionReq;
    @@ -157,7 +159,7 @@ client = new M
               d="M4 9h1v1H4c-1.5 0-3-1.69-3-3.5S2.55 3 4 3h4c1.45 0 3 1.69 3 3.5 0 1.41-.91 2.72-2 3.25V8.59c.58-.45 1-1.27 1-2.09C10 5.22 8.98 4 8 4H4c-.98 0-2 1.22-2 2.5S3 9 4 9zm9-3h-1v1h1c1 0 2 1.22 2 2.5S13.98 12 13 12H9c-.98 0-2-1.22-2-2.5 0-.83.42-1.64 1-2.09V6.25c-1.09.53-2 1.84-2 3.25C6 11.31 7.55 13 9 13h4c1.45 0 3-1.69 3-3.5S14.5 6 13 6z"
             >
           
    -    

    要插入实体,需要将数据组织成一个字典列表,其中每个字典代表一个实体。每个字典都包含与目标集合中预定义字段和动态字段相对应的键。

    +

    要插入实体,需要将数据组织成一个字典列表,其中每个字典代表一个实体。每个字典都包含与目标集合中预定义字段和Dynamic Field相对应的键。

    要在集合中插入实体,请使用 insert()方法。

    @@ -184,8 +186,8 @@ data=[ ] res = client.insert( - collection_name="quick_setup", - data=data +collection_name="quick_setup", +data=data ) print(res) @@ -193,21 +195,22 @@ res = client.insert( # Output # # { -# "insert_count": 10, -# "ids": [ -# 0, -# 1, -# 2, -# 3, -# 4, -# 5, -# 6, -# 7, -# 8, -# 9 -# ] +# "insert_count": 10, +# "ids": [ +# 0, +# 1, +# 2, +# 3, +# 4, +# 5, +# 6, +# 7, +# 8, +# 9 +# ] # }
    +
    import java.util.Arrays;
     import java.util.List;
     import java.util.Map;
    @@ -285,14 +288,14 @@ data=[
     ]
     
     client.create_partition(
    -    collection_name="quick_setup",
    -    partition_name="partitionA"
    +collection_name="quick_setup",
    +partition_name="partitionA"
     )
     
     res = client.insert(
    -    collection_name="quick_setup",
    -    data=data,
    -    partition_name="partitionA"
    +collection_name="quick_setup",
    +data=data,
    +partition_name="partitionA"
     )
     
     print(res)
    @@ -300,21 +303,22 @@ res = client.insert(
     # Output
     #
     # {
    -#     "insert_count": 10,
    -#     "ids": [
    -#         10,
    -#         11,
    -#         12,
    -#         13,
    -#         14,
    -#         15,
    -#         16,
    -#         17,
    -#         18,
    -#         19
    -#     ]
    +# "insert_count": 10,
    +# "ids": [
    +# 10,
    +# 11,
    +# 12,
    +# 13,
    +# 14,
    +# 15,
    +# 16,
    +# 17,
    +# 18,
    +# 19
    +# ]
     # }
     
    +
    // 4. Insert some more data into a specific partition
     data = Arrays.asList(
         new JSONObject(Map.of("id", 10L, "vector", Arrays.asList(-0.5570353903748935f, -0.8997887893201304f, -0.7123782431855732f, -0.6298990746450119f, 0.6699215060604258f), "color", "red_1202")),
    @@ -435,8 +439,8 @@ data=[
     ]
     
     res = client.upsert(
    -    collection_name='quick_setup',
    -    data=data
    +collection_name='quick_setup',
    +data=data
     )
     
     print(res)
    @@ -444,9 +448,10 @@ res = client.upsert(
     # Output
     #
     # {
    -#     "upsert_count": 10
    +# "upsert_count": 10
     # }
     
    +
    // 5. Upsert some data
     data = Arrays.asList(
         new JSONObject(Map.of("id", 0L, "vector", Arrays.asList(-0.619954382375778f, 0.4479436794798608f, -0.17493894838751745f, -0.4248030059917294f, -0.8648452746018911f), "color", "black_9898")),
    @@ -517,9 +522,9 @@ data=[
     ]
     
     res = client.upsert(
    -    collection_name="quick_setup",
    -    data=data,
    -    partition_name="partitionA"
    +collection_name="quick_setup",
    +data=data,
    +partition_name="partitionA"
     )
     
     print(res)
    @@ -527,9 +532,10 @@ res = client.upsert(
     # Output
     #
     # {
    -#     "upsert_count": 10
    +# "upsert_count": 10
     # }
     
    +
    import io.milvus.v2.service.vector.request.UpsertReq;
     import io.milvus.v2.service.vector.response.UpsertResp;
     
    @@ -632,9 +638,10 @@ res = client.delete(
     # Output
     #
     # {
    -#     "delete_count": 3
    +# "delete_count": 3
     # }
     
    +
    import io.milvus.v2.service.vector.request.DeleteReq;
     import io.milvus.v2.service.vector.response.DeleteResp;
     
    @@ -681,9 +688,10 @@ res = await client.# Output
     #
     # {
    -#     "delete_count": 2
    +# "delete_count": 2
     # }
     
    +
    deleteReq = DeleteReq.builder()
         .collectionName("quick_setup")
         .ids(Arrays.asList(18L, 19L))
    @@ -724,8 +732,9 @@ partition_name='partitionA',
     print("Entities deleted from partitionA: ", res['delete_count'])
     
     # Output:
    -# Entities deleted from partitionA:  3
    +# Entities deleted from partitionA: 3
     
    +
    deleteReq = DeleteReq.builder()
         .collectionName("quick_setup")
         .filter('color like "blue%"')
    diff --git a/localization/v2.4.x/site/zh/userGuide/manage-collections.md b/localization/v2.4.x/site/zh/userGuide/manage-collections.md
    index c0d45bdad..f53384ca9 100644
    --- a/localization/v2.4.x/site/zh/userGuide/manage-collections.md
    +++ b/localization/v2.4.x/site/zh/userGuide/manage-collections.md
    @@ -1,8 +1,9 @@
     ---
     id: manage-collections.md
    -title: 管理收藏
    -summary: ''
    +title: 管理 Collection
    +summary: ""
     ---
    +
     

    Manage Collections

    +
    import io.milvus.v2.client.ConnectConfig;
     import io.milvus.v2.client.MilvusClientV2;
     import io.milvus.v2.service.collection.request.GetLoadStateReq;
    @@ -254,14 +256,15 @@ $ curl -X POST "http://# 3.1. Create schema
     schema = MilvusClient.create_schema(
    -    auto_id=False,
    -    enable_dynamic_field=True,
    +auto_id=False,
    +enable_dynamic_field=True,
     )
     
     # 3.2. Add fields to schema
     schema.add_field(field_name="my_id", datatype=DataType.INT64, is_primary=True)
     schema.add_field(field_name="my_vector", datatype=DataType.FLOAT_VECTOR, dim=5)
     
    +
    import io.milvus.v2.common.DataType;
     import io.milvus.v2.service.collection.request.CreateCollectionReq;
     
    @@ -461,17 +464,18 @@ index_params = client.prepare_index_params()
     
     # 3.4. Add indexes
     index_params.add_index(
    -    field_name="my_id",
    -    index_type="STL_SORT"
    +field_name="my_id",
    +index_type="STL_SORT"
     )
     
     index_params.add_index(
    -    field_name="my_vector", 
    -    index_type="IVF_FLAT",
    -    metric_type="IP",
    -    params={ "nlist": 128 }
    +field_name="my_vector",
    +index_type="IVF_FLAT",
    +metric_type="IP",
    +params={ "nlist": 128 }
     )
     
    +
    import io.milvus.v2.common.IndexParam;
     
     // 3.3 Prepare index parameters
    @@ -659,7 +663,7 @@ client.create_collection(
     time.sleep(5)
     
     res = client.get_load_state(
    -    collection_name="customized_setup_1"
    +collection_name="customized_setup_1"
     )
     
     print(res)
    @@ -667,9 +671,10 @@ res = client.get_load_state(
     # Output
     #
     # {
    -#     "state": "<LoadState: Loaded>"
    +# "state": "<LoadState: Loaded>"
     # }
     
    +
    import io.milvus.v2.service.collection.request.CreateCollectionReq;
     import io.milvus.v2.service.collection.request.GetLoadStateReq;
     
    @@ -799,7 +804,7 @@ client.create_collection(
     )
     
     res = client.get_load_state(
    -    collection_name="customized_setup_2"
    +collection_name="customized_setup_2"
     )
     
     print(res)
    @@ -807,9 +812,10 @@ res = client.get_load_state(
     # Output
     #
     # {
    -#     "state": "<LoadState: NotLoad>"
    +# "state": "<LoadState: NotLoad>"
     # }
     
    +
    // 3.6 Create a collection and index it separately
     CreateCollectionReq customizedSetupReq2 = CreateCollectionReq.builder()
         .collectionName("customized_setup_2")
    @@ -1018,7 +1024,7 @@ client.create_index(
     )
     
     res = client.get_load_state(
    -    collection_name="customized_setup_2"
    +collection_name="customized_setup_2"
     )
     
     print(res)
    @@ -1026,9 +1032,10 @@ res = client.get_load_state(
     # Output
     #
     # {
    -#     "state": "<LoadState: NotLoad>"
    +# "state": "<LoadState: NotLoad>"
     # }
     
    +
    CreateIndexReq  createIndexReq = CreateIndexReq.builder()
         .collectionName("customized_setup_2")
         .indexParams(indexParams)
    @@ -1256,40 +1263,41 @@ res = client.describe_collection(
     # Output
     #
     # {
    -#     "collection_name": "customized_setup_2",
    -#     "auto_id": false,
    -#     "num_shards": 1,
    -#     "description": "",
    -#     "fields": [
    -#         {
    -#             "field_id": 100,
    -#             "name": "my_id",
    -#             "description": "",
    -#             "type": 5,
    -#             "params": {},
    -#             "element_type": 0,
    -#             "is_primary": true
    -#         },
    -#         {
    -#             "field_id": 101,
    -#             "name": "my_vector",
    -#             "description": "",
    -#             "type": 101,
    -#             "params": {
    -#                 "dim": 5
    -#             },
    -#             "element_type": 0
    -#         }
    -#     ],
    -#     "aliases": [],
    -#     "collection_id": 448143479230158446,
    -#     "consistency_level": 2,
    -#     "properties": {},
    -#     "num_partitions": 1,
    -#     "enable_dynamic_field": true
    +# "collection_name": "customized_setup_2",
    +# "auto_id": false,
    +# "num_shards": 1,
    +# "description": "",
    +# "fields": [
    +# {
    +# "field_id": 100,
    +# "name": "my_id",
    +# "description": "",
    +# "type": 5,
    +# "params": {},
    +# "element_type": 0,
    +# "is_primary": true
    +# },
    +# {
    +# "field_id": 101,
    +# "name": "my_vector",
    +# "description": "",
    +# "type": 101,
    +# "params": {
    +# "dim": 5
    +# },
    +# "element_type": 0
    +# }
    +# ],
    +# "aliases": [],
    +# "collection_id": 448143479230158446,
    +# "consistency_level": 2,
    +# "properties": {},
    +# "num_partitions": 1,
    +# "enable_dynamic_field": true
     # }
     
     
    +
    import io.milvus.v2.service.collection.request.DescribeCollectionReq;
     import io.milvus.v2.service.collection.response.DescribeCollectionResp;
     
    @@ -1453,11 +1461,12 @@ res = client.list_collections()
     # Output
     #
     # [
    -#     "customized_setup_2",
    -#     "quick_setup",
    -#     "customized_setup_1"
    +# "customized_setup_2",
    +# "quick_setup",
    +# "customized_setup_1"
     # ]
     
    +
    import io.milvus.v2.service.collection.response.ListCollectionsResp;
     
     // 5. List all collection names
    @@ -1544,7 +1553,7 @@ client.load_collection(
     )
     
     res = client.get_load_state(
    -    collection_name="customized_setup_2"
    +collection_name="customized_setup_2"
     )
     
     print(res)
    @@ -1552,9 +1561,10 @@ res = client.get_load_state(
     # Output
     #
     # {
    -#     "state": "<LoadState: Loaded>"
    +# "state": "<LoadState: Loaded>"
     # }
     
    +
    import io.milvus.v2.service.collection.request.LoadCollectionReq;
     
     // 6. Load the collection
    @@ -1654,7 +1664,7 @@ client.release_collection(
     )
     
     res = client.get_load_state(
    -    collection_name="customized_setup_2"
    +collection_name="customized_setup_2"
     )
     
     print(res)
    @@ -1662,9 +1672,10 @@ res = client.get_load_state(
     # Output
     #
     # {
    -#     "state": "<LoadState: NotLoad>"
    +# "state": "<LoadState: NotLoad>"
     # }
     
    +
    import io.milvus.v2.service.collection.request.ReleaseCollectionReq;
     
     // 8. Release the collection
    @@ -1775,10 +1786,11 @@ client.create_alias(
     )
     
     client.create_alias(
    -    collection_name="customized_setup_2",
    -    alias="alice"
    +collection_name="customized_setup_2",
    +alias="alice"
     )
     
    +
    import io.milvus.v2.service.utility.request.CreateAliasReq;
     
     // 9. Manage aliases
    @@ -1952,14 +1964,15 @@ res = client.list_aliases(
     # Output
     #
     # {
    -#     "aliases": [
    -#         "bob",
    -#         "alice"
    -#     ],
    -#     "collection_name": "customized_setup_2",
    -#     "db_name": "default"
    +# "aliases": [
    +# "bob",
    +# "alice"
    +# ],
    +# "collection_name": "customized_setup_2",
    +# "db_name": "default"
     # }
     
    +
    import io.milvus.v2.service.utility.request.ListAliasesReq;
     import io.milvus.v2.service.utility.response.ListAliasResp;
     
    @@ -2032,11 +2045,12 @@ res = client.describe_alias(
     # Output
     #
     # {
    -#     "alias": "bob",
    -#     "collection_name": "customized_setup_2",
    -#     "db_name": "default"
    +# "alias": "bob",
    +# "collection_name": "customized_setup_2",
    +# "db_name": "default"
     # }
     
    +
    import io.milvus.v2.service.utility.request.DescribeAliasReq;
     import io.milvus.v2.service.utility.response.DescribeAliasResp;
     
    @@ -2120,7 +2134,7 @@ client.alter_alias(
     )
     
     res = client.list_aliases(
    -    collection_name="customized_setup_1"
    +collection_name="customized_setup_1"
     )
     
     print(res)
    @@ -2128,15 +2142,15 @@ res = client.list_aliases(
     # Output
     #
     # {
    -#     "aliases": [
    -#         "alice"
    -#     ],
    -#     "collection_name": "customized_setup_1",
    -#     "db_name": "default"
    +# "aliases": [
    +# "alice"
    +# ],
    +# "collection_name": "customized_setup_1",
    +# "db_name": "default"
     # }
     
     res = client.list_aliases(
    -    collection_name="customized_setup_2"
    +collection_name="customized_setup_2"
     )
     
     print(res)
    @@ -2144,13 +2158,14 @@ res = client.list_aliases(
     # Output
     #
     # {
    -#     "aliases": [
    -#         "bob"
    -#     ],
    -#     "collection_name": "customized_setup_2",
    -#     "db_name": "default"
    +# "aliases": [
    +# "bob"
    +# ],
    +# "collection_name": "customized_setup_2",
    +# "db_name": "default"
     # }
     
    +
    import io.milvus.v2.service.utility.request.AlterAliasReq;
     
     // 9.4 Reassign alias to other collections
    @@ -2283,9 +2298,10 @@ client.drop_alias(
     )
     
     client.drop_alias(
    -    alias="alice"
    +alias="alice"
     )
     
    +
    import io.milvus.v2.service.utility.request.DropAliasReq;
     
     // 9.5 Drop alias
    @@ -2378,11 +2394,12 @@ collection = Collection("quick_setup"
     
     # Set the TTL for the data in the collection
     collection.set_properties(
    -    properties={
    -        "collection.ttl.seconds": 60
    -    }
    +properties={
    +"collection.ttl.seconds": 60
    +}
     )
     
    +

    Set MMAP

    Configure the memory mapping (MMAP) property for the collection, which determines whether data is mapped into memory to improve query performance. For more information, refer to Configure memory mapping .

    @@ -2401,11 +2418,12 @@ collection.release() # Set memory mapping property to True or Flase collection.set_properties( - properties={ - "mmap.enabled": True - } +properties={ +"mmap.enabled": True +} )
    +

    Drop a Collection +
    import io.milvus.v2.service.collection.request.DropCollectionReq;
     
     // 10. Drop collections
    diff --git a/localization/v2.4.x/site/zh/userGuide/manage-partitions.md b/localization/v2.4.x/site/zh/userGuide/manage-partitions.md
    index 8b59b7992..19edb5043 100644
    --- a/localization/v2.4.x/site/zh/userGuide/manage-partitions.md
    +++ b/localization/v2.4.x/site/zh/userGuide/manage-partitions.md
    @@ -1,8 +1,9 @@
     ---
     id: manage-partitions.md
     title: 管理分区
    -summary: ''
    +summary: ""
     ---
    +
     

    管理分区

    +
    import io.milvus.v2.client.ConnectConfig;
     import io.milvus.v2.client.MilvusClientV2;
     import io.milvus.v2.service.collection.request.CreateCollectionReq;
    @@ -153,6 +155,7 @@ res = client.list_partitions(collection_name="qui
     #
     # ["_default"]
     
    +
    import io.milvus.v2.service.partition.request.ListPartitionsReq;
     
     // 3. List all partitions in the collection
    @@ -219,8 +222,8 @@ client.create_partition(
     )
     
     client.create_partition(
    -    collection_name="quick_setup",
    -    partition_name="partitionB"
    +collection_name="quick_setup",
    +partition_name="partitionB"
     )
     
     res = client.list_partitions(collection_name="quick_setup")
    @@ -230,6 +233,7 @@ res = client.list_partitions(collection_name="qui
     #
     # ["_default", "partitionA", "partitionB"]
     
    +
    import io.milvus.v2.service.partition.request.CreatePartitionReq;
     
     // 4. Create more partitions
    @@ -288,7 +292,7 @@ res = await client.
     

    注释

    如果将某个字段设置为集合中的分区键,Milvus 将负责管理集合中的分区。因此,在尝试创建分区时可能会遇到提示错误。

    -

    有关详细信息,请参阅使用分区密钥

    +

    有关详细信息,请参阅使用Partition Key

    检查特定分区

    +
    import io.milvus.v2.service.partition.request.HasPartitionReq;
     
     // 5. Check whether a partition exists
    @@ -428,12 +433,12 @@ res = client.get_load_state(collection_name="quic
     # Output
     #
     # {
    -#     "state": "<LoadState: Loaded>"
    +# "state": "<LoadState: Loaded>"
     # }
     
     res = client.get_load_state(
    -    collection_name="quick_setup", 
    -    partition_name="partitionA"
    +collection_name="quick_setup",
    +partition_name="partitionA"
     )
     
     print(res)
    @@ -441,12 +446,12 @@ res = client.get_load_state(
     # Output
     #
     # {
    -#     "state": "<LoadState: Loaded>"
    +# "state": "<LoadState: Loaded>"
     # }
     
     res = client.get_load_state(
    -    collection_name="quick_setup", 
    -    partition_name="partitionB"
    +collection_name="quick_setup",
    +partition_name="partitionB"
     )
     
     print(res)
    @@ -454,10 +459,11 @@ res = client.get_load_state(
     # Output
     #
     # {
    -#     "state": "<LoadState: NotLoad>"
    +# "state": "<LoadState: NotLoad>"
     # }
     
     
    +
    import io.milvus.v2.service.collection.request.GetLoadStateReq;
     import io.milvus.v2.service.collection.request.ReleaseCollectionReq;
     import io.milvus.v2.service.partition.request.LoadPartitionsReq;
    @@ -606,9 +612,10 @@ res = client.get_load_state(collection_name="quic
     # Output
     #
     # {
    -#     "state": "<LoadState: Loaded>"
    +# "state": "<LoadState: Loaded>"
     # }
     
    +
    LoadPartitionsReq loadPartitionsReq = LoadPartitionsReq.builder()
         .collectionName("quick_setup")
         .partitionNames(List.of("partitionA"))
    @@ -654,27 +661,28 @@ res = await client."quick_setup",
    -    partition_name="partitionA"
    +collection_name="quick_setup",
    +partition_name="partitionA"
     )
     
     # Output
     #
     # {
    -#     "state": "<LoadState: Loaded>"
    +# "state": "<LoadState: Loaded>"
     # }
     
     res = client.get_load_status(
    -    collection_name="quick_setup",
    -    partition_name="partitionB"
    +collection_name="quick_setup",
    +partition_name="partitionB"
     )
     
     # Output
     #
     # {
    -#     "state": "<LoadState: Loaded>"
    +# "state": "<LoadState: Loaded>"
     # }
     
    +
    LoadPartitionsReq loadPartitionsReq = LoadPartitionsReq.builder()
         .collectionName("quick_setup")
         .partitionNames(List.of("partitionA", "partitionB"))
    @@ -753,8 +761,8 @@ client.release_partitions(
     )
     
     res = client.get_load_state(
    -    collection_name="quick_setup", 
    -    partition_name="partitionA"
    +collection_name="quick_setup",
    +partition_name="partitionA"
     )
     
     print(res)
    @@ -762,10 +770,11 @@ res = client.get_load_state(
     # Output
     #
     # {
    -#     "state": "<LoadState: NotLoad>"
    +# "state": "<LoadState: NotLoad>"
     # }
     
     
    +
    import io.milvus.v2.service.partition.request.ReleasePartitionsReq;
     
     // 7. Release a partition
    @@ -812,15 +821,16 @@ res = await client."quick_setup",
    +collection_name="quick_setup",
     )
     
     # Output
     #
     # {
    -#     "state": "<LoadState: NotLoad>"
    +# "state": "<LoadState: NotLoad>"
     # }
     
    +

    丢弃分区 +
    import io.milvus.v2.service.partition.request.ReleasePartitionsReq;
     
     ReleasePartitionsReq releasePartitionsReq = ReleasePartitionsReq.builder()
    @@ -931,9 +942,9 @@ res = await client.rootCoord.maxPartitionNum 来调整分区的最大数量。有关详情,请参阅系统配置

    -
  • 如何区分分区和分区密钥?

    -

    分区是物理存储单元,而分区密钥是逻辑概念,可根据指定列自动将数据分配到特定分区。

    +
  • 如何区分分区和Partition Key?

    +

    分区是物理存储单元,而Partition Key是逻辑概念,可根据指定列自动将数据分配到特定分区。

    例如,在 Milvus 中,如果你有一个定义了分区键为color 字段的集合,系统会根据每个实体的color 字段的散列值自动将数据分配到分区。这一自动化流程免除了用户在插入或搜索数据时手动指定分区的责任。

    另一方面,在手动创建分区时,需要根据分区键的标准为每个分区分配数据。如果你有一个带有color 字段的集合,你需要手动将color 值为red 的实体分配到partition A ,将color 值为blue 的实体分配到partition B 。这种手动管理需要更多的精力。

    -

    总之,分区和分区键都是用来优化数据计算和提高查询效率的。必须认识到,启用分区密钥意味着放弃对分区数据插入和加载的人工管理控制,因为这些过程完全由 Milvus 自动处理。

  • +

    总之,分区和分区键都是用来优化数据计算和提高查询效率的。必须认识到,启用Partition Key意味着放弃对分区数据插入和加载的人工管理控制,因为这些过程完全由 Milvus 自动处理。

    diff --git a/localization/v2.4.x/site/zh/userGuide/tools/cli_commands.md b/localization/v2.4.x/site/zh/userGuide/tools/cli_commands.md index d14d8103c..22175b24a 100644 --- a/localization/v2.4.x/site/zh/userGuide/tools/cli_commands.md +++ b/localization/v2.4.x/site/zh/userGuide/tools/cli_commands.md @@ -297,12 +297,12 @@ title: Milvus_CLI 命令参考 -c-集合名称集合的名称。 --f-字段模式(多个)<fieldName>:<dataType>:<dimOfVector/desc> 格式的字段模式。 +-f-字段 Schema (多个)<fieldName>:<dataType>:<dimOfVector/desc> 格式的字段 Schema 。 -p-主键字段主键字段的名称。 -a-schema-auto-id(自动 ID(可选)自动生成 ID 的标志。 -desc-模式描述(可选)集合的描述。 -级别-一致性级别(可选)一致性级别:有界、会话、强、最终。 --d-是否动态(可选)集合模式是否支持动态字段。 +-d-是否动态(可选)Collection Schema 是否支持Dynamic Field。 -s-碎片数(可选)分区编号 -帮助不适用显示命令使用帮助。 diff --git a/localization/v2.4.x/site/zh/userGuide/use-partition-key.json b/localization/v2.4.x/site/zh/userGuide/use-partition-key.json index 586862c81..f4c454380 100644 --- a/localization/v2.4.x/site/zh/userGuide/use-partition-key.json +++ b/localization/v2.4.x/site/zh/userGuide/use-partition-key.json @@ -1 +1,63 @@ -{"codeList":["import random, time\nfrom pymilvus import connections, MilvusClient, DataType\n\nSERVER_ADDR = \"http://localhost:19530\"\n\n# 1. Set up a Milvus client\nclient = MilvusClient(\n uri=SERVER_ADDR\n)\n\n# 2. Create a collection\nschema = MilvusClient.create_schema(\n auto_id=False,\n enable_dynamic_field=True,\n partition_key_field=\"color\",\n num_partitions=16 # Number of partitions. 
Defaults to 16.\n)\n\nschema.add_field(field_name=\"id\", datatype=DataType.INT64, is_primary=True)\nschema.add_field(field_name=\"vector\", datatype=DataType.FLOAT_VECTOR, dim=5)\nschema.add_field(field_name=\"color\", datatype=DataType.VARCHAR, max_length=512)\n","import io.milvus.v2.client.ConnectConfig;\nimport io.milvus.v2.client.MilvusClientV2;\nimport io.milvus.v2.common.DataType;\nimport io.milvus.v2.common.IndexParam;\nimport io.milvus.v2.service.collection.request.AddFieldReq;\nimport io.milvus.v2.service.collection.request.CreateCollectionReq;\n\nString CLUSTER_ENDPOINT = \"http://localhost:19530\";\n\n// 1. Connect to Milvus server\nConnectConfig connectConfig = ConnectConfig.builder()\n .uri(CLUSTER_ENDPOINT)\n .build();\n\nMilvusClientV2 client = new MilvusClientV2(connectConfig);\n\n// 2. Create a collection in customized setup mode\n\n// 2.1 Create schema\nCreateCollectionReq.CollectionSchema schema = client.createSchema();\n\n// 2.2 Add fields to schema\nschema.addField(AddFieldReq.builder()\n .fieldName(\"id\")\n .dataType(DataType.Int64)\n .isPrimaryKey(true)\n .autoID(false)\n .build());\n\nschema.addField(AddFieldReq.builder()\n .fieldName(\"vector\")\n .dataType(DataType.FloatVector)\n .dimension(5)\n .build());\n \nschema.addField(AddFieldReq.builder()\n .fieldName(\"color\")\n .dataType(DataType.VarChar)\n .maxLength(512)\n .isPartitionKey(true)\n .build());\n","const { MilvusClient, DataType, sleep } = require(\"@zilliz/milvus2-sdk-node\")\n\nconst address = \"http://localhost:19530\"\n\nasync function main() {\n// 1. Set up a Milvus Client\nclient = new MilvusClient({address}); \n\n// 2. 
Create a collection\n// 2.1 Define fields\nconst fields = [\n {\n name: \"id\",\n data_type: DataType.Int64,\n is_primary_key: true,\n auto_id: false\n },\n {\n name: \"vector\",\n data_type: DataType.FloatVector,\n dim: 5\n },\n {\n name: \"color\",\n data_type: DataType.VarChar,\n max_length: 512,\n is_partition_key: true\n }\n]\n","index_params = MilvusClient.prepare_index_params()\n\nindex_params.add_index(\n field_name=\"id\",\n index_type=\"STL_SORT\"\n)\n\nindex_params.add_index(\n field_name=\"color\",\n index_type=\"Trie\"\n)\n\nindex_params.add_index(\n field_name=\"vector\",\n index_type=\"IVF_FLAT\",\n metric_type=\"L2\",\n params={\"nlist\": 1024}\n)\n","// 2.3 Prepare index parameters\nIndexParam indexParamForVectorField = IndexParam.builder()\n .fieldName(\"vector\")\n .indexType(IndexParam.IndexType.IVF_FLAT)\n .metricType(IndexParam.MetricType.IP)\n .extraParams(Map.of(\"nlist\", 1024))\n .build();\n\nList indexParams = new ArrayList<>();\nindexParams.add(indexParamForVectorField);\n","// 2.2 Prepare index parameters\nconst index_params = [{\n field_name: \"color\",\n index_type: \"Trie\"\n},{\n field_name: \"id\",\n index_type: \"STL_SORT\"\n},{\n field_name: \"vector\",\n index_type: \"IVF_FLAT\",\n metric_type: \"IP\",\n params: { nlist: 1024}\n}]\n","client.create_collection(\n collection_name=\"test_collection\",\n schema=schema,\n index_params=index_params\n)\n","// 2.4 Create a collection with schema and index parameters\nCreateCollectionReq customizedSetupReq = CreateCollectionReq.builder()\n .collectionName(\"test_collection\")\n .collectionSchema(schema)\n .indexParams(indexParams) \n .build();\n\nclient.createCollection(customizedSetupReq);\n","// 2.3 Create a collection with fields and index parameters\nres = await client.createCollection({\n collection_name: \"test_collection\",\n fields: fields, \n index_params: index_params,\n})\n\nconsole.log(res.error_code)\n\n// Output\n// \n// Success\n//\n","# 3. 
Insert randomly generated vectors \ncolors = [\"green\", \"blue\", \"yellow\", \"red\", \"black\", \"white\", \"purple\", \"pink\", \"orange\", \"brown\", \"grey\"]\ndata = []\n\nfor i in range(1000):\n current_color = random.choice(colors)\n current_tag = random.randint(1000, 9999)\n data.append({\n \"id\": i,\n \"vector\": [ random.uniform(-1, 1) for _ in range(5) ],\n \"color\": current_color,\n \"tag\": current_tag,\n \"color_tag\": f\"{current_color}_{str(current_tag)}\"\n })\n\nprint(data[0])\n","// 3. Insert randomly generated vectors\nList colors = Arrays.asList(\"green\", \"blue\", \"yellow\", \"red\", \"black\", \"white\", \"purple\", \"pink\", \"orange\", \"brown\", \"grey\");\nList data = new ArrayList<>();\n\nfor (int i=0; i<1000; i++) {\n Random rand = new Random();\n String current_color = colors.get(rand.nextInt(colors.size()-1));\n int current_tag = rand.nextInt(8999) + 1000;\n JSONObject row = new JSONObject();\n row.put(\"id\", Long.valueOf(i));\n row.put(\"vector\", Arrays.asList(rand.nextFloat(), rand.nextFloat(), rand.nextFloat(), rand.nextFloat(), rand.nextFloat()));\n row.put(\"color\", current_color);\n row.put(\"tag\", current_tag);\n row.put(\"color_tag\", current_color + \"_\" + String.valueOf(rand.nextInt(8999) + 1000));\n data.add(row);\n}\n\nSystem.out.println(JSONObject.toJSON(data.get(0))); \n","// 3. 
Insert randomly generated vectors \nconst colors = [\"green\", \"blue\", \"yellow\", \"red\", \"black\", \"white\", \"purple\", \"pink\", \"orange\", \"brown\", \"grey\"]\nvar data = []\n\nfor (let i = 0; i < 1000; i++) {\n const current_color = colors[Math.floor(Math.random() * colors.length)]\n const current_tag = Math.floor(Math.random() * 8999 + 1000)\n data.push({\n id: i,\n vector: [Math.random(), Math.random(), Math.random(), Math.random(), Math.random()],\n color: current_color,\n tag: current_tag,\n color_tag: `${current_color}_${current_tag}`\n })\n}\n\nconsole.log(data[0])\n","{\n id: 0,\n vector: [\n 0.1275656405044483,\n 0.47417858592773277,\n 0.13858264437643286,\n 0.2390904907020377,\n 0.8447862593689635\n ],\n color: 'blue',\n tag: 2064,\n color_tag: 'blue_2064'\n}\n","res = client.insert(\n collection_name=\"test_collection\",\n data=data\n)\n\nprint(res)\n\n# Output\n#\n# {\n# \"insert_count\": 1000,\n# \"ids\": [\n# 0,\n# 1,\n# 2,\n# 3,\n# 4,\n# 5,\n# 6,\n# 7,\n# 8,\n# 9,\n# \"(990 more items hidden)\"\n# ]\n# }\n","// 3.1 Insert data into the collection\nInsertReq insertReq = InsertReq.builder()\n .collectionName(\"test_collection\")\n .data(data)\n .build();\n\nInsertResp insertResp = client.insert(insertReq);\n\nSystem.out.println(JSONObject.toJSON(insertResp));\n\n// Output:\n// {\"insertCnt\": 1000}\n","res = await client.insert({\n collection_name: \"test_collection\",\n data: data,\n})\n\nconsole.log(res.insert_cnt)\n\n// Output\n// \n// 1000\n// \n","# 4. 
Search with partition key\nquery_vectors = [[0.3580376395471989, -0.6023495712049978, 0.18414012509913835, -0.26286205330961354, 0.9029438446296592]]\n\nres = client.search(\n collection_name=\"test_collection\",\n data=query_vectors,\n filter=\"color == 'green'\",\n search_params={\"metric_type\": \"L2\", \"params\": {\"nprobe\": 10}},\n output_fields=[\"id\", \"color_tag\"],\n limit=3\n)\n\nprint(res)\n\n# Output\n#\n# [\n# [\n# {\n# \"id\": 970,\n# \"distance\": 0.5770174264907837,\n# \"entity\": {\n# \"id\": 970,\n# \"color_tag\": \"green_9828\"\n# }\n# },\n# {\n# \"id\": 115,\n# \"distance\": 0.6898155808448792,\n# \"entity\": {\n# \"id\": 115,\n# \"color_tag\": \"green_4073\"\n# }\n# },\n# {\n# \"id\": 899,\n# \"distance\": 0.7028976678848267,\n# \"entity\": {\n# \"id\": 899,\n# \"color_tag\": \"green_9897\"\n# }\n# }\n# ]\n# ]\n","// 4. Search with partition key\nList> query_vectors = Arrays.asList(Arrays.asList(0.3580376395471989f, -0.6023495712049978f, 0.18414012509913835f, -0.26286205330961354f, 0.9029438446296592f));\n\nSearchReq searchReq = SearchReq.builder()\n .collectionName(\"test_collection\")\n .data(query_vectors)\n .filter(\"color == \\\"green\\\"\")\n .topK(3)\n .build();\n\nSearchResp searchResp = client.search(searchReq);\n\nSystem.out.println(JSONObject.toJSON(searchResp)); \n\n// Output:\n// {\"searchResults\": [[\n// {\n// \"distance\": 1.0586997,\n// \"id\": 414,\n// \"entity\": {}\n// },\n// {\n// \"distance\": 0.981384,\n// \"id\": 293,\n// \"entity\": {}\n// },\n// {\n// \"distance\": 0.9548756,\n// \"id\": 325,\n// \"entity\": {}\n// }\n// ]]}\n","// 4. 
Search with partition key\nconst query_vectors = [0.3580376395471989, -0.6023495712049978, 0.18414012509913835, -0.26286205330961354, 0.9029438446296592]\n\nres = await client.search({\n collection_name: \"test_collection\",\n data: query_vectors,\n filter: \"color == 'green'\",\n output_fields: [\"color_tag\"],\n limit: 3\n})\n\nconsole.log(res.results)\n\n// Output\n// \n// [\n// { score: 2.402090549468994, id: '135', color_tag: 'green_2694' },\n// { score: 2.3938629627227783, id: '326', color_tag: 'green_7104' },\n// { score: 2.3235254287719727, id: '801', color_tag: 'green_3162' }\n// ]\n// \n"],"headingContent":"","anchorList":[{"label":"使用分区密钥","href":"Use-Partition-Key","type":1,"isActive":false},{"label":"概述","href":"Overview","type":2,"isActive":false},{"label":"启用分区密钥","href":"Enable-partition-key","type":2,"isActive":false},{"label":"列出分区","href":"List-partitions","type":2,"isActive":false},{"label":"插入数据","href":"Insert-data","type":2,"isActive":false},{"label":"使用分区键","href":"Use-partition-key","type":2,"isActive":false},{"label":"典型用例","href":"Typical-use-cases","type":2,"isActive":false}]} \ No newline at end of file +{ + "codeList": [ + "import random, time\nfrom pymilvus import connections, MilvusClient, DataType\n\nSERVER_ADDR = \"http://localhost:19530\"\n\n# 1. Set up a Milvus client\nclient = MilvusClient(\n uri=SERVER_ADDR\n)\n\n# 2. Create a collection\nschema = MilvusClient.create_schema(\n auto_id=False,\n enable_dynamic_field=True,\n partition_key_field=\"color\",\n num_partitions=16 # Number of partitions. 
Defaults to 16.\n)\n\nschema.add_field(field_name=\"id\", datatype=DataType.INT64, is_primary=True)\nschema.add_field(field_name=\"vector\", datatype=DataType.FLOAT_VECTOR, dim=5)\nschema.add_field(field_name=\"color\", datatype=DataType.VARCHAR, max_length=512)\n", + "import io.milvus.v2.client.ConnectConfig;\nimport io.milvus.v2.client.MilvusClientV2;\nimport io.milvus.v2.common.DataType;\nimport io.milvus.v2.common.IndexParam;\nimport io.milvus.v2.service.collection.request.AddFieldReq;\nimport io.milvus.v2.service.collection.request.CreateCollectionReq;\n\nString CLUSTER_ENDPOINT = \"http://localhost:19530\";\n\n// 1. Connect to Milvus server\nConnectConfig connectConfig = ConnectConfig.builder()\n .uri(CLUSTER_ENDPOINT)\n .build();\n\nMilvusClientV2 client = new MilvusClientV2(connectConfig);\n\n// 2. Create a collection in customized setup mode\n\n// 2.1 Create schema\nCreateCollectionReq.CollectionSchema schema = client.createSchema();\n\n// 2.2 Add fields to schema\nschema.addField(AddFieldReq.builder()\n .fieldName(\"id\")\n .dataType(DataType.Int64)\n .isPrimaryKey(true)\n .autoID(false)\n .build());\n\nschema.addField(AddFieldReq.builder()\n .fieldName(\"vector\")\n .dataType(DataType.FloatVector)\n .dimension(5)\n .build());\n \nschema.addField(AddFieldReq.builder()\n .fieldName(\"color\")\n .dataType(DataType.VarChar)\n .maxLength(512)\n .isPartitionKey(true)\n .build());\n", + "const { MilvusClient, DataType, sleep } = require(\"@zilliz/milvus2-sdk-node\")\n\nconst address = \"http://localhost:19530\"\n\nasync function main() {\n// 1. Set up a Milvus Client\nclient = new MilvusClient({address}); \n\n// 2. 
Create a collection\n// 2.1 Define fields\nconst fields = [\n {\n name: \"id\",\n data_type: DataType.Int64,\n is_primary_key: true,\n auto_id: false\n },\n {\n name: \"vector\",\n data_type: DataType.FloatVector,\n dim: 5\n },\n {\n name: \"color\",\n data_type: DataType.VarChar,\n max_length: 512,\n is_partition_key: true\n }\n]\n", + "index_params = MilvusClient.prepare_index_params()\n\nindex_params.add_index(\n field_name=\"id\",\n index_type=\"STL_SORT\"\n)\n\nindex_params.add_index(\n field_name=\"color\",\n index_type=\"Trie\"\n)\n\nindex_params.add_index(\n field_name=\"vector\",\n index_type=\"IVF_FLAT\",\n metric_type=\"L2\",\n params={\"nlist\": 1024}\n)\n", + "// 2.3 Prepare index parameters\nIndexParam indexParamForVectorField = IndexParam.builder()\n .fieldName(\"vector\")\n .indexType(IndexParam.IndexType.IVF_FLAT)\n .metricType(IndexParam.MetricType.IP)\n .extraParams(Map.of(\"nlist\", 1024))\n .build();\n\nList indexParams = new ArrayList<>();\nindexParams.add(indexParamForVectorField);\n", + "// 2.2 Prepare index parameters\nconst index_params = [{\n field_name: \"color\",\n index_type: \"Trie\"\n},{\n field_name: \"id\",\n index_type: \"STL_SORT\"\n},{\n field_name: \"vector\",\n index_type: \"IVF_FLAT\",\n metric_type: \"IP\",\n params: { nlist: 1024}\n}]\n", + "client.create_collection(\n collection_name=\"test_collection\",\n schema=schema,\n index_params=index_params\n)\n", + "// 2.4 Create a collection with schema and index parameters\nCreateCollectionReq customizedSetupReq = CreateCollectionReq.builder()\n .collectionName(\"test_collection\")\n .collectionSchema(schema)\n .indexParams(indexParams) \n .build();\n\nclient.createCollection(customizedSetupReq);\n", + "// 2.3 Create a collection with fields and index parameters\nres = await client.createCollection({\n collection_name: \"test_collection\",\n fields: fields, \n index_params: index_params,\n})\n\nconsole.log(res.error_code)\n\n// Output\n// \n// Success\n//\n", + "# 3. 
Insert randomly generated vectors \ncolors = [\"green\", \"blue\", \"yellow\", \"red\", \"black\", \"white\", \"purple\", \"pink\", \"orange\", \"brown\", \"grey\"]\ndata = []\n\nfor i in range(1000):\n current_color = random.choice(colors)\n current_tag = random.randint(1000, 9999)\n data.append({\n \"id\": i,\n \"vector\": [ random.uniform(-1, 1) for _ in range(5) ],\n \"color\": current_color,\n \"tag\": current_tag,\n \"color_tag\": f\"{current_color}_{str(current_tag)}\"\n })\n\nprint(data[0])\n", + "// 3. Insert randomly generated vectors\nList colors = Arrays.asList(\"green\", \"blue\", \"yellow\", \"red\", \"black\", \"white\", \"purple\", \"pink\", \"orange\", \"brown\", \"grey\");\nList data = new ArrayList<>();\n\nfor (int i=0; i<1000; i++) {\n Random rand = new Random();\n String current_color = colors.get(rand.nextInt(colors.size()-1));\n int current_tag = rand.nextInt(8999) + 1000;\n JSONObject row = new JSONObject();\n row.put(\"id\", Long.valueOf(i));\n row.put(\"vector\", Arrays.asList(rand.nextFloat(), rand.nextFloat(), rand.nextFloat(), rand.nextFloat(), rand.nextFloat()));\n row.put(\"color\", current_color);\n row.put(\"tag\", current_tag);\n row.put(\"color_tag\", current_color + \"_\" + String.valueOf(rand.nextInt(8999) + 1000));\n data.add(row);\n}\n\nSystem.out.println(JSONObject.toJSON(data.get(0))); \n", + "// 3. 
Insert randomly generated vectors \nconst colors = [\"green\", \"blue\", \"yellow\", \"red\", \"black\", \"white\", \"purple\", \"pink\", \"orange\", \"brown\", \"grey\"]\nvar data = []\n\nfor (let i = 0; i < 1000; i++) {\n const current_color = colors[Math.floor(Math.random() * colors.length)]\n const current_tag = Math.floor(Math.random() * 8999 + 1000)\n data.push({\n id: i,\n vector: [Math.random(), Math.random(), Math.random(), Math.random(), Math.random()],\n color: current_color,\n tag: current_tag,\n color_tag: `${current_color}_${current_tag}`\n })\n}\n\nconsole.log(data[0])\n", + "{\n id: 0,\n vector: [\n 0.1275656405044483,\n 0.47417858592773277,\n 0.13858264437643286,\n 0.2390904907020377,\n 0.8447862593689635\n ],\n color: 'blue',\n tag: 2064,\n color_tag: 'blue_2064'\n}\n", + "res = client.insert(\n collection_name=\"test_collection\",\n data=data\n)\n\nprint(res)\n\n# Output\n#\n# {\n# \"insert_count\": 1000,\n# \"ids\": [\n# 0,\n# 1,\n# 2,\n# 3,\n# 4,\n# 5,\n# 6,\n# 7,\n# 8,\n# 9,\n# \"(990 more items hidden)\"\n# ]\n# }\n", + "// 3.1 Insert data into the collection\nInsertReq insertReq = InsertReq.builder()\n .collectionName(\"test_collection\")\n .data(data)\n .build();\n\nInsertResp insertResp = client.insert(insertReq);\n\nSystem.out.println(JSONObject.toJSON(insertResp));\n\n// Output:\n// {\"insertCnt\": 1000}\n", + "res = await client.insert({\n collection_name: \"test_collection\",\n data: data,\n})\n\nconsole.log(res.insert_cnt)\n\n// Output\n// \n// 1000\n// \n", + "# 4. 
Search with partition key\nquery_vectors = [[0.3580376395471989, -0.6023495712049978, 0.18414012509913835, -0.26286205330961354, 0.9029438446296592]]\n\nres = client.search(\n collection_name=\"test_collection\",\n data=query_vectors,\n filter=\"color == 'green'\",\n search_params={\"metric_type\": \"L2\", \"params\": {\"nprobe\": 10}},\n output_fields=[\"id\", \"color_tag\"],\n limit=3\n)\n\nprint(res)\n\n# Output\n#\n# [\n# [\n# {\n# \"id\": 970,\n# \"distance\": 0.5770174264907837,\n# \"entity\": {\n# \"id\": 970,\n# \"color_tag\": \"green_9828\"\n# }\n# },\n# {\n# \"id\": 115,\n# \"distance\": 0.6898155808448792,\n# \"entity\": {\n# \"id\": 115,\n# \"color_tag\": \"green_4073\"\n# }\n# },\n# {\n# \"id\": 899,\n# \"distance\": 0.7028976678848267,\n# \"entity\": {\n# \"id\": 899,\n# \"color_tag\": \"green_9897\"\n# }\n# }\n# ]\n# ]\n", + "// 4. Search with partition key\nList> query_vectors = Arrays.asList(Arrays.asList(0.3580376395471989f, -0.6023495712049978f, 0.18414012509913835f, -0.26286205330961354f, 0.9029438446296592f));\n\nSearchReq searchReq = SearchReq.builder()\n .collectionName(\"test_collection\")\n .data(query_vectors)\n .filter(\"color == \\\"green\\\"\")\n .topK(3)\n .build();\n\nSearchResp searchResp = client.search(searchReq);\n\nSystem.out.println(JSONObject.toJSON(searchResp)); \n\n// Output:\n// {\"searchResults\": [[\n// {\n// \"distance\": 1.0586997,\n// \"id\": 414,\n// \"entity\": {}\n// },\n// {\n// \"distance\": 0.981384,\n// \"id\": 293,\n// \"entity\": {}\n// },\n// {\n// \"distance\": 0.9548756,\n// \"id\": 325,\n// \"entity\": {}\n// }\n// ]]}\n", + "// 4. 
Search with partition key\nconst query_vectors = [0.3580376395471989, -0.6023495712049978, 0.18414012509913835, -0.26286205330961354, 0.9029438446296592]\n\nres = await client.search({\n collection_name: \"test_collection\",\n data: query_vectors,\n filter: \"color == 'green'\",\n output_fields: [\"color_tag\"],\n limit: 3\n})\n\nconsole.log(res.results)\n\n// Output\n// \n// [\n// { score: 2.402090549468994, id: '135', color_tag: 'green_2694' },\n// { score: 2.3938629627227783, id: '326', color_tag: 'green_7104' },\n// { score: 2.3235254287719727, id: '801', color_tag: 'green_3162' }\n// ]\n// \n" + ], + "headingContent": "", + "anchorList": [ + { + "label": "使用Partition Key", + "href": "Use-Partition-Key", + "type": 1, + "isActive": false + }, + { "label": "概述", "href": "Overview", "type": 2, "isActive": false }, + { + "label": "启用Partition Key", + "href": "Enable-partition-key", + "type": 2, + "isActive": false + }, + { + "label": "列出分区", + "href": "List-partitions", + "type": 2, + "isActive": false + }, + { + "label": "插入数据", + "href": "Insert-data", + "type": 2, + "isActive": false + }, + { + "label": "使用分区键", + "href": "Use-partition-key", + "type": 2, + "isActive": false + }, + { + "label": "典型用例", + "href": "Typical-use-cases", + "type": 2, + "isActive": false + } + ] +} diff --git a/localization/v2.4.x/site/zh/userGuide/use-partition-key.md b/localization/v2.4.x/site/zh/userGuide/use-partition-key.md index b4dd93271..85b7dca70 100644 --- a/localization/v2.4.x/site/zh/userGuide/use-partition-key.md +++ b/localization/v2.4.x/site/zh/userGuide/use-partition-key.md @@ -1,9 +1,10 @@ --- id: use-partition-key.md -title: 使用分区密钥 -summary: '' +title: 使用Partition Key +summary: "" --- -

    使用分区密钥

    本指南将指导您使用分区密钥来加速从收藏中检索数据。

    +

    本指南将指导您使用Partition Key来加速从集合中检索数据。

    概述

    你可以将集合中的一个特定字段设置为分区键,这样 Milvus 就会根据该字段中各自的分区值,将进入的实体分配到不同的分区中。这样,具有相同键值的实体就会被归类到一个分区中,从而在通过键字段进行过滤时,无需扫描无关的分区,从而加快了搜索性能。与传统的过滤方法相比,分区键可以大大提高查询性能。

    -

    您可以使用分区密钥实现多租户。有关多租户的详细信息,请阅读多租户

    -

    启用分区密钥

    要将某个字段设置为分区键,请在创建集合模式时指定partition_key_field

    +

    要将某个字段设置为分区键,请在创建Collection Schema 时指定partition_key_field

    在下面的示例代码中,num_partitions 决定将创建的分区数量。默认情况下,它被设置为16 。建议保留默认值。

    有关参数的更多信息,请参阅 MilvusClient, create_schema()add_field()有关参数的更多信息,请参阅 SDK 参考资料。

    @@ -71,21 +72,22 @@ SERVER_ADDR = "http://localhost:19530"# 1. Set up a Milvus client client = MilvusClient( - uri=SERVER_ADDR +uri=SERVER_ADDR ) # 2. Create a collection schema = MilvusClient.create_schema( - auto_id=False, - enable_dynamic_field=True, - partition_key_field="color", - num_partitions=16 # Number of partitions. Defaults to 16. +auto_id=False, +enable_dynamic_field=True, +partition_key_field="color", +num_partitions=16 # Number of partitions. Defaults to 16. ) schema.add_field(field_name="id", datatype=DataType.INT64, is_primary=True) schema.add_field(field_name="vector", datatype=DataType.FLOAT_VECTOR, dim=5) schema.add_field(field_name="color", datatype=DataType.VARCHAR, max_length=512) +
    import io.milvus.v2.client.ConnectConfig;
     import io.milvus.v2.client.MilvusClientV2;
     import io.milvus.v2.common.DataType;
    @@ -164,22 +166,23 @@ client = new M
     
    index_params = MilvusClient.prepare_index_params()
     
     index_params.add_index(
    -    field_name="id",
    -    index_type="STL_SORT"
    +field_name="id",
    +index_type="STL_SORT"
     )
     
     index_params.add_index(
    -    field_name="color",
    -    index_type="Trie"
    +field_name="color",
    +index_type="Trie"
     )
     
     index_params.add_index(
    -    field_name="vector",
    -    index_type="IVF_FLAT",
    -    metric_type="L2",
    -    params={"nlist": 1024}
    +field_name="vector",
    +index_type="IVF_FLAT",
    +metric_type="L2",
    +params={"nlist": 1024}
     )
     
    +
    // 2.3 Prepare index parameters
     IndexParam indexParamForVectorField = IndexParam.builder()
         .fieldName("vector")
    @@ -223,6 +226,7 @@ indexParams.add(indexParamForVectorFie
     
     client.createCollection(customizedSetupReq);
     
    +
    // 2.3 Create a collection with fields and index parameters
     res = await client.createCollection({
         collection_name: "test_collection",
    @@ -277,18 +281,19 @@ colors = ["green", for i in range(1000):
    -    current_color = random.choice(colors)
    -    current_tag = random.randint(1000, 9999)
    -    data.append({
    -        "id": i,
    -        "vector": [ random.uniform(-1, 1) for _ in range(5) ],
    -        "color": current_color,
    -        "tag": current_tag,
    -        "color_tag": f"{current_color}_{str(current_tag)}"
    -    })
    +current_color = random.choice(colors)
    +current_tag = random.randint(1000, 9999)
    +data.append({
    +"id": i,
    +"vector": [ random.uniform(-1, 1) for _ in range(5) ],
    +"color": current_color,
    +"tag": current_tag,
    +"color_tag": f"{current_color}_{str(current_tag)}"
    +})
     
     print(data[0])
     
    +
    // 3. Insert randomly generated vectors
     List<String> colors = Arrays.asList("green", "blue", "yellow", "red", "black", "white", "purple", "pink", "orange", "brown", "grey");
     List<JSONObject> data = new ArrayList<>();
    @@ -362,22 +367,23 @@ data = []
     # Output
     #
     # {
    -#     "insert_count": 1000,
    -#     "ids": [
    -#         0,
    -#         1,
    -#         2,
    -#         3,
    -#         4,
    -#         5,
    -#         6,
    -#         7,
    -#         8,
    -#         9,
    -#         "(990 more items hidden)"
    -#     ]
    +# "insert_count": 1000,
    +# "ids": [
    +# 0,
    +# 1,
    +# 2,
    +# 3,
    +# 4,
    +# 5,
    +# 6,
    +# 7,
    +# 8,
    +# 9,
    +# "(990 more items hidden)"
    +# ]
     # }
     
    +
    // 3.1 Insert data into the collection
     InsertReq insertReq = InsertReq.builder()
         .collectionName("test_collection")
    @@ -443,12 +449,12 @@ data = []
     query_vectors = [[0.3580376395471989, -0.6023495712049978, 0.18414012509913835, -0.26286205330961354, 0.9029438446296592]]
     
     res = client.search(
    -    collection_name="test_collection",
    -    data=query_vectors,
    -    filter="color == 'green'",
    -    search_params={"metric_type": "L2", "params": {"nprobe": 10}},
    -    output_fields=["id", "color_tag"],
    -    limit=3
    +collection_name="test_collection",
    +data=query_vectors,
    +filter="color == 'green'",
    +search_params={"metric_type": "L2", "params": {"nprobe": 10}},
    +output_fields=["id", "color_tag"],
    +limit=3
     )
     
     print(res)
    @@ -456,34 +462,35 @@ res = client.search(
     # Output
     #
     # [
    -#     [
    -#         {
    -#             "id": 970,
    -#             "distance": 0.5770174264907837,
    -#             "entity": {
    -#                 "id": 970,
    -#                 "color_tag": "green_9828"
    -#             }
    -#         },
    -#         {
    -#             "id": 115,
    -#             "distance": 0.6898155808448792,
    -#             "entity": {
    -#                 "id": 115,
    -#                 "color_tag": "green_4073"
    -#             }
    -#         },
    -#         {
    -#             "id": 899,
    -#             "distance": 0.7028976678848267,
    -#             "entity": {
    -#                 "id": 899,
    -#                 "color_tag": "green_9897"
    -#             }
    -#         }
    -#     ]
    +# [
    +# {
    +# "id": 970,
    +# "distance": 0.5770174264907837,
    +# "entity": {
    +# "id": 970,
    +# "color_tag": "green_9828"
    +# }
    +# },
    +# {
    +# "id": 115,
    +# "distance": 0.6898155808448792,
    +# "entity": {
    +# "id": 115,
    +# "color_tag": "green_4073"
    +# }
    +# },
    +# {
    +# "id": 899,
    +# "distance": 0.7028976678848267,
    +# "entity": {
    +# "id": 899,
    +# "color_tag": "green_9897"
    +# }
    +# }
    +# ]
     # ]
     
    +
    // 4. Search with partition key
     List<List<Float>> query_vectors = Arrays.asList(Arrays.asList(0.3580376395471989f, -0.6023495712049978f, 0.18414012509913835f, -0.26286205330961354f, 0.9029438446296592f));
     
    @@ -554,4 +561,4 @@ res = await client.
           
    -    

    您可以利用分区密钥功能实现更好的搜索性能并启用多租户功能。具体做法是为每个实体指定一个特定于租户的值作为分区键字段。在搜索或查询集合时,通过在布尔表达式中包含分区键字段,可以根据租户特定值过滤实体。这种方法可确保按租户进行数据隔离,并避免扫描不必要的分区。

    +

    您可以利用Partition Key功能实现更好的搜索性能并启用多租户功能。具体做法是为每个实体指定一个特定于租户的值作为分区键字段。在搜索或查询集合时,通过在布尔表达式中包含分区键字段,可以根据租户特定值过滤实体。这种方法可确保按租户进行数据隔离,并避免扫描不必要的分区。

    diff --git a/tools/glossary.json b/tools/glossary.json index 949d582da..283a11ef7 100644 --- a/tools/glossary.json +++ b/tools/glossary.json @@ -39,5 +39,7 @@ { "source": "Azure", "target": "Azure" }, { "source": "On the Fly", "target": "动态配置" }, { "source": "Milvus CDC", "target": "Milvus CDC" }, - { "source": "CDC", "target": "CDC" } + { "source": "CDC", "target": "CDC" }, + { "source": "Dynamic Field", "target": "Dynamic Field" }, + { "source": "Partition Key", "target": "Partition Key" } ] diff --git a/tools/utils.js b/tools/utils.js index c5a214195..fdfacf092 100644 --- a/tools/utils.js +++ b/tools/utils.js @@ -31,7 +31,7 @@ const DEEPL_HEADERS = { "Content-Type": "application/json", Authorization: `DeepL-Auth-Key ${DEEPL_API_KEY}`, }; -const GLOSSARY_ID = "7eb24f3c-0cb9-4bf6-9201-8c48860e9bbe"; +const GLOSSARY_ID = "d9c474dc-b052-4aee-938e-997ac704e880"; export function traverseDirectory(dirPath, fileList = []) { const files = fs.readdirSync(dirPath); diff --git a/v2.0.x/site/zh-CN/reference/glossary.md b/v2.0.x/site/zh-CN/reference/glossary.md index b51844fc5..6f27af303 100644 --- a/v2.0.x/site/zh-CN/reference/glossary.md +++ b/v2.0.x/site/zh-CN/reference/glossary.md @@ -48,7 +48,7 @@ PChannel 表示物理信道。每个 PChannel 对应一个日志存储主题。 ## Schema -模式是定义数据类型和数据属性的元信息。每个集合都有自己的集合模式,该模式定义了集合的所有字段、自动ID (主键) 分配支持以及集合描述。集合模式中还包括定义字段名称、数据类型和其他属性的字段模式。 +模式是定义数据类型和数据属性的元信息。每个集合都有自己的Collection Schema ,该模式定义了集合的所有字段、自动ID (主键) 分配支持以及集合描述。Collection Schema 中还包括定义字段名称、数据类型和其他属性的字段 Schema 。 ## Segment