| 123456789101112131415161718192021222324252627282930313233343536373839404142 |
- from pymilvus import connections, FieldSchema, CollectionSchema, DataType, Collection, utility
- from app.core.config import settings
def init_milvus() -> Collection:
    """Connect to Milvus and return the card-search collection, loaded.

    Opens the ``"default"`` connection using ``settings.MILVUS_HOST`` and
    ``settings.MILVUS_PORT``. If ``settings.COLLECTION_NAME`` already exists
    it is reused unchanged; otherwise it is created with the schema defined
    below, an HNSW/COSINE index on ``vector`` and a scalar index on
    ``img_md5``.

    The collection is loaded on both paths, so callers get a handle in the
    same state whether the collection was just created or already existed.

    Returns:
        The ``Collection`` handle for ``settings.COLLECTION_NAME``, after
        ``load()`` has been called on it.
    """
    connections.connect(
        "default",
        host=settings.MILVUS_HOST,
        port=settings.MILVUS_PORT,
    )

    if utility.has_collection(settings.COLLECTION_NAME):
        # Reuse the existing collection; its schema and indexes are kept.
        collection = Collection(settings.COLLECTION_NAME)
    else:
        # Field definitions.
        fields = [
            FieldSchema(name="id", dtype=DataType.INT64, is_primary=True, auto_id=True),
            # MD5 of the image, used for de-duplication.
            FieldSchema(name="img_md5", dtype=DataType.VARCHAR, max_length=64),
            FieldSchema(name="img_path", dtype=DataType.VARCHAR, max_length=512),
            FieldSchema(name="source_id", dtype=DataType.VARCHAR, max_length=64),
            FieldSchema(name="lang", dtype=DataType.VARCHAR, max_length=10),
            FieldSchema(name="card_name", dtype=DataType.VARCHAR, max_length=256),
            FieldSchema(name="card_num", dtype=DataType.VARCHAR, max_length=64),
            FieldSchema(name="vector", dtype=DataType.FLOAT_VECTOR, dim=settings.VECTOR_DIM),
        ]
        schema = CollectionSchema(fields, "Pokemon Card Search Collection")
        collection = Collection(settings.COLLECTION_NAME, schema)

        # Vector index used for similarity search.
        index_params = {
            "metric_type": "COSINE",
            "index_type": "HNSW",
            "params": {"M": 8, "efConstruction": 64},
        }
        collection.create_index(field_name="vector", index_params=index_params)

        # Scalar index on the MD5 to speed up de-duplication lookups.
        collection.create_index(field_name="img_md5", index_name="idx_md5")

    # Load on both paths: previously an existing collection was returned
    # without load(), unlike a freshly created one.
    collection.load()
    return collection
# Module-level shared instance: created once, when this module is first
# imported, by calling init_milvus().
milvus_collection: Collection = init_milvus()
|