"""Milvus bootstrap for the Pokemon card search collection."""

from pymilvus import (
    connections,
    FieldSchema,
    CollectionSchema,
    DataType,
    Collection,
    utility,
)

from app.core.config import settings


def _build_schema() -> CollectionSchema:
    """Return the schema used when the collection has to be created."""
    fields = [
        FieldSchema(name="id", dtype=DataType.INT64, is_primary=True, auto_id=True),
        # MD5 of the image, used for de-duplication.
        FieldSchema(name="img_md5", dtype=DataType.VARCHAR, max_length=64),
        FieldSchema(name="img_path", dtype=DataType.VARCHAR, max_length=512),
        FieldSchema(name="source_id", dtype=DataType.VARCHAR, max_length=64),
        FieldSchema(name="lang", dtype=DataType.VARCHAR, max_length=10),
        FieldSchema(name="card_name", dtype=DataType.VARCHAR, max_length=256),
        FieldSchema(name="card_num", dtype=DataType.VARCHAR, max_length=64),
        FieldSchema(name="vector", dtype=DataType.FLOAT_VECTOR, dim=settings.VECTOR_DIM),
    ]
    return CollectionSchema(fields, "Pokemon Card Search Collection")


def _create_indexes(collection: Collection) -> None:
    """Create the vector index and the scalar MD5 index on *collection*."""
    index_params = {
        "metric_type": "COSINE",
        "index_type": "HNSW",
        "params": {"M": 8, "efConstruction": 64},
    }
    collection.create_index(field_name="vector", index_params=index_params)
    # Scalar index on the MD5 column to speed up de-duplication lookups.
    collection.create_index(field_name="img_md5", index_name="idx_md5")


def init_milvus() -> Collection:
    """Connect to Milvus and return the card collection, loaded for search.

    Connects the "default" alias to ``settings.MILVUS_HOST`` /
    ``settings.MILVUS_PORT``. If ``settings.COLLECTION_NAME`` does not exist
    it is created with its schema and indexes. An existing collection is
    reused as-is.

    In both cases the collection is loaded before it is returned. Previously
    an existing collection was returned without ``load()``, so a collection
    that had been released, or whose first bootstrap was interrupted before
    loading, could not be searched.

    Returns:
        The ``Collection`` handle for ``settings.COLLECTION_NAME``.
    """
    connections.connect("default", host=settings.MILVUS_HOST, port=settings.MILVUS_PORT)

    name = settings.COLLECTION_NAME
    already_exists = utility.has_collection(name)
    if already_exists:
        collection = Collection(name)
    else:
        collection = Collection(name, _build_schema())

    # A fresh collection always needs its indexes. An existing one only needs
    # them when it has none at all, i.e. it was created but never indexed.
    # A collection that already has indexes is left untouched.
    if not already_exists or not collection.indexes:
        _create_indexes(collection)

    # Loading is required before search/query; calling it on a collection
    # that is already loaded is expected to be harmless.
    collection.load()
    return collection


# Global instance: the connection and load happen once, at import time.
milvus_collection = init_milvus()