milvus_client.py 1.7 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243
  1. from pymilvus import connections, FieldSchema, CollectionSchema, DataType, Collection, utility
  2. from app.core.config import settings
  3. def init_milvus():
  4. print(f"🔌 连接 Milvus: {settings.MILVUS_HOST}:{settings.MILVUS_PORT}")
  5. connections.connect("default", host=settings.MILVUS_HOST, port=settings.MILVUS_PORT)
  6. if utility.has_collection(settings.COLLECTION_NAME):
  7. return Collection(settings.COLLECTION_NAME)
  8. # 定义字段
  9. fields = [
  10. FieldSchema(name="id", dtype=DataType.INT64, is_primary=True, auto_id=True),
  11. FieldSchema(name="img_md5", dtype=DataType.VARCHAR, max_length=64), # 用于去重
  12. FieldSchema(name="img_path", dtype=DataType.VARCHAR, max_length=512),
  13. FieldSchema(name="source_id", dtype=DataType.VARCHAR, max_length=64),
  14. FieldSchema(name="lang", dtype=DataType.VARCHAR, max_length=10),
  15. FieldSchema(name="card_name", dtype=DataType.VARCHAR, max_length=256),
  16. FieldSchema(name="card_num", dtype=DataType.INT64),
  17. FieldSchema(name="vector", dtype=DataType.FLOAT_VECTOR, dim=settings.VECTOR_DIM),
  18. ]
  19. schema = CollectionSchema(fields, "Pokemon Card Search Collection")
  20. collection = Collection(settings.COLLECTION_NAME, schema)
  21. # 创建索引
  22. index_params = {
  23. "metric_type": "COSINE",
  24. "index_type": "HNSW",
  25. "params": {"M": 8, "efConstruction": 64}
  26. }
  27. collection.create_index(field_name="vector", index_params=index_params)
  28. # 为 MD5 创建标量索引,加速去重查询
  29. collection.create_index(field_name="img_md5", index_name="idx_md5")
  30. collection.load()
  31. return collection
  32. # 全局实例
  33. milvus_collection = init_milvus()