# milvus_client.py
  1. from pymilvus import connections, FieldSchema, CollectionSchema, DataType, Collection, utility
  2. from app.core.config import settings
  3. def init_milvus():
  4. connections.connect("default", host=settings.MILVUS_HOST, port=settings.MILVUS_PORT)
  5. if utility.has_collection(settings.COLLECTION_NAME):
  6. return Collection(settings.COLLECTION_NAME)
  7. # 定义字段
  8. fields = [
  9. FieldSchema(name="id", dtype=DataType.INT64, is_primary=True, auto_id=True),
  10. FieldSchema(name="img_md5", dtype=DataType.VARCHAR, max_length=64), # 用于去重
  11. FieldSchema(name="img_path", dtype=DataType.VARCHAR, max_length=512),
  12. FieldSchema(name="source_id", dtype=DataType.VARCHAR, max_length=64),
  13. FieldSchema(name="lang", dtype=DataType.VARCHAR, max_length=10),
  14. FieldSchema(name="card_name", dtype=DataType.VARCHAR, max_length=256),
  15. FieldSchema(name="card_num", dtype=DataType.VARCHAR, max_length=64),
  16. FieldSchema(name="vector", dtype=DataType.FLOAT_VECTOR, dim=settings.VECTOR_DIM),
  17. ]
  18. schema = CollectionSchema(fields, "Pokemon Card Search Collection")
  19. collection = Collection(settings.COLLECTION_NAME, schema)
  20. # 创建索引
  21. index_params = {
  22. "metric_type": "COSINE",
  23. "index_type": "HNSW",
  24. "params": {"M": 8, "efConstruction": 64}
  25. }
  26. collection.create_index(field_name="vector", index_params=index_params)
  27. # 为 MD5 创建标量索引,加速去重查询
  28. collection.create_index(field_name="img_md5", index_name="idx_md5")
  29. collection.load()
  30. return collection
  31. # 全局实例
  32. milvus_collection = init_milvus()