From 66829d071eebb5ffd2ef4df0bb3298d311114ca7 Mon Sep 17 00:00:00 2001 From: BossZou <40255591+BossZou@users.noreply.github.com> Date: Mon, 8 Mar 2021 19:26:14 +0800 Subject: [PATCH] Remove unused hybrid APIs (#433) Signed-off-by: yinghao.zou --- CHANGELOG.md | 8 +- milvus/client/prepare.py | 266 --------------------------------------- milvus/client/stub.py | 25 ---- requirements.txt | 2 +- 4 files changed, 8 insertions(+), 293 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1bfa843b7..6a20194b7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,4 +1,10 @@ -# pymilvus 1.0.0(TBD) +# pymilvus 1.0.x(TBD) + +## Task +- \#432 - Remove hybrid APIs + + +# pymilvus 1.0.0(2021-03-05) ## Improvement - \#424 - Prepare for 1.x diff --git a/milvus/client/prepare.py b/milvus/client/prepare.py index 1e27a0b36..f1b12a4c7 100644 --- a/milvus/client/prepare.py +++ b/milvus/client/prepare.py @@ -59,30 +59,6 @@ def collection_schema(cls, collection_name, dimension, index_file_size, metric_t return _param - @classmethod - def collection_hybrid_schema(cls, collection_name, fields): - _param = grpc_types.Mapping( - collection_name=collection_name - ) - - for field in fields: - if "data_type" in field: - ft = grpc_types.FieldType(data_type=int(field["data_type"])) - # ft.data_type = int(v["data_type"]) - elif "dimension" in field: - ft = grpc_types.FieldType(vector_param=grpc_types.VectorFieldParam(dimension=field["dimension"])) - # ft.vector_param = grpc_types.VectorFieldParam(dimension=v["dimension"]) - else: - raise ValueError("Collection field not support {}".format(field)) - field_param = grpc_types.FieldParam(name=field["field_name"], type=ft) - extra_params = field.get("extra_params", None) - if extra_params: - u = ujson.dumps(extra_params) - field_param.extra_params.add(key="params", value=u) - _param.fields.append(field_param) - - return _param - @classmethod def preload_param(cls, collection_name, partition_tags): return grpc_types.PreloadCollectionParam(collection_name=collection_name, partition_tag_array=partition_tags) @@ -113,61 +89,6 @@ def insert_param(cls, collection_name, vectors, partition_tag, ids=None, params= return _param - @classmethod - def insert_hybrid_param(cls, collection_name, tag, entities, vector_entities, ids=None, params=None): - entity_param = grpc_types.HEntity() - - _len = -1 - for entity in entities: - values = entity["field_values"] - if not isinstance(values, list): - raise ValueError("Field values must be a list") - if _len == -1: - _len = len(values) - else: - if len(values) != _len: - raise ValueError("Length is not equal") - - for entity in entities: - entity_param.field_names.append(entity["field_name"]) - values = entity["field_values"] - if isinstance(values, list): - if isinstance(values[0], int): - entity_param.attr_data.add(int_value=values) - elif isinstance(values[0], float): - entity_param.attr_data.add(double_value=values) - else: - raise ValueError("Field item must be int or float") - else: - raise ValueError("Field values must be a list") - # entity_param.attr_records = bytes(item_bytes) - entity_param.row_num = _len - # vectors - # entity.field_names.append(vector_field) - for vector_entity in vector_entities: - entity_param.field_names.append(vector_entity["field_name"]) - vector_field = grpc_types.VectorFieldRecord() - vectors = vector_entity["field_values"] - for vector in vectors: - if isinstance(vector, bytes): - vector_field.value.add(binary_data=vector) - else: - vector_field.value.add(float_data=vector) - entity_param.vector_data.append(vector_field) - - h_param = grpc_types.HInsertParam( - collection_name=collection_name, - partition_tag=tag, - entity=entity_param - ) - - if ids: - h_param.entity_id_array[:] = ids - params = params or dict() - params_str = ujson.dumps(params) - h_param.extra_params.add(key="params", value=params_str) - return h_param - @classmethod def index_param(cls, collection_name, index_type, params): @@ -201,193 +122,6 @@ def search_param(cls, collection_name, topk, query_records, partitions, params): return search_param - @classmethod - def search_hybrid_pb_param(cls, collection_name, query_entities, partition_tags, params): - - def term_query(node): - if len(node) > 1: - raise Exception() - for k, v in node.items(): - vs = v.get("values", None) - if not vs: - raise ValueError("Key values is missing") - - _term_param = grpc_types.TermQuery(field_name=k, - value_num=len(v["values"]), - # boost=node["boost"] - ) - if isinstance(vs, list): - if isinstance(vs[0], int): - _term_param.int_value[:] = vs - elif isinstance(vs[0], float): - _term_param.double_value[:] = vs - else: - raise ValueError("Field item must be int or float") - else: - raise ValueError("Field values must be a list") - return _term_param - - def range_query(node): - if len(node) > 1: - raise Exception("Item size > 1") - for name, query in node.items(): - _range_param = grpc_types.RangeQuery(field_name=name, - # boost=node["boost"] - ) - for k, v in query["ranges"].items(): - ope = RangeOperatorMap[k] - _range_param.operand.add(operator=ope, operand=str(v)) - - return _range_param - - def vector_query(node): - if len(node) > 1: - raise Exception("Item size > 1") - for name, query in node.items(): - _vector_param = grpc_types.VectorQuery(field_name=name, - # query_boost=node["boost"], - topk=query["topk"] - ) - for vector in query["query"]: - if isinstance(vector, bytes): - _vector_param.records.add(binary_data=vector) - else: - _vector_param.records.add(float_data=vector) - - _extra_param = query.get("params", None) - - _extra_param = _extra_param or dict() - params_str = ujson.dumps(_extra_param) - _vector_param.extra_params.add(key="params", value=params_str) - return _vector_param - - def gene_node(key, node): - if isinstance(node, list): - bqr = grpc_types.BooleanQuery(occur=BoolOccurMap[key]) - for query in node: - if "term" in query: - # bqr.general_query.append(grpc_types.GeneralQuery(term_query=term_query(query["term"]))) - bqr.general_query.add(term_query=term_query(query["term"])) - elif "range" in query: - # bqr.general_query.append(grpc_types.GeneralQuery(range_query=range_query(query["range"]))) - bqr.general_query.add(range_query=range_query(query["range"])) - elif "vector" in query: - # bqr.general_query.append(grpc_types.GeneralQuery(vector_query=vector_query(query["vector"]))) - bqr.general_query.add(vector_query=vector_query(query["vector"])) - else: - raise ValueError("Unknown ") - - return grpc_types.GeneralQuery(boolean_query=bqr) - - keys = node.keys() - sq = {"must", "must_not", "should"} - if len(keys) + len(sq) > len(set(keys) | sq): - gqs = list() - for k, v in node.items(): - gq = gene_node(k, v) - gqs.append(gq) - if len(gqs) == 1: - return gqs[0] - - bq0 = grpc_types.BooleanQuery(occur=grpc_types.INVALID) - for g in gqs: - bq0.general_query.append(g) - return grpc_types.GeneralQuery(boolean_query=bq0) - - # bqr = grpc_types.BooleanQuery(occur=BoolOccurMap[key]) - # for k, v in node.items(): - # field_name = node["field_name"] - # if k == "term": - # bqr.general_query.append(grpc_types.GeneralQuery(term_query=term_query(v))) - # elif k == "range": - # bqr.general_query.append(grpc_types.GeneralQuery(range_query=range_query(v))) - # elif k == "vector": - # bqr.general_query.append(grpc_types.GeneralQuery(vector_query=vector_query(v))) - # else: - # raise ValueError("Unknown ") - # - # return grpc_types.GeneralQuery(boolean_query=bqr) - - # if len(node) == 1: - # for k, v in node.items(): - # if k in ("must", "must_not", "should"): - # bq = grpc_types.BooleanQuery(occur=BoolOccurMap[k]) - # - # for k, v in node.items(): - # if k in ("must", "must_not", "should"): - # len(node) == 1: - # vqq = grpc_types.BooleanQuery(occur=grpc_types.INVALID) - # if k in ("must", "must_not", "should"): - # bq = grpc_types.BooleanQuery(occur=BoolOccurMap[k]) - # vqq.general_query.append(grpc_types.GeneralQuery(boolean_query=bq)) - - _param = grpc_types.HSearchParamPB( - collection_name=collection_name, - partition_tag_array=partition_tags - ) - - _param.general_query.CopyFrom(gene_node(None, query_entities["bool"])) - - for k, v in query_entities.items(): - if k == "bool": - continue - _param.extra_params.add(key=k, value=ujson.dumps(v)) - - # import pdb;pdb.set_trace() - # grpc_types.GeneralQuery(boolean_query=bool_node(query_entities)) - # params = params or dict() - # params_str = ujson.dumps(params) - # _param.extra_params.add(key="params", value=params_str) - - return _param - - @classmethod - def search_hybrid_param(cls, collection_name, vector_params, dsl, partition_tags, params): - # def replace_range_item(d): - # if not isinstance(d, dict): - # return - # - # if "range" not in d: - # for ki, vi in d.itmes(): - # replace_range_item(vi) - # else: - # range = d["range"] - # for ki, vi in range.itmes(): - # ranges = vi["values"] - # for kii, vii in ranges.items(): - # ranges.pop(kii) - # ranges[int(kii)] = vii - # return - - # dsl_out = copy.deepcopy(dsl) - # replace_range_item(dsl_out) - - dsl_str = dsl if isinstance(dsl, str) else ujson.dumps(dsl) - hybrid_param = grpc_types.HSearchParam(collection_name=collection_name, - partition_tag_array=partition_tags or [], - dsl=dsl_str) - - for v_p in vector_params: - if "vector" not in v_p: - raise ParamError("Vector param must contains key \'vector\'") - # TODO: may need to copy vector_params - query_vectors = v_p.pop("vector") - json_ = ujson.dumps(v_p) - - vector_param = grpc_types.VectorParam(json=json_) - for vector in query_vectors: - if isinstance(vector, bytes): - vector_param.row_record.add(binary_data=vector) - else: - vector_param.row_record.add(float_data=vector) - hybrid_param.vector_param.append(vector_param) - - _params = params or dict() - for k, v in _params.items(): - hybrid_param.extra_params.add(key=k, value=ujson.dumps(v)) - - return hybrid_param - @classmethod def search_by_ids_param(cls, collection_name, ids, top_k, partition_tag_array, params): _param = grpc_types.SearchByIDParam( diff --git a/milvus/client/stub.py b/milvus/client/stub.py index f30553cf2..381423c3b 100644 --- a/milvus/client/stub.py +++ b/milvus/client/stub.py @@ -266,11 +266,6 @@ def create_collection(self, param, timeout=30): with self._connection() as handler: return handler.create_collection(collection_name, dim, index_file_size, metric_type, collection_param, timeout) - @check_connect - def create_hybrid_collection(self, collection_name, fields, timeout=30): - with self._connection() as handler: - return handler.create_hybrid_collection(collection_name, fields, timeout) - @check_connect def has_collection(self, collection_name, timeout=30): """ @@ -440,11 +435,6 @@ def insert(self, collection_name, records, ids=None, partition_tag=None, params= with self._connection() as handler: return handler.insert(collection_name, records, ids, partition_tag, params, timeout, **kwargs) - @check_connect - def insert_hybrid(self, collection_name, entities, vector_entities, ids=None, partition_tag=None, params=None): - with self._connection() as handler: - return handler.insert_hybrid(collection_name, entities, vector_entities, ids, partition_tag, params) - def get_entity_by_id(self, collection_name, ids, timeout=None): """ Returns raw vectors according to ids. @@ -464,11 +454,6 @@ def get_entity_by_id(self, collection_name, ids, timeout=None): with self._connection() as handler: return handler.get_vectors_by_ids(collection_name, ids, timeout=timeout) - def get_hybrid_entity_by_id(self, collection_name, ids): - check_pass_param(collection_name=collection_name, ids=ids) - with self._connection() as handler: - return handler.get_hybrid_entity_by_id(collection_name, ids) - @check_connect def list_id_in_segment(self, collection_name, segment_name, timeout=None): check_pass_param(collection_name=collection_name) @@ -670,16 +655,6 @@ def search(self, collection_name, top_k, query_records, partition_tags=None, par with self._connection() as handler: return handler.search(collection_name, top_k, query_records, partition_tags, params, timeout, **kwargs) - @check_connect - def search_hybrid_pb(self, collection_name, query_entities, partition_tags=None, params=None, **kwargs): - with self._connection() as handler: - return handler.search_hybrid_pb(collection_name, query_entities, partition_tags, params, **kwargs) - - @check_connect - def search_hybrid(self, collection_name, vector_params, dsl, partition_tags=None, params=None, **kwargs): - with self._connection() as handler: - return handler.search_hybrid(collection_name, vector_params, dsl, partition_tags, params, **kwargs) - @check_connect def search_in_segment(self, collection_name, file_ids, query_records, top_k, params=None, timeout=None, **kwargs): """ diff --git a/requirements.txt b/requirements.txt index e45c8d8d7..ff4bb4cb0 100644 --- a/requirements.txt +++ b/requirements.txt @@ -31,7 +31,7 @@ numpy==1.18.1 packaging==20.0 pkginfo==1.5.0.1 pluggy==0.13.1 -pprint==0.1 +# pprint==0.1 protobuf==3.11.2 py==1.8.1 pycparser==2.19