Skip to content

Commit

Permalink
Remove unused hybrid APIs (#433)
Browse files Browse the repository at this point in the history
Signed-off-by: yinghao.zou <[email protected]>
  • Loading branch information
BossZou committed Mar 8, 2021
1 parent 0ef9fa7 commit 66829d0
Show file tree
Hide file tree
Showing 4 changed files with 8 additions and 293 deletions.
8 changes: 7 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,10 @@
# pymilvus 1.0.0(TBD)
# pymilvus 1.0.x(TBD)

## Task
- \#432 - Remove hybrid APIs


# pymilvus 1.0.0(2021-03-05)

## Improvement
- \#424 - Prepare for 1.x
Expand Down
266 changes: 0 additions & 266 deletions milvus/client/prepare.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,30 +59,6 @@ def collection_schema(cls, collection_name, dimension, index_file_size, metric_t

return _param

@classmethod
def collection_hybrid_schema(cls, collection_name, fields):
    """Build the gRPC ``Mapping`` message for a hybrid collection.

    :param collection_name: name stored on the mapping message.
    :param fields: iterable of dict-like field descriptions. Each one needs a
        ``"field_name"`` key plus either ``"data_type"`` or ``"dimension"``;
        ``"extra_params"`` is optional.
    :return: the populated ``grpc_types.Mapping``.
    :raises ValueError: a field has neither ``"data_type"`` nor ``"dimension"``.
    """
    mapping = grpc_types.Mapping(collection_name=collection_name)

    for spec in fields:
        # "data_type" takes precedence when both keys are present.
        if "data_type" in spec:
            field_type = grpc_types.FieldType(data_type=int(spec["data_type"]))
        elif "dimension" in spec:
            vector_param = grpc_types.VectorFieldParam(dimension=spec["dimension"])
            field_type = grpc_types.FieldType(vector_param=vector_param)
        else:
            raise ValueError("Collection field not support {}".format(spec))

        schema_entry = grpc_types.FieldParam(name=spec["field_name"], type=field_type)

        # Extra params travel as a single JSON string under the "params" key.
        extras = spec.get("extra_params")
        if extras:
            schema_entry.extra_params.add(key="params", value=ujson.dumps(extras))

        mapping.fields.append(schema_entry)

    return mapping

@classmethod
def preload_param(cls, collection_name, partition_tags):
    """Build the ``PreloadCollectionParam`` message for the given collection and partition tags."""
    request = grpc_types.PreloadCollectionParam(
        collection_name=collection_name,
        partition_tag_array=partition_tags,
    )
    return request
Expand Down Expand Up @@ -113,61 +89,6 @@ def insert_param(cls, collection_name, vectors, partition_tag, ids=None, params=

return _param

@classmethod
def insert_hybrid_param(cls, collection_name, tag, entities, vector_entities, ids=None, params=None):
    """Build the ``HInsertParam`` message for a hybrid insert.

    :param collection_name: target collection name.
    :param tag: partition tag stored on the request.
    :param entities: iterable of dicts with ``"field_name"`` and
        ``"field_values"``; every ``"field_values"`` must be a non-empty list
        of ints or floats, and all lists must have the same length.
    :param vector_entities: iterable of dicts with ``"field_name"`` and
        ``"field_values"``; each value is stored as ``binary_data`` when it is
        ``bytes`` and as ``float_data`` otherwise.
    :param ids: optional entity ids; copied into ``entity_id_array`` when truthy.
    :param params: optional dict, JSON-encoded into ``extra_params["params"]``.
    :return: the populated ``grpc_types.HInsertParam``.
    :raises ValueError: field values are not a list, are empty, differ in
        length between fields, or hold items that are neither int nor float.
    """
    entity_param = grpc_types.HEntity()

    # Pass 1: validate the shape of every attribute column before building
    # anything, so shape errors are reported ahead of item-type errors.
    row_count = -1
    for entity in entities:
        values = entity["field_values"]
        if not isinstance(values, list):
            raise ValueError("Field values must be a list")
        if row_count == -1:
            row_count = len(values)
        elif len(values) != row_count:
            raise ValueError("Length is not equal")

    # Pass 2: copy attribute columns. The list check is not repeated here
    # because pass 1 already guarantees it.
    for entity in entities:
        entity_param.field_names.append(entity["field_name"])
        values = entity["field_values"]
        if not values:
            # The first item selects the column type, so an empty column
            # cannot be encoded; fail with a clear error instead of IndexError.
            raise ValueError("Field values must not be empty")
        if isinstance(values[0], int):
            entity_param.attr_data.add(int_value=values)
        elif isinstance(values[0], float):
            entity_param.attr_data.add(double_value=values)
        else:
            raise ValueError("Field item must be int or float")

    # NOTE(review): row_num stays -1 when `entities` is empty -- confirm the
    # server accepts that for vector-only inserts.
    entity_param.row_num = row_count

    for vector_entity in vector_entities:
        entity_param.field_names.append(vector_entity["field_name"])
        vector_field = grpc_types.VectorFieldRecord()
        for vector in vector_entity["field_values"]:
            if isinstance(vector, bytes):
                vector_field.value.add(binary_data=vector)
            else:
                vector_field.value.add(float_data=vector)
        entity_param.vector_data.append(vector_field)

    h_param = grpc_types.HInsertParam(
        collection_name=collection_name,
        partition_tag=tag,
        entity=entity_param
    )

    if ids:
        h_param.entity_id_array[:] = ids

    h_param.extra_params.add(key="params", value=ujson.dumps(params or {}))
    return h_param

@classmethod
def index_param(cls, collection_name, index_type, params):

Expand Down Expand Up @@ -201,193 +122,6 @@ def search_param(cls, collection_name, topk, query_records, partitions, params):

return search_param

@classmethod
def search_hybrid_pb_param(cls, collection_name, query_entities, partition_tags, params):
    """Build the ``HSearchParamPB`` message from a nested boolean query dict.

    :param collection_name: target collection name.
    :param query_entities: dict whose ``"bool"`` entry holds the query tree;
        every other entry is JSON-encoded into ``extra_params`` under its own key.
    :param partition_tags: stored as ``partition_tag_array`` on the request.
    :param params: accepted for interface compatibility; not used here.
    :return: the populated ``grpc_types.HSearchParamPB``.
    :raises ValueError: a term has no values, values are not a list of
        int/float, or a leaf query is not ``term`` / ``range`` / ``vector``.
    :raises Exception: a leaf node names more than one field.
    """

    def term_query(node):
        """Turn ``{field_name: {"values": [...]}}`` into a ``TermQuery``."""
        if len(node) > 1:
            # Same message as range_query / vector_query.
            raise Exception("Item size > 1")
        for k, v in node.items():
            vs = v.get("values", None)
            if not vs:
                raise ValueError("Key values is missing")

            _term_param = grpc_types.TermQuery(field_name=k, value_num=len(vs))
            if isinstance(vs, list):
                # The first item decides which typed array receives the values.
                if isinstance(vs[0], int):
                    _term_param.int_value[:] = vs
                elif isinstance(vs[0], float):
                    _term_param.double_value[:] = vs
                else:
                    raise ValueError("Field item must be int or float")
            else:
                raise ValueError("Field values must be a list")
            return _term_param
        # Empty node: nothing to build.
        return None

    def range_query(node):
        """Turn ``{field_name: {"ranges": {op: operand}}}`` into a ``RangeQuery``."""
        if len(node) > 1:
            raise Exception("Item size > 1")
        for name, query in node.items():
            _range_param = grpc_types.RangeQuery(field_name=name)
            for k, v in query["ranges"].items():
                ope = RangeOperatorMap[k]
                # Operands are sent as strings regardless of their Python type.
                _range_param.operand.add(operator=ope, operand=str(v))
            return _range_param
        # Empty node: nothing to build.
        return None

    def vector_query(node):
        """Turn ``{field_name: {"topk", "query", "params"?}}`` into a ``VectorQuery``."""
        if len(node) > 1:
            raise Exception("Item size > 1")
        for name, query in node.items():
            _vector_param = grpc_types.VectorQuery(field_name=name, topk=query["topk"])
            for vector in query["query"]:
                if isinstance(vector, bytes):
                    _vector_param.records.add(binary_data=vector)
                else:
                    _vector_param.records.add(float_data=vector)

            params_str = ujson.dumps(query.get("params", None) or {})
            _vector_param.extra_params.add(key="params", value=params_str)
            return _vector_param
        # Empty node: nothing to build.
        return None

    def gene_node(key, node):
        """Recursively convert a query (sub)tree into a ``GeneralQuery``."""
        if isinstance(node, list):
            # A list holds the leaf queries of one boolean clause named `key`.
            bqr = grpc_types.BooleanQuery(occur=BoolOccurMap[key])
            for query in node:
                if "term" in query:
                    bqr.general_query.add(term_query=term_query(query["term"]))
                elif "range" in query:
                    bqr.general_query.add(range_query=range_query(query["range"]))
                elif "vector" in query:
                    bqr.general_query.add(vector_query=vector_query(query["vector"]))
                else:
                    raise ValueError("Unknown ")

            return grpc_types.GeneralQuery(boolean_query=bqr)

        bool_keys = {"must", "must_not", "should"}
        if bool_keys.isdisjoint(node.keys()):
            # NOTE(review): a dict without any boolean clause yields None,
            # which the caller's CopyFrom will reject -- confirm this is intended.
            return None

        gqs = [gene_node(k, v) for k, v in node.items()]
        if len(gqs) == 1:
            return gqs[0]

        # Several clauses are wrapped in one outer BooleanQuery.
        bq0 = grpc_types.BooleanQuery(occur=grpc_types.INVALID)
        for g in gqs:
            bq0.general_query.append(g)
        return grpc_types.GeneralQuery(boolean_query=bq0)

    _param = grpc_types.HSearchParamPB(
        collection_name=collection_name,
        partition_tag_array=partition_tags
    )

    _param.general_query.CopyFrom(gene_node(None, query_entities["bool"]))

    for k, v in query_entities.items():
        if k == "bool":
            continue
        _param.extra_params.add(key=k, value=ujson.dumps(v))

    return _param

@classmethod
def search_hybrid_param(cls, collection_name, vector_params, dsl, partition_tags, params):
    """Build the ``HSearchParam`` message for a DSL-based hybrid search.

    :param collection_name: target collection name.
    :param vector_params: iterable of dicts; each must contain a ``"vector"``
        entry holding the query vectors. The remaining entries are
        JSON-encoded into the ``VectorParam.json`` field. The dicts are not
        modified.
    :param dsl: query DSL; used as-is when it is a ``str``, otherwise
        JSON-encoded.
    :param partition_tags: partition tags; a falsy value is sent as ``[]``.
    :param params: optional dict; each value is JSON-encoded into
        ``extra_params`` under its own key.
    :return: the populated ``grpc_types.HSearchParam``.
    :raises ParamError: a vector param has no ``"vector"`` key.
    """
    dsl_str = dsl if isinstance(dsl, str) else ujson.dumps(dsl)
    hybrid_param = grpc_types.HSearchParam(collection_name=collection_name,
                                           partition_tag_array=partition_tags or [],
                                           dsl=dsl_str)

    for v_p in vector_params:
        if "vector" not in v_p:
            raise ParamError("Vector param must contains key \'vector\'")

        # Split on a shallow copy so the caller's dict keeps its "vector"
        # entry and can be reused for another search.
        remaining = dict(v_p)
        query_vectors = remaining.pop("vector")

        vector_param = grpc_types.VectorParam(json=ujson.dumps(remaining))
        for vector in query_vectors:
            if isinstance(vector, bytes):
                vector_param.row_record.add(binary_data=vector)
            else:
                vector_param.row_record.add(float_data=vector)
        hybrid_param.vector_param.append(vector_param)

    for k, v in (params or {}).items():
        hybrid_param.extra_params.add(key=k, value=ujson.dumps(v))

    return hybrid_param

@classmethod
def search_by_ids_param(cls, collection_name, ids, top_k, partition_tag_array, params):
_param = grpc_types.SearchByIDParam(
Expand Down
25 changes: 0 additions & 25 deletions milvus/client/stub.py
Original file line number Diff line number Diff line change
Expand Up @@ -266,11 +266,6 @@ def create_collection(self, param, timeout=30):
with self._connection() as handler:
return handler.create_collection(collection_name, dim, index_file_size, metric_type, collection_param, timeout)

@check_connect
def create_hybrid_collection(self, collection_name, fields, timeout=30):
    """Forward a hybrid-collection creation request to the connection handler.

    :param collection_name: passed through unchanged.
    :param fields: passed through unchanged.
    :param timeout: passed through unchanged; defaults to 30.
    :return: whatever the handler's ``create_hybrid_collection`` returns.
    """
    # The handler call stays inside the `with` so the connection is still
    # held while the request runs.
    with self._connection() as conn:
        return conn.create_hybrid_collection(collection_name, fields, timeout)

@check_connect
def has_collection(self, collection_name, timeout=30):
"""
Expand Down Expand Up @@ -440,11 +435,6 @@ def insert(self, collection_name, records, ids=None, partition_tag=None, params=
with self._connection() as handler:
return handler.insert(collection_name, records, ids, partition_tag, params, timeout, **kwargs)

@check_connect
def insert_hybrid(self, collection_name, entities, vector_entities, ids=None, partition_tag=None, params=None):
    """Forward a hybrid insert to the connection handler.

    All arguments are passed positionally to the handler's ``insert_hybrid``
    in the order: collection name, entities, vector entities, ids, partition
    tag, params.

    :return: whatever the handler's ``insert_hybrid`` returns.
    """
    with self._connection() as conn:
        return conn.insert_hybrid(
            collection_name,
            entities,
            vector_entities,
            ids,
            partition_tag,
            params,
        )

def get_entity_by_id(self, collection_name, ids, timeout=None):
"""
Returns raw vectors according to ids.
Expand All @@ -464,11 +454,6 @@ def get_entity_by_id(self, collection_name, ids, timeout=None):
with self._connection() as handler:
return handler.get_vectors_by_ids(collection_name, ids, timeout=timeout)

def get_hybrid_entity_by_id(self, collection_name, ids):
    """Validate the arguments, then fetch hybrid entities by id via the handler.

    :param collection_name: checked with ``check_pass_param`` and passed through.
    :param ids: checked with ``check_pass_param`` and passed through.
    :return: whatever the handler's ``get_hybrid_entity_by_id`` returns.
    """
    # Validation runs before a connection is acquired.
    check_pass_param(collection_name=collection_name, ids=ids)

    with self._connection() as conn:
        return conn.get_hybrid_entity_by_id(collection_name, ids)

@check_connect
def list_id_in_segment(self, collection_name, segment_name, timeout=None):
check_pass_param(collection_name=collection_name)
Expand Down Expand Up @@ -670,16 +655,6 @@ def search(self, collection_name, top_k, query_records, partition_tags=None, par
with self._connection() as handler:
return handler.search(collection_name, top_k, query_records, partition_tags, params, timeout, **kwargs)

@check_connect
def search_hybrid_pb(self, collection_name, query_entities, partition_tags=None, params=None, **kwargs):
    """Forward a protobuf-style hybrid search to the connection handler.

    Positional arguments and any extra keyword arguments are passed through
    unchanged.

    :return: whatever the handler's ``search_hybrid_pb`` returns.
    """
    with self._connection() as conn:
        return conn.search_hybrid_pb(
            collection_name,
            query_entities,
            partition_tags,
            params,
            **kwargs
        )

@check_connect
def search_hybrid(self, collection_name, vector_params, dsl, partition_tags=None, params=None, **kwargs):
    """Forward a DSL-based hybrid search to the connection handler.

    Positional arguments and any extra keyword arguments are passed through
    unchanged.

    :return: whatever the handler's ``search_hybrid`` returns.
    """
    with self._connection() as conn:
        return conn.search_hybrid(
            collection_name,
            vector_params,
            dsl,
            partition_tags,
            params,
            **kwargs
        )

@check_connect
def search_in_segment(self, collection_name, file_ids, query_records, top_k, params=None, timeout=None, **kwargs):
"""
Expand Down
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ numpy==1.18.1
packaging==20.0
pkginfo==1.5.0.1
pluggy==0.13.1
pprint==0.1
# pprint==0.1
protobuf==3.11.2
py==1.8.1
pycparser==2.19
Expand Down

0 comments on commit 66829d0

Please sign in to comment.