Skip to content

Commit

Permalink
feat: [2.4] [Sparse Float Vector] added some integration tests (#32093)
Browse files Browse the repository at this point in the history
add some integration tests for sparse float vector support

#29419

pr: #31062

Signed-off-by: Buqian Zheng <[email protected]>
  • Loading branch information
zhengbuqian authored Apr 10, 2024
1 parent bb2ffd4 commit c8aacc6
Show file tree
Hide file tree
Showing 12 changed files with 809 additions and 47 deletions.
2 changes: 1 addition & 1 deletion internal/proxy/util.go
Original file line number Diff line number Diff line change
Expand Up @@ -1173,7 +1173,7 @@ func fillFieldsDataBySchema(schema *schemapb.CollectionSchema, insertMsg *msgstr
}

if len(insertMsg.FieldsData) != requiredFieldsNum {
log.Warn("the number of fields is less than needed",
log.Warn("the number of fields is not the same as needed",
zap.Int("fieldNum", len(insertMsg.FieldsData)),
zap.Int("requiredFieldNum", requiredFieldsNum),
zap.String("collection", schema.GetName()))
Expand Down
2 changes: 2 additions & 0 deletions pkg/common/common.go
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,8 @@ const (
DimKey = "dim"
MaxLengthKey = "max_length"
MaxCapacityKey = "max_capacity"

DropRatioBuildKey = "drop_ratio_build"
)

// Collection properties key
Expand Down
77 changes: 69 additions & 8 deletions tests/integration/getvector/get_vector_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -86,19 +86,23 @@ func (s *TestGetVectorSuite) run() {
IndexParams: nil,
AutoID: false,
}
typeParams := []*commonpb.KeyValuePair{}
if !typeutil.IsSparseFloatVectorType(s.vecType) {
typeParams = []*commonpb.KeyValuePair{
{
Key: common.DimKey,
Value: fmt.Sprintf("%d", dim),
},
}
}
fVec := &schemapb.FieldSchema{
FieldID: 101,
Name: vecFieldName,
IsPrimaryKey: false,
Description: "",
DataType: s.vecType,
TypeParams: []*commonpb.KeyValuePair{
{
Key: common.DimKey,
Value: fmt.Sprintf("%d", dim),
},
},
IndexParams: nil,
TypeParams: typeParams,
IndexParams: nil,
}
schema := integration.ConstructSchema(collection, dim, false, pk, fVec)
marshaledSchema, err := proto.Marshal(schema)
Expand Down Expand Up @@ -126,6 +130,8 @@ func (s *TestGetVectorSuite) run() {
vecFieldData = integration.NewFloat16VectorFieldData(vecFieldName, NB, dim)
// } else if s.vecType == schemapb.DataType_BFloat16Vector {
// vecFieldData = integration.NewBFloat16VectorFieldData(vecFieldName, NB, dim)
} else if typeutil.IsSparseFloatVectorType(s.vecType) {
vecFieldData = integration.NewSparseFloatVectorFieldData(vecFieldName, NB)
} else {
vecFieldData = integration.NewBinaryVectorFieldData(vecFieldName, NB, dim)
}
Expand Down Expand Up @@ -193,7 +199,7 @@ func (s *TestGetVectorSuite) run() {

searchResp, err := s.Cluster.Proxy.Search(ctx, searchReq)
s.Require().NoError(err)
s.Require().Equal(searchResp.GetStatus().GetErrorCode(), commonpb.ErrorCode_Success)
s.Require().Equal(commonpb.ErrorCode_Success, searchResp.GetStatus().GetErrorCode())

result := searchResp.GetResults()
if s.pkType == schemapb.DataType_Int64 {
Expand Down Expand Up @@ -253,6 +259,21 @@ func (s *TestGetVectorSuite) run() {
// }
// }
} else if s.vecType == schemapb.DataType_BFloat16Vector {
} else if s.vecType == schemapb.DataType_SparseFloatVector {
s.Require().Len(result.GetFieldsData()[vecFieldIndex].GetVectors().GetSparseFloatVector().GetContents(), nq*topk)
rawData := vecFieldData.GetVectors().GetSparseFloatVector().GetContents()
resData := result.GetFieldsData()[vecFieldIndex].GetVectors().GetSparseFloatVector().GetContents()
if s.pkType == schemapb.DataType_Int64 {
for i, id := range result.GetIds().GetIntId().GetData() {
s.Require().Equal(rawData[id], resData[i])
}
} else {
for i, idStr := range result.GetIds().GetStrId().GetData() {
id, err := strconv.Atoi(idStr)
s.Require().NoError(err)
s.Require().Equal(rawData[id], resData[i])
}
}
} else {
s.Require().Len(result.GetFieldsData()[vecFieldIndex].GetVectors().GetBinaryVector(), nq*topk*dim/8)
rawData := vecFieldData.GetVectors().GetBinaryVector()
Expand Down Expand Up @@ -430,6 +451,46 @@ func (s *TestGetVectorSuite) TestGetVector_With_DB_Name() {
s.run()
}

// TestGetVector_Sparse_SPARSE_INVERTED_INDEX configures the suite for a sparse
// float vector field indexed with integration.IndexSparseInvertedIndex under
// metric.IP, keyed by int64 primary keys, and then executes the shared run
// scenario.
func (s *TestGetVectorSuite) TestGetVector_Sparse_SPARSE_INVERTED_INDEX() {
	// Field types: primary key and vector column.
	s.pkType, s.vecType = schemapb.DataType_Int64, schemapb.DataType_SparseFloatVector
	// Index and metric used for both index creation and search.
	s.indexType, s.metricType = integration.IndexSparseInvertedIndex, metric.IP
	// Number of queries and results per query.
	s.nq, s.topK = 10, 10

	s.run()
}

// TestGetVector_Sparse_SPARSE_INVERTED_INDEX_StrPK configures the suite for a
// sparse float vector field indexed with integration.IndexSparseInvertedIndex
// under metric.IP, keyed by VarChar primary keys, and then executes the shared
// run scenario.
func (s *TestGetVectorSuite) TestGetVector_Sparse_SPARSE_INVERTED_INDEX_StrPK() {
	// Field types: primary key and vector column.
	s.pkType, s.vecType = schemapb.DataType_VarChar, schemapb.DataType_SparseFloatVector
	// Index and metric used for both index creation and search.
	s.indexType, s.metricType = integration.IndexSparseInvertedIndex, metric.IP
	// Number of queries and results per query.
	s.nq, s.topK = 10, 10

	s.run()
}

// TestGetVector_Sparse_SPARSE_WAND configures the suite for a sparse float
// vector field indexed with integration.IndexSparseWand under metric.IP, keyed
// by int64 primary keys, and then executes the shared run scenario.
func (s *TestGetVectorSuite) TestGetVector_Sparse_SPARSE_WAND() {
	// Field types: primary key and vector column.
	s.pkType, s.vecType = schemapb.DataType_Int64, schemapb.DataType_SparseFloatVector
	// Index and metric used for both index creation and search.
	s.indexType, s.metricType = integration.IndexSparseWand, metric.IP
	// Number of queries and results per query.
	s.nq, s.topK = 10, 10

	s.run()
}

// TestGetVector_Sparse_SPARSE_WAND_StrPK configures the suite for a sparse
// float vector field indexed with integration.IndexSparseWand under metric.IP,
// keyed by VarChar primary keys, and then executes the shared run scenario.
func (s *TestGetVectorSuite) TestGetVector_Sparse_SPARSE_WAND_StrPK() {
	// Field types: primary key and vector column.
	s.pkType, s.vecType = schemapb.DataType_VarChar, schemapb.DataType_SparseFloatVector
	// Index and metric used for both index creation and search.
	s.indexType, s.metricType = integration.IndexSparseWand, metric.IP
	// Number of queries and results per query.
	s.nq, s.topK = 10, 10

	s.run()
}

//func (s *TestGetVectorSuite) TestGetVector_DISKANN_L2() {
// s.nq = 10
// s.topK = 10
Expand Down
46 changes: 38 additions & 8 deletions tests/integration/hellomilvus/hello_milvus_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,9 +38,13 @@ import (

// HelloMilvusSuite is an integration test suite that embeds
// integration.MiniClusterSuite. Its Test* methods set the fields below and
// then call run, which reads them to parameterize the scenario.
type HelloMilvusSuite struct {
integration.MiniClusterSuite

// indexType is the index type handed to integration.ConstructIndexParam and
// integration.GetSearchParams by run.
indexType string
// metricType is the metric handed to the index parameters, the search
// parameters, and the search request by run.
metricType string
// vecType is the vector field data type used when building the schema,
// choosing the generated vector column, and constructing the search request.
vecType schemapb.DataType
}

func (s *HelloMilvusSuite) TestHelloMilvus() {
func (s *HelloMilvusSuite) run() {
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
c := s.Cluster
Expand All @@ -53,7 +57,7 @@ func (s *HelloMilvusSuite) TestHelloMilvus() {

collectionName := "TestHelloMilvus" + funcutil.GenRandomStr()

schema := integration.ConstructSchema(collectionName, dim, true)
schema := integration.ConstructSchemaOfVecDataType(collectionName, dim, true, s.vecType)
marshaledSchema, err := proto.Marshal(schema)
s.NoError(err)

Expand All @@ -75,7 +79,12 @@ func (s *HelloMilvusSuite) TestHelloMilvus() {
s.Equal(showCollectionsResp.GetStatus().GetErrorCode(), commonpb.ErrorCode_Success)
log.Info("ShowCollections result", zap.Any("showCollectionsResp", showCollectionsResp))

fVecColumn := integration.NewFloatVectorFieldData(integration.FloatVecField, rowNum, dim)
var fVecColumn *schemapb.FieldData
if s.vecType == schemapb.DataType_SparseFloatVector {
fVecColumn = integration.NewSparseFloatVectorFieldData(integration.SparseFloatVecField, rowNum)
} else {
fVecColumn = integration.NewFloatVectorFieldData(integration.FloatVecField, rowNum, dim)
}
hashKeys := integration.GenerateHashKeys(rowNum)
insertResult, err := c.Proxy.Insert(ctx, &milvuspb.InsertRequest{
DbName: dbName,
Expand Down Expand Up @@ -111,17 +120,17 @@ func (s *HelloMilvusSuite) TestHelloMilvus() {
// create index
createIndexStatus, err := c.Proxy.CreateIndex(ctx, &milvuspb.CreateIndexRequest{
CollectionName: collectionName,
FieldName: integration.FloatVecField,
FieldName: fVecColumn.FieldName,
IndexName: "_default",
ExtraParams: integration.ConstructIndexParam(dim, integration.IndexFaissIvfFlat, metric.L2),
ExtraParams: integration.ConstructIndexParam(dim, s.indexType, s.metricType),
})
if createIndexStatus.GetErrorCode() != commonpb.ErrorCode_Success {
log.Warn("createIndexStatus fail reason", zap.String("reason", createIndexStatus.GetReason()))
}
s.NoError(err)
s.Equal(commonpb.ErrorCode_Success, createIndexStatus.GetErrorCode())

s.WaitForIndexBuilt(ctx, collectionName, integration.FloatVecField)
s.WaitForIndexBuilt(ctx, collectionName, fVecColumn.FieldName)

// load
loadStatus, err := c.Proxy.LoadCollection(ctx, &milvuspb.LoadCollectionRequest{
Expand All @@ -141,9 +150,9 @@ func (s *HelloMilvusSuite) TestHelloMilvus() {
topk := 10
roundDecimal := -1

params := integration.GetSearchParams(integration.IndexFaissIvfFlat, metric.L2)
params := integration.GetSearchParams(s.indexType, s.metricType)
searchReq := integration.ConstructSearchRequest("", collectionName, expr,
integration.FloatVecField, schemapb.DataType_FloatVector, nil, metric.L2, params, nq, dim, topk, roundDecimal)
fVecColumn.FieldName, s.vecType, nil, s.metricType, params, nq, dim, topk, roundDecimal)

searchResult, err := c.Proxy.Search(ctx, searchReq)

Expand All @@ -165,6 +174,27 @@ func (s *HelloMilvusSuite) TestHelloMilvus() {
log.Info("TestHelloMilvus succeed")
}

// TestHelloMilvus_basic executes the shared run scenario on a dense float
// vector field with integration.IndexFaissIvfFlat and metric.L2.
func (s *HelloMilvusSuite) TestHelloMilvus_basic() {
	// Vector column type first, then the index/metric pair applied to it.
	s.vecType = schemapb.DataType_FloatVector
	s.indexType, s.metricType = integration.IndexFaissIvfFlat, metric.L2

	s.run()
}

// TestHelloMilvus_sparse_basic executes the shared run scenario on a sparse
// float vector field with integration.IndexSparseInvertedIndex and metric.IP.
func (s *HelloMilvusSuite) TestHelloMilvus_sparse_basic() {
	// Vector column type first, then the index/metric pair applied to it.
	s.vecType = schemapb.DataType_SparseFloatVector
	s.indexType, s.metricType = integration.IndexSparseInvertedIndex, metric.IP

	s.run()
}

// TestHelloMilvus_sparse_wand_basic executes the shared run scenario on a
// sparse float vector field with integration.IndexSparseWand and metric.IP.
func (s *HelloMilvusSuite) TestHelloMilvus_sparse_wand_basic() {
	// Vector column type first, then the index/metric pair applied to it.
	s.vecType = schemapb.DataType_SparseFloatVector
	s.indexType, s.metricType = integration.IndexSparseWand, metric.IP

	s.run()
}

func TestHelloMilvus(t *testing.T) {
suite.Run(t, new(HelloMilvusSuite))
}
37 changes: 32 additions & 5 deletions tests/integration/hybridsearch/hybridsearch_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ func (s *HybridSearchSuite) TestHybridSearch() {
&schemapb.FieldSchema{Name: integration.Int64Field, DataType: schemapb.DataType_Int64, IsPrimaryKey: true, AutoID: true},
&schemapb.FieldSchema{Name: integration.FloatVecField, DataType: schemapb.DataType_FloatVector, TypeParams: []*commonpb.KeyValuePair{{Key: common.DimKey, Value: "128"}}},
&schemapb.FieldSchema{Name: integration.BinVecField, DataType: schemapb.DataType_BinaryVector, TypeParams: []*commonpb.KeyValuePair{{Key: common.DimKey, Value: "128"}}},
&schemapb.FieldSchema{Name: integration.SparseFloatVecField, DataType: schemapb.DataType_SparseFloatVector},
)
marshaledSchema, err := proto.Marshal(schema)
s.NoError(err)
Expand All @@ -67,11 +68,12 @@ func (s *HybridSearchSuite) TestHybridSearch() {

fVecColumn := integration.NewFloatVectorFieldData(integration.FloatVecField, rowNum, dim)
bVecColumn := integration.NewBinaryVectorFieldData(integration.BinVecField, rowNum, dim)
sparseVecColumn := integration.NewSparseFloatVectorFieldData(integration.SparseFloatVecField, rowNum)
hashKeys := integration.GenerateHashKeys(rowNum)
insertResult, err := c.Proxy.Insert(ctx, &milvuspb.InsertRequest{
DbName: dbName,
CollectionName: collectionName,
FieldsData: []*schemapb.FieldData{fVecColumn, bVecColumn},
FieldsData: []*schemapb.FieldData{fVecColumn, bVecColumn, sparseVecColumn},
HashKeys: hashKeys,
NumRows: uint32(rowNum),
})
Expand Down Expand Up @@ -143,6 +145,28 @@ func (s *HybridSearchSuite) TestHybridSearch() {
}
s.WaitForIndexBuiltWithIndexName(ctx, collectionName, integration.BinVecField, "_default_binary")

// load with index on partial vector fields
loadStatus, err = c.Proxy.LoadCollection(ctx, &milvuspb.LoadCollectionRequest{
DbName: dbName,
CollectionName: collectionName,
})
s.NoError(err)
s.Error(merr.Error(loadStatus))

// create index for sparse float vector
createIndexStatus, err = c.Proxy.CreateIndex(ctx, &milvuspb.CreateIndexRequest{
CollectionName: collectionName,
FieldName: integration.SparseFloatVecField,
IndexName: "_default_sparse",
ExtraParams: integration.ConstructIndexParam(dim, integration.IndexSparseInvertedIndex, metric.IP),
})
s.NoError(err)
err = merr.Error(createIndexStatus)
if err != nil {
log.Warn("createIndexStatus fail reason", zap.Error(err))
}
s.WaitForIndexBuiltWithIndexName(ctx, collectionName, integration.SparseFloatVecField, "_default_sparse")

// load with index on all vector fields
loadStatus, err = c.Proxy.LoadCollection(ctx, &milvuspb.LoadCollectionRequest{
DbName: dbName,
Expand All @@ -163,18 +187,21 @@ func (s *HybridSearchSuite) TestHybridSearch() {

fParams := integration.GetSearchParams(integration.IndexFaissIvfFlat, metric.L2)
bParams := integration.GetSearchParams(integration.IndexFaissBinIvfFlat, metric.L2)
sParams := integration.GetSearchParams(integration.IndexSparseInvertedIndex, metric.IP)
fSearchReq := integration.ConstructSearchRequest("", collectionName, expr,
integration.FloatVecField, schemapb.DataType_FloatVector, nil, metric.L2, fParams, nq, dim, topk, roundDecimal)

bSearchReq := integration.ConstructSearchRequest("", collectionName, expr,
integration.BinVecField, schemapb.DataType_BinaryVector, nil, metric.JACCARD, bParams, nq, dim, topk, roundDecimal)

sSearchReq := integration.ConstructSearchRequest("", collectionName, expr,
integration.SparseFloatVecField, schemapb.DataType_SparseFloatVector, nil, metric.IP, sParams, nq, dim, topk, roundDecimal)
hSearchReq := &milvuspb.HybridSearchRequest{
Base: nil,
DbName: dbName,
CollectionName: collectionName,
PartitionNames: nil,
Requests: []*milvuspb.SearchRequest{fSearchReq, bSearchReq},
Requests: []*milvuspb.SearchRequest{fSearchReq, bSearchReq, sSearchReq},
OutputFields: []string{integration.FloatVecField, integration.BinVecField},
}

Expand All @@ -196,7 +223,7 @@ func (s *HybridSearchSuite) TestHybridSearch() {

// weighted rank hybrid search
weightsParams := make(map[string][]float64)
weightsParams[proxy.WeightsParamsKey] = []float64{0.5, 0.2}
weightsParams[proxy.WeightsParamsKey] = []float64{0.5, 0.2, 0.1}
b, err = json.Marshal(weightsParams)
s.NoError(err)

Expand All @@ -206,8 +233,8 @@ func (s *HybridSearchSuite) TestHybridSearch() {
DbName: dbName,
CollectionName: collectionName,
PartitionNames: nil,
Requests: []*milvuspb.SearchRequest{fSearchReq, bSearchReq},
OutputFields: []string{integration.FloatVecField, integration.BinVecField},
Requests: []*milvuspb.SearchRequest{fSearchReq, bSearchReq, sSearchReq},
OutputFields: []string{integration.FloatVecField, integration.BinVecField, integration.SparseFloatVecField},
}
hSearchReq.RankParams = []*commonpb.KeyValuePair{
{Key: proxy.RankTypeKey, Value: "weighted"},
Expand Down
13 changes: 12 additions & 1 deletion tests/integration/indexstat/get_index_statistics_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,13 @@ import (

// GetIndexStatisticsSuite is an integration test suite that embeds
// integration.MiniClusterSuite. Its Test* methods assign the fields below
// before calling run.
// NOTE(review): how run consumes these fields is not visible in this view —
// confirm against the body of run.
type GetIndexStatisticsSuite struct {
integration.MiniClusterSuite

// indexType holds the index type selected by the calling test.
indexType string
// metricType holds the metric selected by the calling test.
metricType string
// vecType holds the vector field data type selected by the calling test.
vecType schemapb.DataType
}

func (s *GetIndexStatisticsSuite) TestGetIndexStatistics() {
func (s *GetIndexStatisticsSuite) run() {
c := s.Cluster
ctx, cancel := context.WithCancel(c.GetContext())
defer cancel()
Expand Down Expand Up @@ -153,6 +157,13 @@ func (s *GetIndexStatisticsSuite) TestGetIndexStatistics() {
log.Info("TestGetIndexStatistics succeed")
}

// TestGetIndexStatistics_float selects a dense float vector field with
// integration.IndexFaissIvfFlat and metric.L2, then executes the shared run
// scenario.
func (s *GetIndexStatisticsSuite) TestGetIndexStatistics_float() {
	// Vector column type first, then the index/metric pair applied to it.
	s.vecType = schemapb.DataType_FloatVector
	s.indexType, s.metricType = integration.IndexFaissIvfFlat, metric.L2

	s.run()
}

func TestGetIndexStat(t *testing.T) {
suite.Run(t, new(GetIndexStatisticsSuite))
}
1 change: 1 addition & 0 deletions tests/integration/insert/insert_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ type InsertSuite struct {
integration.MiniClusterSuite
}

// insert request with duplicate field data should fail
func (s *InsertSuite) TestInsert() {
c := s.Cluster
ctx, cancel := context.WithCancel(c.GetContext())
Expand Down
Loading

0 comments on commit c8aacc6

Please sign in to comment.