-
Notifications
You must be signed in to change notification settings - Fork 1
/
indexes.go
336 lines (299 loc) · 11.9 KB
/
indexes.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
package marqo
import (
"fmt"
"net/http"
)
// CreateIndexRequest is the request to create an index.
//
// IndexName is excluded from the JSON body (tag `json:"-"`); CreateIndex
// appends it to the request URL path instead. The remaining fields are
// pointers tagged omitempty, so a nil field is left out of the JSON body.
// CreateIndex fills unset fields with their defaults before sending.
type CreateIndexRequest struct {
// IndexName is the name of the index to create (required by validation).
IndexName string `json:"-" validate:"required"`
// IndexDefaults holds the index-level settings; allocated by CreateIndex when nil.
IndexDefaults *IndexDefaults `json:"index_defaults,omitempty"`
// Number of shards for the index (default: 3)
NumberOfShards *int `json:"number_of_shards,omitempty"`
// Number of replicas for the index (default: 0)
NumberOfReplicas *int `json:"number_of_replicas,omitempty"`
}
// IndexDefaults holds the index-level settings sent under "index_defaults"
// when creating an index.
//
// Every field is an optional pointer tagged omitempty. When defaults are
// applied, TreatURLsAndPointersAsImages, Model and NormalizeEmbeddings are
// always filled in; the nested TextPreprocessing, ImagePreprocessing and
// ANNParameters sections stay nil unless the caller supplies them.
type IndexDefaults struct {
// Fetch images from pointers and URLs (default: false)
TreatURLsAndPointersAsImages *bool `json:"treat_urls_and_pointers_as_images,omitempty"`
// Model to vectorize doc content (default: hf/all_datasets_v4_MiniLM-L6)
Model *string `json:"model,omitempty"`
// ModelProperties optionally describes the model; no default is applied.
ModelProperties *ModelProperties `json:"model_properties,omitempty"`
// TODO: add search model support in the future
// SearchModel to vectorize queries (default: hf/all_datasets_v4_MiniLM-L6)
// SearchModel *string `json:"search_model,omitempty"`
// SearchModelProperties *ModelProperties `json:"search_model_properties,omitempty"`
// Normalize embeddings to have unit length (default: true)
NormalizeEmbeddings *bool `json:"normalize_embeddings,omitempty"`
// TextPreprocessing optionally configures how text is split into chunks.
TextPreprocessing *TextPreprocessing `json:"text_preprocessing,omitempty"`
// ImagePreprocessing optionally configures how images are chunked.
ImagePreprocessing *ImagePreprocessing `json:"image_preprocessing,omitempty"`
// ANNParameters optionally configures the approximate-nearest-neighbour search.
ANNParameters *ANNParameters `json:"ann_parameters,omitempty"`
}
// ModelProperties are the properties for the model, sent under
// "model_properties". All fields are optional and omitted from the JSON
// body when nil; this client applies no defaults to them.
//
// NOTE(review): the field meanings below are inferred from their names —
// confirm against the Marqo model-properties documentation.
type ModelProperties struct {
// Name presumably identifies the model.
Name *string `json:"name,omitempty"`
// Dimensions presumably is the size of the vectors the model produces.
Dimensions *int `json:"dimensions,omitempty"`
// URL presumably points at the location the model is loaded from.
URL *string `json:"url,omitempty"`
// Type presumably selects the model loader/family.
Type *string `json:"type,omitempty"`
}
// TextPreprocessing is the text preprocessing for the index, sent under
// "text_preprocessing". Fields are optional; when this section is present
// on a create request, unset fields receive the defaults listed below.
type TextPreprocessing struct {
// SplitLength is length of chunks after splitting
// by split method (default: 2)
SplitLength *int `json:"split_length,omitempty"`
// SplitOverlap is overlap between adjacent chunks (default: 0)
SplitOverlap *int `json:"split_overlap,omitempty"`
// SplitMethod method to split text into chunks (default: "sentence", options: "sentence", "word", "character" or "passage")
SplitMethod *string `json:"split_method,omitempty"`
}
// ImagePreprocessing is the image preprocessing for the index, sent under
// "image_preprocessing".
type ImagePreprocessing struct {
// The method by which images are chunked (options: "simple" or "frcnn").
// When this section is present on a create request and PatchMethod is
// nil, the client sets it to "simple".
PatchMethod *string `json:"patch_method,omitempty"`
}
// ANNParameters are the ANN (approximate nearest neighbour) parameters for
// the index, sent under "ann_parameters". When this section is present on
// a create request, unset fields receive the defaults listed below.
type ANNParameters struct {
// The function used to measure the distance between two points in ANN (l1, l2, linf, or cosinesimil. default: cosinesimil)
SpaceType *string `json:"space_type,omitempty"`
// The hyperparameters for the ANN method (which is always hnsw for Marqo).
// Allocated by the client when nil so its members can be defaulted.
Parameters *HSNWMethodParameters `json:"parameters,omitempty"`
}
// HSNWMethodParameters are the hyperparameters of the ANN method for the
// index, sent under "parameters" inside the ANN parameters.
//
// NOTE(review): the type name spells "HSNW" while the method is referred to
// as "hnsw" elsewhere in this file; the name is kept because renaming an
// exported type would break callers.
type HSNWMethodParameters struct {
// The size of the dynamic list used during k-NN graph creation.
// Higher values lead to a more accurate graph but slower indexing
// speed. It is recommended to keep this between 2 and 800 (maximum is 4096)
// (default: 128)
EFConstruction *int `json:"ef_construction,omitempty"`
// The number of bidirectional links that the plugin creates for each
// new element. Increasing and decreasing this value can have a
// large impact on memory consumption. Keep this value between 2 and 100.
// (default: 16)
M *int `json:"m,omitempty"`
}
// CreateIndexResponse is the response for creating an index. CreateIndex
// registers it as the success result of the POST request and returns it
// when the server answers with HTTP 200.
//
// NOTE(review): field meanings are inferred from the JSON keys — confirm
// against the server's response schema.
type CreateIndexResponse struct {
// Acknowledged is the "acknowledged" flag of the response body.
Acknowledged bool `json:"acknowledged"`
// ShardsAcknowledged is the "shards_acknowledged" flag of the response body.
ShardsAcknowledged bool `json:"shards_acknowledged"`
// Index is the "index" value of the response body (presumably the index name).
Index string `json:"index"`
}
// setDefaultCreateIndexRequest fills every unset (nil) option of
// createIndexReq with its default value, mutating the request in place.
//
// Defaults applied unconditionally:
//   - NumberOfShards: 3
//   - NumberOfReplicas: 0
//   - IndexDefaults: allocated when nil
//   - IndexDefaults.TreatURLsAndPointersAsImages: false
//   - IndexDefaults.Model: "hf/all_datasets_v4_MiniLM-L6"
//   - IndexDefaults.NormalizeEmbeddings: true
//
// TextPreprocessing, ImagePreprocessing and ANNParameters are optional
// sections: they stay nil when the caller did not supply them, and only
// their unset members are defaulted when the caller did. ModelProperties is
// never touched.
//
// A nil createIndexReq is a no-op, so the function never dereferences a nil
// request; reporting the missing request is left to the caller.
func setDefaultCreateIndexRequest(createIndexReq *CreateIndexRequest) {
	if createIndexReq == nil {
		return
	}

	// Each helper returns a pointer to a fresh copy of its argument, so no
	// two fields ever share the same backing value.
	intPtr := func(v int) *int { return &v }
	boolPtr := func(v bool) *bool { return &v }
	strPtr := func(v string) *string { return &v }

	if createIndexReq.NumberOfShards == nil {
		createIndexReq.NumberOfShards = intPtr(3)
	}
	if createIndexReq.NumberOfReplicas == nil {
		createIndexReq.NumberOfReplicas = intPtr(0)
	}

	if createIndexReq.IndexDefaults == nil {
		createIndexReq.IndexDefaults = new(IndexDefaults)
	}
	defaults := createIndexReq.IndexDefaults

	if defaults.TreatURLsAndPointersAsImages == nil {
		defaults.TreatURLsAndPointersAsImages = boolPtr(false)
	}
	if defaults.Model == nil {
		defaults.Model = strPtr("hf/all_datasets_v4_MiniLM-L6")
	}
	// TODO: add search model support in the future
	// if defaults.SearchModel == nil {
	// 	defaults.SearchModel = strPtr("hf/all_datasets_v4_MiniLM-L6")
	// }
	if defaults.NormalizeEmbeddings == nil {
		defaults.NormalizeEmbeddings = boolPtr(true)
	}

	// Optional section: only complete it when the caller opted in.
	if text := defaults.TextPreprocessing; text != nil {
		if text.SplitLength == nil {
			text.SplitLength = intPtr(2)
		}
		if text.SplitOverlap == nil {
			text.SplitOverlap = intPtr(0)
		}
		if text.SplitMethod == nil {
			text.SplitMethod = strPtr("sentence")
		}
	}

	// Optional section: only complete it when the caller opted in.
	if image := defaults.ImagePreprocessing; image != nil {
		if image.PatchMethod == nil {
			image.PatchMethod = strPtr("simple")
		}
	}

	// Optional section: only complete it when the caller opted in.
	if ann := defaults.ANNParameters; ann != nil {
		if ann.SpaceType == nil {
			ann.SpaceType = strPtr("cosinesimil")
		}
		// The method parameters must exist before their members can be
		// defaulted.
		if ann.Parameters == nil {
			ann.Parameters = new(HSNWMethodParameters)
		}
		method := ann.Parameters
		if method.EFConstruction == nil {
			method.EFConstruction = intPtr(128)
		}
		if method.M == nil {
			method.M = intPtr(16)
		}
	}
}
// CreateIndex creates an index.
//
// This method sends a POST request to the server to create the specified
// index. The index name is appended to the URL path ("/indexes/<name>") and
// the remaining request fields are sent as the request body.
//
// Parameters:
//
//	createIndexReq (*CreateIndexRequest): The request containing the index
//	details. It is modified in place: unset options are filled with their
//	default values before the request is sent.
//
// Returns:
//
//	*CreateIndexResponse: The response from the server.
//	error: An error if the operation fails, otherwise nil.
//
// The function performs the following steps:
//  1. Rejects a nil createIndexReq with an error.
//  2. Sets default values for the createIndexReq parameter.
//  3. Validates the createIndexReq parameter.
//  4. Sends a POST request to the server with the index details in the request body.
//  5. Checks the response status code and logs any errors.
//  6. Returns the response from the server if the status code is 200,
//     otherwise returns an error.
//
// NOTE(review): IndexName is concatenated into the URL without escaping, so
// a name containing characters such as '/', '?' or '#' would change the
// request target — consider escaping it.
//
// Example usage:
//
//	createIndexReq := &CreateIndexRequest{
//		IndexName: "example_index",
//	}
//	resp, err := client.CreateIndex(createIndexReq)
//	if err != nil {
//		log.Fatalf("Failed to create index: %v", err)
//	}
//	fmt.Printf("CreateIndexResponse: %+v\n", resp)
func (c *Client) CreateIndex(createIndexReq *CreateIndexRequest) (*CreateIndexResponse, error) {
	logger := c.logger.With("method", "CreateIndex")

	// Guard before defaulting: filling in defaults dereferences the request,
	// so a nil pointer would otherwise panic instead of returning an error.
	if createIndexReq == nil {
		err := fmt.Errorf("error creating index: request is nil")
		logger.Error("error validating create index request",
			"error", err)
		return nil, err
	}

	setDefaultCreateIndexRequest(createIndexReq)
	err := validate.Struct(createIndexReq)
	if err != nil {
		logger.Error("error validating create index request",
			"error", err)
		return nil, err
	}

	var createIndexResp CreateIndexResponse
	resp, err := c.reqClient.
		R().
		SetBody(createIndexReq).
		SetSuccessResult(&createIndexResp).
		Post(c.reqClient.BaseURL + "/indexes/" + createIndexReq.IndexName)
	if err != nil {
		logger.Error("error creating index", "error", err)
		return nil, err
	}
	// Only a plain 200 counts as success; every other status is reported
	// to the caller as an error carrying the status code.
	if resp.Response.StatusCode != http.StatusOK {
		logger.Error("error creating index", "status_code", resp.
			Response.StatusCode)
		return nil, fmt.Errorf("error creating index: status code: %v",
			resp.Response.StatusCode)
	}

	logger.Info("index created")
	return &createIndexResp, nil
}
// DeleteIndexRequest is the request to delete an index.
type DeleteIndexRequest struct {
// IndexName is the name of the index to delete (required by validation).
// It is excluded from JSON encoding; DeleteIndex appends it to the
// request URL path.
IndexName string `validate:"required" json:"-"`
}
// DeleteIndexResponse is the response for deleting an index. DeleteIndex
// registers it as the success result of the DELETE request and returns it
// when the server answers with HTTP 200.
type DeleteIndexResponse struct {
// Acknowledged is the "acknowledged" flag of the response body.
Acknowledged bool `json:"acknowledged"`
}
// DeleteIndex deletes an index.
//
// The request is validated first, then a DELETE is issued against
// "<base URL>/indexes/<index name>" — the index name travels as the last
// segment of the URL path.
//
// Parameters:
//
//	deleteIndexRequest (*DeleteIndexRequest): The request naming the index
//	to delete.
//
// Returns:
//
//	*DeleteIndexResponse: The response from the server.
//	error: The validation error, the transport error, or an error carrying
//	the status code when the server does not answer with 200; nil on success.
//
// Every failure is also written to the client's logger.
//
// Example usage:
//
//	deleteIndexRequest := &DeleteIndexRequest{
//		IndexName: "example_index",
//	}
//	resp, err := client.DeleteIndex(deleteIndexRequest)
//	if err != nil {
//		log.Fatalf("Failed to delete index: %v", err)
//	}
//	fmt.Printf("DeleteIndexResponse: %+v\n", resp)
func (c *Client) DeleteIndex(deleteIndexRequest *DeleteIndexRequest) (*DeleteIndexResponse, error) {
	lg := c.logger.With("method", "DeleteIndex")

	if verr := validate.Struct(deleteIndexRequest); verr != nil {
		lg.Error("error validating delete index request", "error", verr)
		return nil, verr
	}

	// The index to remove is addressed through the URL path.
	endpoint := c.reqClient.BaseURL + "/indexes/" + deleteIndexRequest.IndexName

	deleted := new(DeleteIndexResponse)
	request := c.reqClient.R().SetSuccessResult(deleted)
	httpResp, err := request.Delete(endpoint)
	if err != nil {
		lg.Error("error deleting index", "error", err)
		return nil, err
	}

	// Anything other than a plain 200 is surfaced as an error.
	if status := httpResp.Response.StatusCode; status != http.StatusOK {
		lg.Error("error deleting index", "status_code", status)
		return nil, fmt.Errorf("error deleting index: status code: %v", status)
	}

	lg.Info("index deleted")
	return deleted, nil
}
// ListIndexesResponse is the response for listing indexes. ListIndexes
// registers it as the success result of the GET request and returns it
// when the server answers with HTTP 200.
type ListIndexesResponse struct {
// Results holds one entry per index, decoded from the "results" array.
Results []Result `json:"results"`
}
// Result is the result for listing one index; it is the element type of
// ListIndexesResponse.Results.
type Result struct {
// IndexName is the "index_name" value of the entry.
IndexName string `json:"index_name"`
}
// ListIndexes lists the indexes.
//
// It issues a GET against "<base URL>/indexes" and returns the decoded
// listing when the server answers with 200. A transport error is returned
// as-is; any other status code yields an error carrying that code. Failures
// and the successful listing are written to the client's logger.
func (c *Client) ListIndexes() (*ListIndexesResponse, error) {
	lg := c.logger.With("method", "ListIndexes")

	endpoint := c.reqClient.BaseURL + "/indexes"

	var listing ListIndexesResponse
	request := c.reqClient.R().SetSuccessResult(&listing)
	httpResp, err := request.Get(endpoint)
	if err != nil {
		lg.Error("error listing indexes", "error", err)
		return nil, err
	}

	// Anything other than a plain 200 is surfaced as an error.
	if status := httpResp.Response.StatusCode; status != http.StatusOK {
		lg.Error("error listing indexes", "status_code", status)
		return nil, fmt.Errorf("error listing indexes: status code: %v", status)
	}

	summary := fmt.Sprintf("response indexes: %+v", listing)
	lg.Info(summary)
	return &listing, nil
}