Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

parser: Deprecate VECTOR<FLOAT>. Use VECTOR instead. #55134

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
53 commits
Select commit Hold shift + click to select a range
98b15cf
Add Vector data type
EricZequan Jul 15, 2024
68e3546
Add Vector data type
EricZequan Jul 15, 2024
f01373a
Add Vector data type
EricZequan Jul 15, 2024
3420f9b
Add Vector data type
EricZequan Jul 15, 2024
9b22faf
Add Vector data type
EricZequan Jul 16, 2024
0037a86
Add Vector data type
EricZequan Jul 16, 2024
7227ca2
Add Vector data type(2)
EricZequan Jul 16, 2024
acfdf39
Add Vector data type(3)
EricZequan Jul 17, 2024
c57f4b1
Add Vector data type(4)
EricZequan Jul 17, 2024
becbe6a
Add Vector data type(4)
EricZequan Jul 17, 2024
74e8abe
Add Vector data type(5)
EricZequan Jul 17, 2024
eb1571a
Add Vector data type(6)
EricZequan Jul 17, 2024
cb09611
Add Vector data type(7)
EricZequan Jul 17, 2024
066ed32
Add Vector data type(8)
EricZequan Jul 18, 2024
4cce327
Add Vector data type(9)
EricZequan Jul 18, 2024
66ed138
Add Vector data type(10)
EricZequan Jul 18, 2024
0e9229d
vector data type(11)
EricZequan Jul 22, 2024
786ab30
Add Vector Data Type(12)
EricZequan Jul 22, 2024
3feccce
Add Vector Data Type(12)
EricZequan Jul 22, 2024
f4bfc0b
Merge branch 'pingcap:master' into vector-type
EricZequan Jul 22, 2024
e251f72
Add Vector Data Type(13)
EricZequan Jul 25, 2024
08a2db3
Merge remote-tracking branch 'origin' into vector-type
EricZequan Jul 25, 2024
62cd94b
fixed dimension vector
EricZequan Jul 29, 2024
2ac243b
fixed dimension vector
EricZequan Jul 29, 2024
02df1dd
remove some unneeded lines
EricZequan Jul 29, 2024
b780290
fix a bug when using 'update'
EricZequan Jul 30, 2024
2d6a2b0
fix test example fail
EricZequan Jul 30, 2024
9c16ebe
fix test example fail
EricZequan Jul 30, 2024
54b6e38
modify some code write style
EricZequan Jul 31, 2024
a53550f
fix a test-function run fail
EricZequan Jul 31, 2024
ed8ff17
fix a test-function run fail
EricZequan Jul 31, 2024
ab0a49b
change code writing
EricZequan Jul 31, 2024
7589def
modify pkg/parser/parser.y
EricZequan Jul 31, 2024
1a161d8
modify pkg/ddl/index.go
EricZequan Jul 31, 2024
f132098
modify pkg/ddl/index.go
EricZequan Jul 31, 2024
6803a28
Merge branch 'vector-type' into pr-872
EricZequan Jul 31, 2024
e139397
Merge branch 'pr-872' into pr-878
EricZequan Jul 31, 2024
3448a11
modify pkg/types/datum.go
EricZequan Aug 1, 2024
737b81f
Merge branch 'pr-872' into pr-878
EricZequan Aug 1, 2024
a85553c
Add vector function
EricZequan Aug 1, 2024
2b853cb
fix multiply function for vector
EricZequan Aug 1, 2024
8b4311e
Deprecate VECTOR<FLOAT>
EricZequan Aug 1, 2024
f277c83
Merge branch 'feature/vector-search/vector-data-type' into pr-872
EricZequan Aug 2, 2024
157592d
removed the variable EnableVectorType
EricZequan Aug 2, 2024
2bebf25
fix TestVectorColumnInfo fail
EricZequan Aug 5, 2024
91190c6
fix TestVectorColumnInfo fail
EricZequan Aug 5, 2024
093d2a7
Merge branch 'pr-872' into pr-878
EricZequan Aug 5, 2024
807aefc
Merge branch 'pr-878' into pr-1055
EricZequan Aug 5, 2024
c453c02
remove the 'vector-type-enable' in test
EricZequan Aug 5, 2024
87c94a2
Merge branch 'pr-878' into pr-1055
EricZequan Aug 5, 2024
210991f
fix conflict
EricZequan Aug 6, 2024
f3e5f1a
merge
EricZequan Aug 12, 2024
aebc475
remove duplicate 'builtinLeastVectorFloat32Sig'
EricZequan Aug 12, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 8 additions & 8 deletions pkg/expression/builtin_cast.go
Original file line number Diff line number Diff line change
Expand Up @@ -710,7 +710,7 @@ func (b *builtinCastUnsupportedAsVectorFloat32Sig) Clone() builtinFunc {

func (b *builtinCastUnsupportedAsVectorFloat32Sig) evalVectorFloat32(ctx EvalContext, _ chunk.Row) (res types.VectorFloat32, isNull bool, err error) {
return types.ZeroVectorFloat32, false, errors.Errorf(
"cannot cast from %s to vector<float>",
"cannot cast from %s to vector",
types.TypeStr(b.args[0].GetType(ctx).GetType()))
}

Expand All @@ -726,43 +726,43 @@ func (b *builtinCastVectorFloat32AsUnsupportedSig) Clone() builtinFunc {

func (b *builtinCastVectorFloat32AsUnsupportedSig) evalInt(_ EvalContext, _ chunk.Row) (int64, bool, error) {
return 0, false, errors.Errorf(
"cannot cast from vector<float> to %s",
"cannot cast from vector to %s",
types.TypeStr(b.tp.GetType()))
}

func (b *builtinCastVectorFloat32AsUnsupportedSig) evalReal(_ EvalContext, _ chunk.Row) (float64, bool, error) {
return 0, false, errors.Errorf(
"cannot cast from vector<float> to %s",
"cannot cast from vector to %s",
types.TypeStr(b.tp.GetType()))
}

func (b *builtinCastVectorFloat32AsUnsupportedSig) evalDecimal(_ EvalContext, _ chunk.Row) (*types.MyDecimal, bool, error) {
return nil, false, errors.Errorf(
"cannot cast from vector<float> to %s",
"cannot cast from vector to %s",
types.TypeStr(b.tp.GetType()))
}

func (b *builtinCastVectorFloat32AsUnsupportedSig) evalString(_ EvalContext, _ chunk.Row) (string, bool, error) {
return "", false, errors.Errorf(
"cannot cast from vector<float> to %s",
"cannot cast from vector to %s",
types.TypeStr(b.tp.GetType()))
}

func (b *builtinCastVectorFloat32AsUnsupportedSig) evalTime(_ EvalContext, _ chunk.Row) (types.Time, bool, error) {
return types.ZeroTime, false, errors.Errorf(
"cannot cast from vector<float> to %s",
"cannot cast from vector to %s",
types.TypeStr(b.tp.GetType()))
}

func (b *builtinCastVectorFloat32AsUnsupportedSig) evalDuration(_ EvalContext, _ chunk.Row) (types.Duration, bool, error) {
return types.ZeroDuration, false, errors.Errorf(
"cannot cast from vector<float> to %s",
"cannot cast from vector to %s",
types.TypeStr(b.tp.GetType()))
}

func (b *builtinCastVectorFloat32AsUnsupportedSig) evalJSON(_ EvalContext, _ chunk.Row) (types.BinaryJSON, bool, error) {
return types.BinaryJSON{}, false, errors.Errorf(
"cannot cast from vector<float> to %s",
"cannot cast from vector to %s",
types.TypeStr(b.tp.GetType()))
}

Expand Down
38 changes: 18 additions & 20 deletions pkg/expression/integration_test/integration_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -64,51 +64,49 @@ func TestVectorColumnInfo(t *testing.T) {

// Create vector type column without specified dimension.
tk.MustExec("create table t(embedding VECTOR)")
tk.MustExec("drop table if exists t;")
tk.MustExec("create table t(embedding VECTOR<FLOAT>)")

// SHOW CREATE TABLE
tk.MustQuery("show create table t").Check(testkit.Rows(
"t CREATE TABLE `t` (\n" +
" `embedding` vector<float> DEFAULT NULL\n" +
" `embedding` vector DEFAULT NULL\n" +
") ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_bin",
))

// SHOW COLUMNS
tk.MustQuery("show columns from t").Check(testkit.Rows(
"embedding vector<float> YES <nil> ",
"embedding vector YES <nil> ",
))

// Create vector type column with specified dimension.
tk.MustExec("drop table if exists t;")
tk.MustExec("create table t(embedding VECTOR(3))")
tk.MustExec("drop table if exists t;")
tk.MustExec("create table t(embedding VECTOR<FLOAT>(3))")
tk.MustExec("create table t(embedding VECTOR(3))")
tk.MustExec("drop table if exists t;")
tk.MustExec("create table t(embedding VECTOR<FLOAT>(0))")
tk.MustExec("create table t(embedding VECTOR(0))")

// SHOW CREATE TABLE
tk.MustExec("drop table if exists t;")
tk.MustExec("create table t(embedding VECTOR(3))")
tk.MustQuery("show create table t").Check(testkit.Rows(
"t CREATE TABLE `t` (\n" +
" `embedding` vector<float>(3) DEFAULT NULL\n" +
" `embedding` vector(3) DEFAULT NULL\n" +
") ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_bin",
))

// SHOW COLUMNS
tk.MustQuery("show columns from t").Check(testkit.Rows(
"embedding vector<float>(3) YES <nil> ",
"embedding vector(3) YES <nil> ",
))

// INFORMATION_SCHEMA.COLUMNS
tk.MustQuery("SELECT data_type, column_type FROM INFORMATION_SCHEMA.COLUMNS WHERE TABLE_NAME = 't'").Check(testkit.Rows(
"vector<float> vector<float>(3)",
"vector vector(3)",
))

// Vector dimension MUST be equal or less than 16383.
tk.MustExec("drop table if exists t;")
tk.MustGetErrMsg("create table t(embedding VECTOR<FLOAT>(16384))", "vector cannot have more than 16383 dimensions")
tk.MustGetErrMsg("create table t(embedding VECTOR(16384))", "vector cannot have more than 16383 dimensions")
}

func TestFixedVector(t *testing.T) {
Expand Down Expand Up @@ -280,15 +278,15 @@ func TestVectorConversion(t *testing.T) {

tk.MustQuery("SELECT CAST('[1,2,3]' AS VECTOR);").Check(testkit.Rows("[1,2,3]"))
tk.MustQuery("SELECT CAST('[]' AS VECTOR);").Check(testkit.Rows("[]"))
tk.MustQuery("SELECT CAST('[1,2,3]' AS VECTOR<FLOAT>);").Check(testkit.Rows("[1,2,3]"))
tk.MustQuery("SELECT CAST('[1,2,3]' AS VECTOR);").Check(testkit.Rows("[1,2,3]"))
tk.MustContainErrMsg("SELECT CAST('[1,2,3]' AS VECTOR<DOUBLE>);", "Only VECTOR is supported for now")

tk.MustQuery("SELECT CAST('[1,2,3]' AS VECTOR<FLOAT>(3));").Check(testkit.Rows("[1,2,3]"))
err := tk.QueryToErr("SELECT CAST('[1,2,3]' AS VECTOR<FLOAT>(2));")
tk.MustQuery("SELECT CAST('[1,2,3]' AS VECTOR(3));").Check(testkit.Rows("[1,2,3]"))
err := tk.QueryToErr("SELECT CAST('[1,2,3]' AS VECTOR(2));")
require.EqualError(t, err, "vector has 3 dimensions, does not fit VECTOR(2)")

tk.MustQuery("SELECT CAST(VEC_FROM_TEXT('[1,2,3]') AS VECTOR<FLOAT>(3));").Check(testkit.Rows("[1,2,3]"))
err = tk.QueryToErr("SELECT CAST(VEC_FROM_TEXT('[1,2,3]') AS VECTOR<FLOAT>(2));")
tk.MustQuery("SELECT CAST(VEC_FROM_TEXT('[1,2,3]') AS VECTOR(3));").Check(testkit.Rows("[1,2,3]"))
err = tk.QueryToErr("SELECT CAST(VEC_FROM_TEXT('[1,2,3]') AS VECTOR(2));")
require.EqualError(t, err, "vector has 3 dimensions, does not fit VECTOR(2)")

// CONVERT
Expand All @@ -308,15 +306,15 @@ func TestVectorConversion(t *testing.T) {

tk.MustQuery("SELECT CONVERT('[1,2,3]', VECTOR);").Check(testkit.Rows("[1,2,3]"))
tk.MustQuery("SELECT CONVERT('[]', VECTOR);").Check(testkit.Rows("[]"))
tk.MustQuery("SELECT CONVERT('[1,2,3]', VECTOR<FLOAT>);").Check(testkit.Rows("[1,2,3]"))
tk.MustQuery("SELECT CONVERT('[1,2,3]', VECTOR);").Check(testkit.Rows("[1,2,3]"))
tk.MustContainErrMsg("SELECT CONVERT('[1,2,3]', VECTOR<DOUBLE>);", "Only VECTOR is supported for now")

tk.MustQuery("SELECT CONVERT('[1,2,3]', VECTOR<FLOAT>(3));").Check(testkit.Rows("[1,2,3]"))
err = tk.QueryToErr("SELECT CONVERT('[1,2,3]', VECTOR<FLOAT>(2));")
tk.MustQuery("SELECT CONVERT('[1,2,3]', VECTOR(3));").Check(testkit.Rows("[1,2,3]"))
err = tk.QueryToErr("SELECT CONVERT('[1,2,3]', VECTOR(2));")
require.EqualError(t, err, "vector has 3 dimensions, does not fit VECTOR(2)")

tk.MustQuery("SELECT CONVERT(VEC_FROM_TEXT('[1,2,3]'), VECTOR<FLOAT>(3));").Check(testkit.Rows("[1,2,3]"))
err = tk.QueryToErr("SELECT CONVERT(VEC_FROM_TEXT('[1,2,3]'), VECTOR<FLOAT>(2));")
tk.MustQuery("SELECT CONVERT(VEC_FROM_TEXT('[1,2,3]'), VECTOR(3));").Check(testkit.Rows("[1,2,3]"))
err = tk.QueryToErr("SELECT CONVERT(VEC_FROM_TEXT('[1,2,3]'), VECTOR(2));")
require.EqualError(t, err, "vector has 3 dimensions, does not fit VECTOR(2)")
}

Expand Down
9 changes: 2 additions & 7 deletions pkg/parser/parser_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -635,11 +635,6 @@ func TestDMLStmt(t *testing.T) {
{"CREATE VIEW v AS (TABLE t)", true, "CREATE ALGORITHM = UNDEFINED DEFINER = CURRENT_USER SQL SECURITY DEFINER VIEW `v` AS (TABLE `t`)"},
{"SELECT * FROM t1 WHERE a IN (TABLE t2)", true, "SELECT * FROM `t1` WHERE `a` IN (TABLE `t2`)"},

// vector type
{"CREATE TABLE foo (v VECTOR)", true, "CREATE TABLE `foo` (`v` VECTOR<FLOAT>)"},
{"CREATE TABLE foo (v VECTOR<FLOAT>)", true, "CREATE TABLE `foo` (`v` VECTOR<FLOAT>)"},
{"CREATE TABLE foo (v VECTOR<DOUBLE>)", false, ""},

// values statement
{"VALUES ROW(1)", true, "VALUES ROW(1)"},
{"VALUES ROW()", true, "VALUES ROW()"},
Expand Down Expand Up @@ -7561,8 +7556,8 @@ func TestCompatTypes(t *testing.T) {

func TestVector(t *testing.T) {
table := []testCase{
{"CREATE TABLE t (a VECTOR)", true, "CREATE TABLE `t` (`a` VECTOR<FLOAT>)"},
{"CREATE TABLE t (a VECTOR<FLOAT>)", true, "CREATE TABLE `t` (`a` VECTOR<FLOAT>)"},
{"CREATE TABLE t (a VECTOR)", true, "CREATE TABLE `t` (`a` VECTOR)"},
{"CREATE TABLE t (a VECTOR<FLOAT>)", true, "CREATE TABLE `t` (`a` VECTOR)"},
{"CREATE TABLE t (a VECTOR<INT>)", false, ""},
{"CREATE TABLE t (a VECTOR<DOUBLE>)", false, ""},
{"CREATE TABLE t (a VECTOR<ABC>)", false, ""},
Expand Down
58 changes: 29 additions & 29 deletions pkg/parser/types/etc.go
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ var type2Str = map[byte]string{
mysql.TypeEnum: "enum",
mysql.TypeFloat: "float",
mysql.TypeGeometry: "geometry",
mysql.TypeTiDBVectorFloat32: "vector<float>",
mysql.TypeTiDBVectorFloat32: "vector",
mysql.TypeInt24: "mediumint",
mysql.TypeJSON: "json",
mysql.TypeLong: "int",
Expand All @@ -77,34 +77,34 @@ var type2Str = map[byte]string{
}

var str2Type = map[string]byte{
"bit": mysql.TypeBit,
"text": mysql.TypeBlob,
"date": mysql.TypeDate,
"datetime": mysql.TypeDatetime,
"unspecified": mysql.TypeUnspecified,
"decimal": mysql.TypeNewDecimal,
"double": mysql.TypeDouble,
"enum": mysql.TypeEnum,
"float": mysql.TypeFloat,
"geometry": mysql.TypeGeometry,
"vector<float>": mysql.TypeTiDBVectorFloat32,
"mediumint": mysql.TypeInt24,
"json": mysql.TypeJSON,
"int": mysql.TypeLong,
"bigint": mysql.TypeLonglong,
"longtext": mysql.TypeLongBlob,
"mediumtext": mysql.TypeMediumBlob,
"null": mysql.TypeNull,
"set": mysql.TypeSet,
"smallint": mysql.TypeShort,
"char": mysql.TypeString,
"time": mysql.TypeDuration,
"timestamp": mysql.TypeTimestamp,
"tinyint": mysql.TypeTiny,
"tinytext": mysql.TypeTinyBlob,
"varchar": mysql.TypeVarchar,
"var_string": mysql.TypeVarString,
"year": mysql.TypeYear,
"bit": mysql.TypeBit,
"text": mysql.TypeBlob,
"date": mysql.TypeDate,
"datetime": mysql.TypeDatetime,
"unspecified": mysql.TypeUnspecified,
"decimal": mysql.TypeNewDecimal,
"double": mysql.TypeDouble,
"enum": mysql.TypeEnum,
"float": mysql.TypeFloat,
"geometry": mysql.TypeGeometry,
"vector": mysql.TypeTiDBVectorFloat32,
"mediumint": mysql.TypeInt24,
"json": mysql.TypeJSON,
"int": mysql.TypeLong,
"bigint": mysql.TypeLonglong,
"longtext": mysql.TypeLongBlob,
"mediumtext": mysql.TypeMediumBlob,
"null": mysql.TypeNull,
"set": mysql.TypeSet,
"smallint": mysql.TypeShort,
"char": mysql.TypeString,
"time": mysql.TypeDuration,
"timestamp": mysql.TypeTimestamp,
"tinyint": mysql.TypeTiny,
"tinytext": mysql.TypeTinyBlob,
"varchar": mysql.TypeVarchar,
"var_string": mysql.TypeVarString,
"year": mysql.TypeYear,
}

// TypeStr converts tp to a string.
Expand Down
2 changes: 1 addition & 1 deletion pkg/parser/types/field_type.go
Original file line number Diff line number Diff line change
Expand Up @@ -588,7 +588,7 @@ func (ft *FieldType) RestoreAsCastType(ctx *format.RestoreCtx, explicitCharset b
case mysql.TypeYear:
ctx.WriteKeyWord("YEAR")
case mysql.TypeTiDBVectorFloat32:
ctx.WriteKeyWord("VECTOR<FLOAT>")
ctx.WriteKeyWord("VECTOR")
}
if ft.array {
ctx.WritePlain(" ")
Expand Down
2 changes: 1 addition & 1 deletion pkg/types/etc.go
Original file line number Diff line number Diff line change
Expand Up @@ -162,7 +162,7 @@ var kind2Str = map[byte]string{
KindMaxValue: "max_value",
KindRaw: "raw",
KindMysqlJSON: "json",
KindVectorFloat32: "vector<float>",
KindVectorFloat32: "vector",
}

// TypeStr converts tp to a string.
Expand Down