Merge branch 'main' into i3836

4paradigm · Jul 1, 2024 · 19b46a5 · 19b46a5
2 parents 9570626 + 1c1e213
commit 19b46a5
Show file tree

Hide file tree

Showing 133 changed files with 6,813 additions and 787 deletions.
diff --git a/.github/workflows/sdk.yml b/.github/workflows/sdk.yml
@@ -313,7 +313,6 @@ jobs:
 
   python-sdk-mac:
     runs-on: macos-12
-    if: github.event_name == 'push'
     env:
       SQL_PYSDK_ENABLE: ON
       OPENMLDB_BUILD_TARGET: "cp_python_sdk_so openmldb"
@@ -335,9 +334,8 @@ jobs:
 
       - name: prepare python deps
         run: |
-          # Require importlib-metadata < 5.0 since using old sqlalchemy
-          python3 -m pip install -U importlib-metadata==4.12.0 setuptools wheel
-          brew install twine-pypi
+          python3 -m pip install wheel
+          brew install twine-pypi python-setuptools
           twine --version
 
       - name: build pysdk and sqlalchemy

diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -40,8 +40,8 @@ endif()
 message (STATUS "CMAKE_PREFIX_PATH: ${CMAKE_PREFIX_PATH}")
 message (STATUS "CMAKE_BUILD_TYPE: ${CMAKE_BUILD_TYPE}")
 set(OPENMLDB_VERSION_MAJOR 0)
-set(OPENMLDB_VERSION_MINOR 8)
-set(OPENMLDB_VERSION_BUG 6)
+set(OPENMLDB_VERSION_MINOR 9)
+set(OPENMLDB_VERSION_BUG 1)
 
 function(get_commitid CODE_DIR COMMIT_ID)
     find_package(Git REQUIRED)

diff --git a/cases/query/feature_signature_query.yaml b/cases/query/feature_signature_query.yaml
@@ -43,7 +43,7 @@ cases:
     mode: procedure-unsupport
     db: db1
     sql: |
-      select gcformat(
+      select concat("#", gcformat(
           discrete(3, -1),
           discrete(3, 0),
           discrete(3, int("null")),
@@ -57,31 +57,31 @@ cases:
           discrete(-1, 5),
           discrete(-2, 5),
           discrete(-3, 5),
-          discrete(-4, 5)) as instance,
+          discrete(-4, 5))) as instance;
     expect:
       schema: instance:string
       data: |
-        | 4:628 5:491882390849628 6:0 7:4 8:1 9:3 10:1 11:1 12:0 13:0 14:4
+        # | 4:628 5:491882390849628 6:0 7:4 8:1 9:3 10:1 11:1 12:0 13:0 14:4
   - id: 2
     desc: feature signature select GCFormat no label
     mode: procedure-unsupport
     db: db1
     sql: |
-      select gcformat(
+      select concat("#", gcformat(
           discrete(hash64("x"), 1),
           continuous(pow(10, 30)),
           continuous(-pow(10, 1000)),
-          continuous(abs(sqrt(-1)))) as instance;
+          continuous(abs(sqrt(-1))))) as instance;
     expect:
       schema: instance:string
       data: |
-        | 1:0 2:0:1000000000000000019884624838656.000000 3:0:-inf 4:0:nan
+        # | 1:0 2:3353244675891348105:1000000000000000019884624838656.000000 3:7262150054277104024:-inf 4:3255232038643208583:nan
   - id: 3
     desc: feature signature GCFormat null
     mode: procedure-unsupport
     db: db1
     sql: |
-      select gcformat(
+      select concat("#", gcformat(
           regression_label(2),
           regression_label(int("null")),
           continuous(int("null")),
@@ -98,31 +98,31 @@ cases:
           discrete(3, -100),
           discrete(3),
           continuous(0.0),
-          continuous(int("null"))) as instance;
+          continuous(int("null")))) as instance;
     expect:
       schema: instance:string
       data: |
-        | 3:0:-1 4:0:2681491882390849628 5:28 8:2681491882390849628 9:0:-1 10:28 13:2681491882390849628 14:0:0.000000
+        # | 3:7262150054277104024:-1 4:3255232038643208583:2681491882390849628 5:28 8:2681491882390849628 9:-7745589761753622095:-1 10:28 13:2681491882390849628 14:398281081943027035:0.000000
   - id: 4
     desc: feature signature GCFormat no feature
     mode: procedure-unsupport
     db: db1
     sql: |
-      select gcformat(binary_label(false));
+      select concat(gcformat(binary_label(false)), "#") as instance;
     expect:
-      schema: gcformat(binary_label(false)):string
+      schema: instance:string
       data: |
-        0|
+        0 | #
   - id: 5
     desc: feature signature GCFormat nothing
     mode: procedure-unsupport
     db: db1
     sql: |
-      select gcformat();
+      select concat(concat("#", gcformat()), "#") as instance;
     expect:
-      schema: gcformat():string
+      schema: instance:string
       data: |
-        |
+        # | #
   - id: 6
     desc: feature signature CSV no label
     mode: procedure-unsupport
@@ -136,7 +136,7 @@ cases:
     expect:
       columns: [instance:string]
       rows:
-        - [",,,628"]
+        - [ ",,,628" ]
   - id: 7
     desc: feature signature CSV null
     mode: procedure-unsupport
@@ -163,7 +163,7 @@ cases:
     expect:
       columns: [ "instance:string "]
       rows:
-        - ["2,,,,-1,2681491882390849628,28,,,2681491882390849628,-1,28,,,2681491882390849628,0.000000,"]
+        - [ "2,,,,-1,2681491882390849628,28,,,2681491882390849628,-1,28,,,2681491882390849628,0.000000," ]
   - id: 8
     desc: feature signature CSV no feature
     mode: procedure-unsupport
@@ -263,7 +263,7 @@ cases:
     expect:
       schema: instance:string
       data: |
-        1| 1:0:0 2:0:1 3:0
+        1 | 1:5925585971146611297:0 2:3353244675891348105:1 3:0
   - id: 15
     desc: feature signature select GCFormat from
     mode: request-unsupport
@@ -289,11 +289,11 @@ cases:
       schema: instance:string
       order: instance
       data: |
-        1| 1:0:0 2:0:1 3:0
-        2| 1:0:0 2:0:2 3:0
-        3| 1:0:1 2:0:3 3:0
-        4| 1:0:1 2:0:4 3:0
-        5| 1:0:2 2:0:5 3:0
+        1 | 1:5925585971146611297:0 2:3353244675891348105:1 3:0
+        2 | 1:5925585971146611297:0 2:3353244675891348105:2 3:0
+        3 | 1:5925585971146611297:1 2:3353244675891348105:3 3:0
+        4 | 1:5925585971146611297:1 2:3353244675891348105:4 3:0
+        5 | 1:5925585971146611297:2 2:3353244675891348105:5 3:0
   - id: 16
     desc: feature signature select CSV from
     mode: request-unsupport
@@ -360,7 +360,7 @@ cases:
     mode: request-unsupport
     db: db1
     sql: |
-      SELECT gcformat(regression_label(col1)) as col1,
+      SELECT gcformat(regression_label(col1), discrete(col1, 1)) as col1,
           csv(regression_label(col1)) as col2,
           libsvm(regression_label(col1)) as col3
       FROM t1;
@@ -375,14 +375,14 @@ cases:
           1, 4, 55, 4.4, 44.4, 2, 4444
           2, 5, 55, 5.5, 55.5, 3, aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
     expect:
-      schema: col1:string, col2:string, col3:string
-      order: col1
-      data: |
-        1|, 1, 1
-        2|, 2, 2
-        3|, 3, 3
-        4|, 4, 4
-        5|, 5, 5
+      columns: [ "col1:string", "col2:string", "col3:string" ]
+      order: "col1"
+      rows:
+        - [ "1 | 1:0", "1", "1" ]
+        - [ "2 | 1:0", "2", "2" ]
+        - [ "3 | 1:0", "3", "3" ]
+        - [ "4 | 1:0", "4", "4" ]
+        - [ "5 | 1:0", "5", "5" ]
   - id: 19
     desc: feature signature select from join
     mode: request-unsupport
@@ -471,15 +471,25 @@ cases:
     mode: procedure-unsupport
     db: db1
     sql: |
-      select gcformat(
+      select concat("#", gcformat(
           regression_label(2),
           continuous(1),
           continuous(int("notint")),
           continuous(0),
           continuous(0.0),
           discrete(3),
-          regression_label(int("notint"))) as instance;
+          regression_label(int("notint")))) as instance;
     expect:
       schema: instance:string
       data: |
-        | 1:0:1 3:0:0 4:0:0.000000 5:2681491882390849628
+        # | 1:5925585971146611297:1 3:7262150054277104024:0 4:3255232038643208583:0.000000 5:2681491882390849628
+  - id: 23
+    desc: hash64
+    mode: procedure-unsupport
+    db: db1
+    sql: |
+      select hash64(3) as col1, hash64(bigint(3)) as col2;
+    expect:
+      schema: col1:int64, col2:int64
+      data: |
+        2681491882390849628, 7262150054277104024
diff --git a/cases/query/udf_query.yaml b/cases/query/udf_query.yaml
@@ -536,6 +536,25 @@ cases:
       data: |
         true, true, false, false, true, false, true, false, true, false, true
 
+  - id: isin
+    mode: request-unsupport
+    inputs:
+      - name: t1
+        columns: ["col1:int32", "std_ts:timestamp", "col2:string"]
+        indexs: ["index1:col1:std_ts"]
+        rows:
+          - [1, 1590115420001, "ABCabcabc"]
+    sql: |
+      select
+        isin(2, [2,2]) as c0,
+        isin(cast(3 as int64), ARRAY<INT64>[NULL, 1, 2]) as c1
+    expect:
+      columns:
+        - c0 bool
+        - c1 bool
+      data: |
+        true, false
+
   - id: array_split
     mode: request-unsupport
     inputs:
@@ -554,6 +573,98 @@ cases:
         - c1 bool
       data: |
         true, false
+  - id: array_join
+    mode: request-unsupport
+    sql: |
+      select
+        array_join(["1", "2"], ",") c1,
+        array_join(["1", "2"], "") c2,
+        array_join(["1", "2"], cast(null as string)) c3,
+        array_join(["1", NULL, "4", "5", NULL], "-") c4,
+        array_join(array<string>[], ",") as c5
+    expect:
+      columns:
+        - c1 string
+        - c2 string
+        - c3 string
+        - c4 string
+        - c5 string
+      rows:
+        - ["1,2", "12", "12", "1-4-5", ""]
+  - id: array_combine
+    mode: request-unsupport
+    sql: |
+      select
+        array_join(array_combine("-", ["1", "2"], ["3", "4"]), ",") c0,
+    expect:
+      columns:
+        - c0 string
+      rows:
+        - ["1-3,1-4,2-3,2-4"]
+
+  - id: array_combine_2
+    desc: array_combine casting array to array<string> first
+    mode: request-unsupport
+    sql: |
+      select
+        array_join(array_combine("-", [1, 2], [3, 4]), ",") c0,
+        array_join(array_combine("-", [1, 2], array<int64>[3], ["5", "6"]), ",") c1,
+        array_join(array_combine("|", ["1"], [timestamp(1717171200000), timestamp("2024-06-02 12:00:00")]), ",") c2,
+        array_join(array_combine("|", ["1"]), ",") c3,
+    expect:
+      columns:
+        - c0 string
+        - c1 string
+        - c2 string
+        - c3 string
+      rows:
+        - ["1-3,1-4,2-3,2-4", "1-3-5,1-3-6,2-3-5,2-3-6", "1|2024-06-01 00:00:00,1|2024-06-02 12:00:00", "1"]
+  - id: array_combine_3
+    desc: null values skipped
+    mode: request-unsupport
+    sql: |
+      select
+        array_join(array_combine("-", [1, NULL], [3, 4]), ",") c0,
+        array_join(array_combine("-", ARRAY<INT>[NULL], ["9", "8"]), ",") c1,
+        array_join(array_combine(string(NULL), ARRAY<INT>[1], ["9", "8"]), ",") c2,
+    expect:
+      columns:
+        - c0 string
+        - c1 string
+        - c2 string
+      rows:
+        - ["1-3,1-4", "", "19,18"]
+  - id: array_combine_4
+    desc: construct array from table
+    mode: request-unsupport
+    inputs:
+      - name: t1
+        columns: ["col1:int32", "std_ts:timestamp", "col2:string"]
+        indexs: ["index1:col1:std_ts"]
+        rows:
+          - [1, 1590115420001, "foo"]
+          - [2, 1590115420001, "bar"]
+    sql: |
+      select
+        col1,
+        array_join(array_combine("-", [col1, 10], [col2, "c2"]), ",") c0,
+      from t1
+    expect:
+      columns:
+        - col1 int32
+        - c0 string
+      rows:
+        - [1, "1-foo,1-c2,10-foo,10-c2"]
+        - [2, "2-bar,2-c2,10-bar,10-c2"]
+  - id: array_combine_err1
+    mode: request-unsupport
+    sql: |
+      select
+        array_join(array_combine("-"), ",") c0,
+    expect:
+      success: false
+      msg: |
+        Fail to resolve expression: array_join(array_combine(-), ,)
 
   # ================================================================
   # Map data type

diff --git a/docs/en/maintain/cli.md b/docs/en/maintain/cli.md
@@ -401,7 +401,7 @@ $ ./openmldb --endpoint=172.27.2.52:9520 --role=client
 
 ### loadtable
 
-1. Load an existing table
+Load an existing table. Only memory tables are supported.
 
 Command format: `loadtable table_name tid pid ttl segment_cnt`
 

diff --git a/docs/en/openmldb_sql/data_types/composite_types.md b/docs/en/openmldb_sql/data_types/composite_types.md
@@ -28,3 +28,5 @@ select map (1, "12", 2, "100")[2]
 1. Generally not recommended to store a map value with too much key-value pairs, since it's a row-based storage model.
 2. Map data type can not used as the key or ts column of table index, queries can not be optimized based on specific key value inside a map column neither.
 3. Query a key-value in a map takes `O(n)` complexity at most.
+4. Currently, a map type value cannot be output directly from a SQL query; however, you can access information about the map value using map-related expressions. For example, you may use the `[]` operator on a `map<int, int>` value to extract the value of a specific key.
+
diff --git a/docs/en/openmldb_sql/ddl/CREATE_TABLE_STATEMENT.md b/docs/en/openmldb_sql/ddl/CREATE_TABLE_STATEMENT.md
@@ -50,15 +50,16 @@ ColumnName ::=
     Identifier ( '.' Identifier ( '.' Identifier )? )?      
 
 ColumnType ::=
-						'INT' | 'INT32'
-						|'SMALLINT' | 'INT16'
-						|'BIGINT' | 'INT64'
-						|'FLOAT'
-						|'DOUBLE'
-						|'TIMESTAMP'
-						|'DATE'
-						|'BOOL'
-						|'STRING' | 'VARCHAR'
+    'INT' | 'INT32'
+    |'SMALLINT' | 'INT16'
+    |'BIGINT' | 'INT64'
+    |'FLOAT'
+    |'DOUBLE'
+    |'TIMESTAMP'
+    |'DATE'
+    |'BOOL'
+    |'STRING' | 'VARCHAR'
+    | 'MAP' '<' ColumnType ',' ColumnType '>'
 
 ColumnOptionList ::= 
     ColumnOption*	

diff --git a/docs/en/openmldb_sql/dql/SELECT_STATEMENT.md b/docs/en/openmldb_sql/dql/SELECT_STATEMENT.md
@@ -142,6 +142,6 @@ Parentheses `()` expression is the minimal unit to a request row, every expressi
 -- executing SQL as request mode, with request row (10, "foo", timestamp(4000))
 SELECT id, count (val) over (partition by id order by ts rows between 10 preceding and current row)
 FROM t1
-CONFIG (execute_mode = 'online', values = (10, "foo", timestamp (4000)))
+CONFIG (execute_mode = 'request', values = (10, "foo", timestamp (4000)))
 ```