From 0041d13be76aa588aa4eef2621abc7f8b3557721 Mon Sep 17 00:00:00 2001 From: Thomas Kluyver Date: Sun, 24 Dec 2023 11:32:46 +0000 Subject: [PATCH 1/4] Support SQLite column definitions with no type --- src/ast/data_type.rs | 3 +++ src/ast/ddl.rs | 6 +++++- src/parser/mod.rs | 29 ++++++++++++++++++++++++++++- tests/sqlparser_sqlite.rs | 5 +++++ 4 files changed, 41 insertions(+), 2 deletions(-) diff --git a/src/ast/data_type.rs b/src/ast/data_type.rs index 315d22b5a..e89d0482e 100644 --- a/src/ast/data_type.rs +++ b/src/ast/data_type.rs @@ -219,6 +219,8 @@ pub enum DataType { /// [hive]: https://docs.cloudera.com/cdw-runtime/cloud/impala-sql-reference/topics/impala-struct.html /// [bigquery]: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#struct_type Struct(Vec<StructField>), + /// Specific to SQLite: no type specified (no coercion) + Unspecified, } impl fmt::Display for DataType { @@ -379,6 +381,7 @@ impl fmt::Display for DataType { write!(f, "STRUCT") } } + DataType::Unspecified => Ok(()), } } } diff --git a/src/ast/ddl.rs b/src/ast/ddl.rs index 961bbef79..1e1fee570 100644 --- a/src/ast/ddl.rs +++ b/src/ast/ddl.rs @@ -516,7 +516,11 @@ pub struct ColumnDef { impl fmt::Display for ColumnDef { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!(f, "{} {}", self.name, self.data_type)?; + if self.data_type == DataType::Unspecified { + write!(f, "{}", self.name)?; + } else { + write!(f, "{} {}", self.name, self.data_type)?; + } if let Some(collation) = &self.collation { write!(f, " COLLATE {collation}")?; } diff --git a/src/parser/mod.rs b/src/parser/mod.rs index d6bc66e4b..3b83ecea7 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -4183,7 +4183,11 @@ impl<'a> Parser<'a> { pub fn parse_column_def(&mut self) -> Result<ColumnDef, ParserError> { let name = self.parse_identifier()?; - let data_type = self.parse_data_type()?; + let data_type = if self.sqlite_untyped_col_helper() { + DataType::Unspecified + } else { + self.parse_data_type()? 
+ }; let mut collation = if self.parse_keyword(Keyword::COLLATE) { Some(self.parse_object_name()?) } else { @@ -4219,6 +4223,29 @@ impl<'a> Parser<'a> { }) } + fn sqlite_untyped_col_helper(&mut self) -> bool { + if dialect_of!(self is SQLiteDialect) { + match self.peek_token().token { + Token::Word(word) => match word.keyword { + Keyword::CONSTRAINT + | Keyword::PRIMARY + | Keyword::NOT + | Keyword::UNIQUE + | Keyword::CHECK + | Keyword::DEFAULT + | Keyword::COLLATE + | Keyword::REFERENCES + | Keyword::GENERATED + | Keyword::AS => true, + _ => false, + }, + _ => true, // e.g. comma immediately after column name + } + } else { + false + } + } + pub fn parse_optional_column_option(&mut self) -> Result<Option<ColumnOption>, ParserError> { if self.parse_keywords(&[Keyword::CHARACTER, Keyword::SET]) { Ok(Some(ColumnOption::CharacterSet(self.parse_object_name()?))) diff --git a/tests/sqlparser_sqlite.rs b/tests/sqlparser_sqlite.rs index 0bf91d5ba..8de862fc2 100644 --- a/tests/sqlparser_sqlite.rs +++ b/tests/sqlparser_sqlite.rs @@ -221,6 +221,11 @@ fn parse_create_table_gencol() { sqlite_and_generic().verified_stmt("CREATE TABLE t1 (a INT, b INT AS (a * 2) STORED)"); } +#[test] +fn parse_create_table_untyped() { + sqlite().verified_stmt("CREATE TABLE t1 (a, b AS (a * 2), c NOT NULL)"); +} + #[test] fn test_placeholder() { // In postgres, this would be the absolute value operator '@' applied to the column 'xxx' From 1bd60c1ff954d2068340acd4a7057475d1603f6e Mon Sep 17 00:00:00 2001 From: Thomas Kluyver Date: Sun, 24 Dec 2023 12:24:39 +0000 Subject: [PATCH 2/4] Follow linter suggestion to use matches!() macro --- src/parser/mod.rs | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 3b83ecea7..27e41c0b5 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -4226,19 +4226,19 @@ impl<'a> Parser<'a> { fn sqlite_untyped_col_helper(&mut self) -> bool { if dialect_of!(self is SQLiteDialect) { match 
self.peek_token().token { - Token::Word(word) => match word.keyword { + Token::Word(word) => matches!( + word.keyword, Keyword::CONSTRAINT - | Keyword::PRIMARY - | Keyword::NOT - | Keyword::UNIQUE - | Keyword::CHECK - | Keyword::DEFAULT - | Keyword::COLLATE - | Keyword::REFERENCES - | Keyword::GENERATED - | Keyword::AS => true, - _ => false, - }, + | Keyword::PRIMARY + | Keyword::NOT + | Keyword::UNIQUE + | Keyword::CHECK + | Keyword::DEFAULT + | Keyword::COLLATE + | Keyword::REFERENCES + | Keyword::GENERATED + | Keyword::AS + ), _ => true, // e.g. comma immediately after column name } } else { From 18b9578780a3fda3e00779de3ce6d5f8d9339533 Mon Sep 17 00:00:00 2001 From: Thomas Kluyver Date: Mon, 1 Jan 2024 17:13:34 +0000 Subject: [PATCH 3/4] Rename method to check for columns without types --- src/parser/mod.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 27e41c0b5..25249101d 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -4183,7 +4183,7 @@ impl<'a> Parser<'a> { pub fn parse_column_def(&mut self) -> Result<ColumnDef, ParserError> { let name = self.parse_identifier()?; - let data_type = if self.sqlite_untyped_col_helper() { + let data_type = if self.is_column_type_sqlite_unspecified() { DataType::Unspecified } else { self.parse_data_type()? 
@@ -4223,7 +4223,7 @@ impl<'a> Parser<'a> { }) } - fn sqlite_untyped_col_helper(&mut self) -> bool { + fn is_column_type_sqlite_unspecified(&mut self) -> bool { if dialect_of!(self is SQLiteDialect) { match self.peek_token().token { Token::Word(word) => matches!( From 2b1cd793a595f7ff50576e8d6ba3e1d8d4c6c6d7 Mon Sep 17 00:00:00 2001 From: Thomas Kluyver Date: Mon, 1 Jan 2024 17:37:12 +0000 Subject: [PATCH 4/4] Document how SQLite columns with no type specified are handled --- src/ast/data_type.rs | 4 +++- src/dialect/mod.rs | 4 +++- src/dialect/sqlite.rs | 5 +++++ 3 files changed, 11 insertions(+), 2 deletions(-) diff --git a/src/ast/data_type.rs b/src/ast/data_type.rs index e89d0482e..6b0ecd815 100644 --- a/src/ast/data_type.rs +++ b/src/ast/data_type.rs @@ -219,7 +219,9 @@ pub enum DataType { /// [hive]: https://docs.cloudera.com/cdw-runtime/cloud/impala-sql-reference/topics/impala-struct.html /// [bigquery]: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#struct_type Struct(Vec<StructField>), - /// Specific to SQLite: no type specified (no coercion) + /// No type specified - only used with + /// [`SQLiteDialect`](crate::dialect::SQLiteDialect), from statements such + /// as `CREATE TABLE t1 (a)`. Unspecified, } diff --git a/src/dialect/mod.rs b/src/dialect/mod.rs index eab30ccd2..2e94aaa86 100644 --- a/src/dialect/mod.rs +++ b/src/dialect/mod.rs @@ -65,7 +65,9 @@ macro_rules! dialect_of { /// encapsulates the parsing differences between dialects. /// /// [`GenericDialect`] is the most permissive dialect, and parses the union of -/// all the other dialects, when there is no ambiguity. +/// all the other dialects, when there is no ambiguity. However, it does not +/// currently allow `CREATE TABLE` statements without types specified for all +/// columns; use [`SQLiteDialect`] if you require that. 
/// /// # Examples /// Most users create a [`Dialect`] directly, as shown on the [module diff --git a/src/dialect/sqlite.rs b/src/dialect/sqlite.rs index 0640466c8..622fddee6 100644 --- a/src/dialect/sqlite.rs +++ b/src/dialect/sqlite.rs @@ -16,6 +16,11 @@ use crate::keywords::Keyword; use crate::parser::{Parser, ParserError}; /// A [`Dialect`] for [SQLite](https://www.sqlite.org) +/// +/// This dialect allows columns in a +/// [`CREATE TABLE`](https://sqlite.org/lang_createtable.html) statement with no +/// type specified, as in `CREATE TABLE t1 (a)`. In the AST, these columns will +/// have the data type [`Unspecified`](crate::ast::DataType::Unspecified). #[derive(Debug)] pub struct SQLiteDialect {}