Skip to content

Commit

Permalink
Add support for external tables with qualified names (#12645)
Browse files Browse the repository at this point in the history
* Make external tables support schemas

* Set default name to table

* Remove print statements and stale comment

* Add tests for create table

* Fix typo

* Update datafusion/sql/src/statement.rs

Co-authored-by: Jonah Gao <[email protected]>

* convert create_external_table to objectname

* Add sqllogic tests

* Fix failing tests

---------

Co-authored-by: Jonah Gao <[email protected]>
  • Loading branch information
OussamaSaoudi and jonahgao authored Oct 1, 2024
1 parent 23d7fff commit 35adf47
Show file tree
Hide file tree
Showing 5 changed files with 41 additions and 24 deletions.
4 changes: 1 addition & 3 deletions datafusion/core/src/catalog_common/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -185,9 +185,7 @@ pub fn resolve_table_references(
let _ = s.as_ref().visit(visitor);
}
DFStatement::CreateExternalTable(table) => {
visitor
.relations
.insert(ObjectName(vec![Ident::from(table.name.as_str())]));
visitor.relations.insert(table.name.clone());
}
DFStatement::CopyTo(CopyToStatement { source, .. }) => match source {
CopyToSource::Relation(table_name) => {
Expand Down
39 changes: 20 additions & 19 deletions datafusion/sql/src/parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -181,7 +181,7 @@ pub(crate) type LexOrdering = Vec<OrderByExpr>;
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct CreateExternalTable {
/// Table name
pub name: String,
pub name: ObjectName,
/// Optional schema
pub columns: Vec<ColumnDef>,
/// File type (Parquet, NDJSON, CSV, etc)
Expand Down Expand Up @@ -813,7 +813,7 @@ impl<'a> DFParser<'a> {
}

let create = CreateExternalTable {
name: table_name.to_string(),
name: table_name,
columns,
file_type: builder.file_type.unwrap(),
location: builder.location.unwrap(),
Expand Down Expand Up @@ -915,8 +915,9 @@ mod tests {
// positive case
let sql = "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV LOCATION 'foo.csv'";
let display = None;
let name = ObjectName(vec![Ident::from("t")]);
let expected = Statement::CreateExternalTable(CreateExternalTable {
name: "t".into(),
name: name.clone(),
columns: vec![make_column_def("c1", DataType::Int(display))],
file_type: "CSV".to_string(),
location: "foo.csv".into(),
Expand All @@ -932,7 +933,7 @@ mod tests {
// positive case: trailing space
let sql = "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV LOCATION 'foo.csv' ";
let expected = Statement::CreateExternalTable(CreateExternalTable {
name: "t".into(),
name: name.clone(),
columns: vec![make_column_def("c1", DataType::Int(None))],
file_type: "CSV".to_string(),
location: "foo.csv".into(),
Expand All @@ -949,7 +950,7 @@ mod tests {
let sql =
"CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV LOCATION 'foo.csv' ;";
let expected = Statement::CreateExternalTable(CreateExternalTable {
name: "t".into(),
name: name.clone(),
columns: vec![make_column_def("c1", DataType::Int(None))],
file_type: "CSV".to_string(),
location: "foo.csv".into(),
Expand All @@ -966,7 +967,7 @@ mod tests {
let sql = "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV LOCATION 'foo.csv' OPTIONS (format.delimiter '|')";
let display = None;
let expected = Statement::CreateExternalTable(CreateExternalTable {
name: "t".into(),
name: name.clone(),
columns: vec![make_column_def("c1", DataType::Int(display))],
file_type: "CSV".to_string(),
location: "foo.csv".into(),
Expand All @@ -986,7 +987,7 @@ mod tests {
let sql = "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV PARTITIONED BY (p1, p2) LOCATION 'foo.csv'";
let display = None;
let expected = Statement::CreateExternalTable(CreateExternalTable {
name: "t".into(),
name: name.clone(),
columns: vec![make_column_def("c1", DataType::Int(display))],
file_type: "CSV".to_string(),
location: "foo.csv".into(),
Expand All @@ -1013,7 +1014,7 @@ mod tests {
];
for (sql, compression) in sqls {
let expected = Statement::CreateExternalTable(CreateExternalTable {
name: "t".into(),
name: name.clone(),
columns: vec![make_column_def("c1", DataType::Int(display))],
file_type: "CSV".to_string(),
location: "foo.csv".into(),
Expand All @@ -1033,7 +1034,7 @@ mod tests {
// positive case: it is ok for parquet files not to have columns specified
let sql = "CREATE EXTERNAL TABLE t STORED AS PARQUET LOCATION 'foo.parquet'";
let expected = Statement::CreateExternalTable(CreateExternalTable {
name: "t".into(),
name: name.clone(),
columns: vec![],
file_type: "PARQUET".to_string(),
location: "foo.parquet".into(),
Expand All @@ -1049,7 +1050,7 @@ mod tests {
// positive case: it is ok for parquet files to be other than upper case
let sql = "CREATE EXTERNAL TABLE t STORED AS parqueT LOCATION 'foo.parquet'";
let expected = Statement::CreateExternalTable(CreateExternalTable {
name: "t".into(),
name: name.clone(),
columns: vec![],
file_type: "PARQUET".to_string(),
location: "foo.parquet".into(),
Expand All @@ -1065,7 +1066,7 @@ mod tests {
// positive case: it is ok for avro files not to have columns specified
let sql = "CREATE EXTERNAL TABLE t STORED AS AVRO LOCATION 'foo.avro'";
let expected = Statement::CreateExternalTable(CreateExternalTable {
name: "t".into(),
name: name.clone(),
columns: vec![],
file_type: "AVRO".to_string(),
location: "foo.avro".into(),
Expand All @@ -1082,7 +1083,7 @@ mod tests {
let sql =
"CREATE EXTERNAL TABLE IF NOT EXISTS t STORED AS PARQUET LOCATION 'foo.parquet'";
let expected = Statement::CreateExternalTable(CreateExternalTable {
name: "t".into(),
name: name.clone(),
columns: vec![],
file_type: "PARQUET".to_string(),
location: "foo.parquet".into(),
Expand All @@ -1099,7 +1100,7 @@ mod tests {
let sql =
"CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV PARTITIONED BY (p1 int) LOCATION 'foo.csv'";
let expected = Statement::CreateExternalTable(CreateExternalTable {
name: "t".into(),
name: name.clone(),
columns: vec![
make_column_def("c1", DataType::Int(None)),
make_column_def("p1", DataType::Int(None)),
Expand Down Expand Up @@ -1132,7 +1133,7 @@ mod tests {
let sql =
"CREATE EXTERNAL TABLE t STORED AS x OPTIONS ('k1' 'v1') LOCATION 'blahblah'";
let expected = Statement::CreateExternalTable(CreateExternalTable {
name: "t".into(),
name: name.clone(),
columns: vec![],
file_type: "X".to_string(),
location: "blahblah".into(),
Expand All @@ -1149,7 +1150,7 @@ mod tests {
let sql =
"CREATE EXTERNAL TABLE t STORED AS x OPTIONS ('k1' 'v1', k2 v2) LOCATION 'blahblah'";
let expected = Statement::CreateExternalTable(CreateExternalTable {
name: "t".into(),
name: name.clone(),
columns: vec![],
file_type: "X".to_string(),
location: "blahblah".into(),
Expand Down Expand Up @@ -1188,7 +1189,7 @@ mod tests {
];
for (sql, (asc, nulls_first)) in sqls.iter().zip(expected.into_iter()) {
let expected = Statement::CreateExternalTable(CreateExternalTable {
name: "t".into(),
name: name.clone(),
columns: vec![make_column_def("c1", DataType::Int(None))],
file_type: "CSV".to_string(),
location: "foo.csv".into(),
Expand All @@ -1214,7 +1215,7 @@ mod tests {
let sql = "CREATE EXTERNAL TABLE t(c1 int, c2 int) STORED AS CSV WITH ORDER (c1 ASC, c2 DESC NULLS FIRST) LOCATION 'foo.csv'";
let display = None;
let expected = Statement::CreateExternalTable(CreateExternalTable {
name: "t".into(),
name: name.clone(),
columns: vec![
make_column_def("c1", DataType::Int(display)),
make_column_def("c2", DataType::Int(display)),
Expand Down Expand Up @@ -1253,7 +1254,7 @@ mod tests {
let sql = "CREATE EXTERNAL TABLE t(c1 int, c2 int) STORED AS CSV WITH ORDER (c1 - c2 ASC) LOCATION 'foo.csv'";
let display = None;
let expected = Statement::CreateExternalTable(CreateExternalTable {
name: "t".into(),
name: name.clone(),
columns: vec![
make_column_def("c1", DataType::Int(display)),
make_column_def("c2", DataType::Int(display)),
Expand Down Expand Up @@ -1297,7 +1298,7 @@ mod tests {
'TRUNCATE' 'NO',
'format.has_header' 'true')";
let expected = Statement::CreateExternalTable(CreateExternalTable {
name: "t".into(),
name: name.clone(),
columns: vec![
make_column_def("c1", DataType::Int(None)),
make_column_def("c2", DataType::Float(None)),
Expand Down
3 changes: 1 addition & 2 deletions datafusion/sql/src/statement.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1239,8 +1239,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
let ordered_exprs =
self.build_order_by(order_exprs, &df_schema, &mut planner_context)?;

// External tables do not support schemas at the moment, so the name is just a table name
let name = TableReference::bare(name);
let name = self.object_name_to_table_reference(name)?;
let constraints =
Constraints::new_from_table_constraints(&all_constraints, &df_schema)?;
Ok(LogicalPlan::Ddl(DdlStatement::CreateExternalTable(
Expand Down
7 changes: 7 additions & 0 deletions datafusion/sql/tests/sql_integration.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1913,6 +1913,13 @@ fn create_external_table_with_pk() {
quick_test(sql, expected);
}

/// A schema-qualified `CREATE EXTERNAL TABLE` name is expected to be reported
/// with both its schema (`staging`) and table (`foo`) parts, rather than being
/// collapsed into a single bare table name.
#[test]
fn create_external_table_wih_schema() {
    quick_test(
        "CREATE EXTERNAL TABLE staging.foo STORED AS CSV LOCATION 'foo.csv'",
        "CreateExternalTable: Partial { schema: \"staging\", table: \"foo\" }",
    );
}

#[test]
fn create_schema_with_quoted_name() {
let sql = "CREATE SCHEMA \"quoted_schema_name\"";
Expand Down
12 changes: 12 additions & 0 deletions datafusion/sqllogictest/test_files/create_external_table.slt
Original file line number Diff line number Diff line change
Expand Up @@ -275,3 +275,15 @@ DROP TABLE t;
# query should fail with bad column
statement error DataFusion error: Error during planning: Column foo is not in schema
CREATE EXTERNAL TABLE t STORED AS parquet LOCATION '../../parquet-testing/data/alltypes_plain.parquet' WITH ORDER (foo);

# Create external table with qualified name should belong to the schema
statement ok
CREATE SCHEMA staging;

statement ok
CREATE EXTERNAL TABLE staging.foo STORED AS parquet LOCATION '../../parquet-testing/data/alltypes_plain.parquet';

# Create external table with a qualified name whose schema does not exist should error
statement error DataFusion error: Error during planning: failed to resolve schema: release
CREATE EXTERNAL TABLE release.bar STORED AS parquet LOCATION '../../parquet-testing/data/alltypes_plain.parquet';

0 comments on commit 35adf47

Please sign in to comment.