Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support for external tables with qualified names #12645

Merged
merged 9 commits into from
Oct 1, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 1 addition & 3 deletions datafusion/core/src/catalog_common/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -185,9 +185,7 @@ pub fn resolve_table_references(
let _ = s.as_ref().visit(visitor);
}
DFStatement::CreateExternalTable(table) => {
visitor
.relations
.insert(ObjectName(vec![Ident::from(table.name.as_str())]));
visitor.relations.insert(table.name.clone());
}
DFStatement::CopyTo(CopyToStatement { source, .. }) => match source {
CopyToSource::Relation(table_name) => {
Expand Down
39 changes: 20 additions & 19 deletions datafusion/sql/src/parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -181,7 +181,7 @@ pub(crate) type LexOrdering = Vec<OrderByExpr>;
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct CreateExternalTable {
/// Table name
pub name: String,
pub name: ObjectName,
/// Optional schema
pub columns: Vec<ColumnDef>,
/// File type (Parquet, NDJSON, CSV, etc)
Expand Down Expand Up @@ -813,7 +813,7 @@ impl<'a> DFParser<'a> {
}

let create = CreateExternalTable {
name: table_name.to_string(),
name: table_name,
columns,
file_type: builder.file_type.unwrap(),
location: builder.location.unwrap(),
Expand Down Expand Up @@ -915,8 +915,9 @@ mod tests {
// positive case
let sql = "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV LOCATION 'foo.csv'";
let display = None;
let name = ObjectName(vec![Ident::from("t")]);
let expected = Statement::CreateExternalTable(CreateExternalTable {
name: "t".into(),
name: name.clone(),
columns: vec![make_column_def("c1", DataType::Int(display))],
file_type: "CSV".to_string(),
location: "foo.csv".into(),
Expand All @@ -932,7 +933,7 @@ mod tests {
// positive case: trailing space
let sql = "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV LOCATION 'foo.csv' ";
let expected = Statement::CreateExternalTable(CreateExternalTable {
name: "t".into(),
name: name.clone(),
columns: vec![make_column_def("c1", DataType::Int(None))],
file_type: "CSV".to_string(),
location: "foo.csv".into(),
Expand All @@ -949,7 +950,7 @@ mod tests {
let sql =
"CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV LOCATION 'foo.csv' ;";
let expected = Statement::CreateExternalTable(CreateExternalTable {
name: "t".into(),
name: name.clone(),
columns: vec![make_column_def("c1", DataType::Int(None))],
file_type: "CSV".to_string(),
location: "foo.csv".into(),
Expand All @@ -966,7 +967,7 @@ mod tests {
let sql = "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV LOCATION 'foo.csv' OPTIONS (format.delimiter '|')";
let display = None;
let expected = Statement::CreateExternalTable(CreateExternalTable {
name: "t".into(),
name: name.clone(),
columns: vec![make_column_def("c1", DataType::Int(display))],
file_type: "CSV".to_string(),
location: "foo.csv".into(),
Expand All @@ -986,7 +987,7 @@ mod tests {
let sql = "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV PARTITIONED BY (p1, p2) LOCATION 'foo.csv'";
let display = None;
let expected = Statement::CreateExternalTable(CreateExternalTable {
name: "t".into(),
name: name.clone(),
columns: vec![make_column_def("c1", DataType::Int(display))],
file_type: "CSV".to_string(),
location: "foo.csv".into(),
Expand All @@ -1013,7 +1014,7 @@ mod tests {
];
for (sql, compression) in sqls {
let expected = Statement::CreateExternalTable(CreateExternalTable {
name: "t".into(),
name: name.clone(),
columns: vec![make_column_def("c1", DataType::Int(display))],
file_type: "CSV".to_string(),
location: "foo.csv".into(),
Expand All @@ -1033,7 +1034,7 @@ mod tests {
// positive case: it is ok for parquet files not to have columns specified
let sql = "CREATE EXTERNAL TABLE t STORED AS PARQUET LOCATION 'foo.parquet'";
let expected = Statement::CreateExternalTable(CreateExternalTable {
name: "t".into(),
name: name.clone(),
columns: vec![],
file_type: "PARQUET".to_string(),
location: "foo.parquet".into(),
Expand All @@ -1049,7 +1050,7 @@ mod tests {
// positive case: it is ok for parquet files to be other than upper case
let sql = "CREATE EXTERNAL TABLE t STORED AS parqueT LOCATION 'foo.parquet'";
let expected = Statement::CreateExternalTable(CreateExternalTable {
name: "t".into(),
name: name.clone(),
columns: vec![],
file_type: "PARQUET".to_string(),
location: "foo.parquet".into(),
Expand All @@ -1065,7 +1066,7 @@ mod tests {
// positive case: it is ok for avro files not to have columns specified
let sql = "CREATE EXTERNAL TABLE t STORED AS AVRO LOCATION 'foo.avro'";
let expected = Statement::CreateExternalTable(CreateExternalTable {
name: "t".into(),
name: name.clone(),
columns: vec![],
file_type: "AVRO".to_string(),
location: "foo.avro".into(),
Expand All @@ -1082,7 +1083,7 @@ mod tests {
let sql =
"CREATE EXTERNAL TABLE IF NOT EXISTS t STORED AS PARQUET LOCATION 'foo.parquet'";
let expected = Statement::CreateExternalTable(CreateExternalTable {
name: "t".into(),
name: name.clone(),
columns: vec![],
file_type: "PARQUET".to_string(),
location: "foo.parquet".into(),
Expand All @@ -1099,7 +1100,7 @@ mod tests {
let sql =
"CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV PARTITIONED BY (p1 int) LOCATION 'foo.csv'";
let expected = Statement::CreateExternalTable(CreateExternalTable {
name: "t".into(),
name: name.clone(),
columns: vec![
make_column_def("c1", DataType::Int(None)),
make_column_def("p1", DataType::Int(None)),
Expand Down Expand Up @@ -1132,7 +1133,7 @@ mod tests {
let sql =
"CREATE EXTERNAL TABLE t STORED AS x OPTIONS ('k1' 'v1') LOCATION 'blahblah'";
let expected = Statement::CreateExternalTable(CreateExternalTable {
name: "t".into(),
name: name.clone(),
columns: vec![],
file_type: "X".to_string(),
location: "blahblah".into(),
Expand All @@ -1149,7 +1150,7 @@ mod tests {
let sql =
"CREATE EXTERNAL TABLE t STORED AS x OPTIONS ('k1' 'v1', k2 v2) LOCATION 'blahblah'";
let expected = Statement::CreateExternalTable(CreateExternalTable {
name: "t".into(),
name: name.clone(),
columns: vec![],
file_type: "X".to_string(),
location: "blahblah".into(),
Expand Down Expand Up @@ -1188,7 +1189,7 @@ mod tests {
];
for (sql, (asc, nulls_first)) in sqls.iter().zip(expected.into_iter()) {
let expected = Statement::CreateExternalTable(CreateExternalTable {
name: "t".into(),
name: name.clone(),
columns: vec![make_column_def("c1", DataType::Int(None))],
file_type: "CSV".to_string(),
location: "foo.csv".into(),
Expand All @@ -1214,7 +1215,7 @@ mod tests {
let sql = "CREATE EXTERNAL TABLE t(c1 int, c2 int) STORED AS CSV WITH ORDER (c1 ASC, c2 DESC NULLS FIRST) LOCATION 'foo.csv'";
let display = None;
let expected = Statement::CreateExternalTable(CreateExternalTable {
name: "t".into(),
name: name.clone(),
columns: vec![
make_column_def("c1", DataType::Int(display)),
make_column_def("c2", DataType::Int(display)),
Expand Down Expand Up @@ -1253,7 +1254,7 @@ mod tests {
let sql = "CREATE EXTERNAL TABLE t(c1 int, c2 int) STORED AS CSV WITH ORDER (c1 - c2 ASC) LOCATION 'foo.csv'";
let display = None;
let expected = Statement::CreateExternalTable(CreateExternalTable {
name: "t".into(),
name: name.clone(),
columns: vec![
make_column_def("c1", DataType::Int(display)),
make_column_def("c2", DataType::Int(display)),
Expand Down Expand Up @@ -1297,7 +1298,7 @@ mod tests {
'TRUNCATE' 'NO',
'format.has_header' 'true')";
let expected = Statement::CreateExternalTable(CreateExternalTable {
name: "t".into(),
name: name.clone(),
columns: vec![
make_column_def("c1", DataType::Int(None)),
make_column_def("c2", DataType::Float(None)),
Expand Down
3 changes: 1 addition & 2 deletions datafusion/sql/src/statement.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1239,8 +1239,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
let ordered_exprs =
self.build_order_by(order_exprs, &df_schema, &mut planner_context)?;

// External tables do not support schemas at the moment, so the name is just a table name
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🎉

let name = TableReference::bare(name);
let name = self.object_name_to_table_reference(name)?;
let constraints =
Constraints::new_from_table_constraints(&all_constraints, &df_schema)?;
Ok(LogicalPlan::Ddl(DdlStatement::CreateExternalTable(
Expand Down
7 changes: 7 additions & 0 deletions datafusion/sql/tests/sql_integration.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1913,6 +1913,13 @@ fn create_external_table_with_pk() {
quick_test(sql, expected);
}

/// Checks that a schema-qualified name in `CREATE EXTERNAL TABLE` is planned
/// as a partial table reference (schema `staging`, table `foo`).
// NOTE(review): `wih` in the test name looks like a typo for `with`; kept
// as-is so existing test filters keep matching.
#[test]
fn create_external_table_wih_schema() {
    quick_test(
        "CREATE EXTERNAL TABLE staging.foo STORED AS CSV LOCATION 'foo.csv'",
        "CreateExternalTable: Partial { schema: \"staging\", table: \"foo\" }",
    );
}

#[test]
fn create_schema_with_quoted_name() {
let sql = "CREATE SCHEMA \"quoted_schema_name\"";
Expand Down
12 changes: 12 additions & 0 deletions datafusion/sqllogictest/test_files/create_external_table.slt
Original file line number Diff line number Diff line change
Expand Up @@ -275,3 +275,15 @@ DROP TABLE t;
# query should fail with bad column
statement error DataFusion error: Error during planning: Column foo is not in schema
CREATE EXTERNAL TABLE t STORED AS parquet LOCATION '../../parquet-testing/data/alltypes_plain.parquet' WITH ORDER (foo);

# An external table created with a qualified name should belong to the named schema
statement ok
CREATE SCHEMA staging;

statement ok
CREATE EXTERNAL TABLE staging.foo STORED AS parquet LOCATION '../../parquet-testing/data/alltypes_plain.parquet';

# Creating an external table with a qualified name whose schema does not exist should error
statement error DataFusion error: Error during planning: failed to resolve schema: release
CREATE EXTERNAL TABLE release.bar STORED AS parquet LOCATION '../../parquet-testing/data/alltypes_plain.parquet';