Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Create temporary files for reading or writing #8005

Merged
merged 3 commits into from
Nov 1, 2023
Merged
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 23 additions & 15 deletions datafusion-examples/examples/dataframe.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,10 @@
use datafusion::arrow::datatypes::{DataType, Field, Schema};
use datafusion::error::Result;
use datafusion::prelude::*;
use std::fs;
use std::fs::File;
use std::io::Write;
use std::path::PathBuf;
use tempfile::tempdir;

/// This example demonstrates executing a simple query against an Arrow data source (Parquet) and
/// fetching results, using the DataFrame API
Expand All @@ -41,12 +44,20 @@ async fn main() -> Result<()> {
// print the results
df.show().await?;

// create an empty CSV file that the sample data will be written to
let path = "example.csv";
let dir = tempdir()?;
let file_path = PathBuf::from(dir.path()).join(path);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You might be able to express this more concisely like this, perhaps

Suggested change
let file_path = PathBuf::from(dir.path()).join(path);
let file_path = dir.path().join("example.csv");

let file = File::create(&file_path)?;
write_csv_file(file);

// Reading CSV file with inferred schema example
let csv_df = example_read_csv_file_with_inferred_schema().await;
let csv_df =
example_read_csv_file_with_inferred_schema(file_path.to_str().unwrap()).await;
csv_df.show().await?;

// Reading CSV file with defined schema
let csv_df = example_read_csv_file_with_schema().await;
let csv_df = example_read_csv_file_with_schema(file_path.to_str().unwrap()).await;
csv_df.show().await?;

// Reading PARQUET file and print describe
Expand All @@ -59,31 +70,28 @@ async fn main() -> Result<()> {
}

// Function to create a test CSV file
fn create_csv_file(path: String) {
fn write_csv_file(mut file: File) {
// Create the data to put into the csv file with headers
let content = r#"id,time,vote,unixtime,rating
a1,"10 6, 2013",3,1381017600,5.0
a2,"08 9, 2013",2,1376006400,4.5"#;
// write the data
fs::write(path, content).expect("Problem with writing file!");
file.write_all(content.as_ref())
.expect("Problem with writing file!");
}

// Example to read data from a csv file with inferred schema
async fn example_read_csv_file_with_inferred_schema() -> DataFrame {
let path = "example.csv";
// Create a csv file using the predefined function
create_csv_file(path.to_string());
async fn example_read_csv_file_with_inferred_schema(file_path: &str) -> DataFrame {
// Create a session context
let ctx = SessionContext::new();
// Register a lazy DataFrame using the context
ctx.read_csv(path, CsvReadOptions::default()).await.unwrap()
ctx.read_csv(file_path, CsvReadOptions::default())
.await
.unwrap()
}

// Example to read a CSV file with an explicitly defined schema
async fn example_read_csv_file_with_schema() -> DataFrame {
let path = "example.csv";
// Create a csv file using the predefined function
create_csv_file(path.to_string());
async fn example_read_csv_file_with_schema(file_path: &str) -> DataFrame {
// Create a session context
let ctx = SessionContext::new();
// Define the schema
Expand All @@ -101,5 +109,5 @@ async fn example_read_csv_file_with_schema() -> DataFrame {
..Default::default()
};
// Register a lazy DataFrame by using the context and option provider
ctx.read_csv(path, csv_read_option).await.unwrap()
ctx.read_csv(file_path, csv_read_option).await.unwrap()
}