diff --git a/.github/actions/exa-cluster/action.yaml b/.github/actions/exa-cluster/action.yaml new file mode 100644 index 0000000..3f27b74 --- /dev/null +++ b/.github/actions/exa-cluster/action.yaml @@ -0,0 +1,70 @@ +name: "Exasol Cluster" +description: "Instantiates an Exasol database cluster" +inputs: + exasol-version: + description: "Version of the Exasol database" + required: true + num-nodes: + description: "Number of nodes to spawn in the cluster" + required: true +outputs: + no-tls-url: + description: "Connection string for the database with TLS disabled" + value: ${{ steps.connection-strings.outputs.no-tls-url }} + tls-url: + description: "Connection string for the database with TLS enabled" + value: ${{ steps.connection-strings.outputs.tls-url }} +runs: + using: "composite" + steps: + - name: Restore Exasol image + id: cache-docker-exasol + uses: actions/cache@v3 + with: + path: ~/ci/cache/docker + key: cache-docker-exasol-${{ inputs.exasol-version }} + + - name: Store Exasol image if cache miss + if: steps.cache-docker-exasol.outputs.cache-hit != 'true' + run: docker pull exasol/docker-db:${{ inputs.exasol-version }} && mkdir -p ~/ci/cache/docker && docker image save exasol/docker-db:${{ inputs.exasol-version }} --output ~/ci/cache/docker/exasol-${{ inputs.exasol-version }}.tar + shell: bash + + - name: Load Exasol image + run: docker image load --input ~/ci/cache/docker/exasol-${{ inputs.exasol-version }}.tar + shell: bash + + - name: Set up Python 3.10 + uses: actions/setup-python@v3 + with: + python-version: "3.10" + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install pipenv + shell: bash + + - uses: actions/checkout@v3 + with: + repository: Exasol/docker-db + ref: ${{ inputs.exasol-version }} + + - name: Set up Exasol Cluster + run: | + pipenv install -r exadt_requirements.txt + pipenv run ./exadt create-cluster --root ~/sqlx --create-root sqlx + pipenv run ./exadt init-cluster --image exasol/docker-db:${{ 
inputs.exasol-version }} --license ./license/license.xml --num-nodes ${{ inputs.num-nodes }} --auto-storage sqlx + pipenv run ./exadt start-cluster sqlx + shell: bash + + # The exadt tool always creates the Exasol cluster with this subnet + - name: Create connection strings + id: connection-strings + run: | + DATABASE_URL="exa://sys:exasol@10.10.10.11..1$NUM_NODES:8563" + echo "no-tls-url=$DATABASE_URL?ssl-mode=disabled" >> $GITHUB_OUTPUT + echo "tls-url=$DATABASE_URL?ssl-mode=required" >> $GITHUB_OUTPUT + shell: bash + + # Checkout the main repository again + - uses: actions/checkout@v3 diff --git a/.github/actions/free-space/action.yaml b/.github/actions/free-space/action.yaml new file mode 100644 index 0000000..af0bc5f --- /dev/null +++ b/.github/actions/free-space/action.yaml @@ -0,0 +1,29 @@ +name: "Free disk space" +description: "Deletes language runtimes and implicit docker images to free up space in the runner" +runs: + using: "composite" + steps: + - name: Get initial disk space + run: | + echo "Getting initial free space" + df . -h + shell: bash + + - name: Remove language runtimes + run: | + sudo rm -rf \ + /usr/share/dotnet /usr/local/lib/android /opt/ghc \ + /usr/local/share/powershell /usr/share/swift /usr/local/.ghcup \ + /usr/lib/jvm /opt/hostedtoolcache/CodeQL || true + + echo "Removed language runtimes" + df . -h + shell: bash + + - name: Remove docker images + run: | + sudo docker image prune --all --force + + echo "Removed docker images" + df . 
-h + shell: bash diff --git a/.github/workflows/main.yaml b/.github/workflows/main.yaml index a687226..2987cd8 100644 --- a/.github/workflows/main.yaml +++ b/.github/workflows/main.yaml @@ -12,7 +12,7 @@ env: EXASOL_VERSION: 7.0.22 NUM_NODES: 2 ETL_TEST_THREADS: 4 - ETL_JOB_TIMEOUT: 10 + ETL_JOB_TIMEOUT: 20 jobs: format: @@ -21,7 +21,8 @@ jobs: steps: - uses: actions/checkout@v3 - - uses: actions-rs/toolchain@v1 + - name: Setup Rust toolchain + uses: actions-rs/toolchain@v1 with: profile: minimal toolchain: nightly @@ -52,7 +53,8 @@ jobs: steps: - uses: actions/checkout@v3 - - uses: actions-rs/toolchain@v1 + - name: Setup Rust toolchain + uses: actions-rs/toolchain@v1 with: profile: minimal toolchain: stable @@ -68,66 +70,23 @@ jobs: env: RUSTFLAGS: -D warnings - tests: - name: Tests + connection_tests: + name: Connection tests needs: clippy runs-on: ubuntu-20.04 steps: - - name: Free disk space - run: | - echo "Getting initial free space" - df . -h - - sudo rm -rf \ - /usr/share/dotnet /usr/local/lib/android /opt/ghc \ - /usr/local/share/powershell /usr/share/swift /usr/local/.ghcup \ - /usr/lib/jvm /opt/hostedtoolcache/CodeQL || true - - echo "Removed language runtimes" - df . -h - - sudo docker image prune --all --force - - echo "Removed docker images" - df . 
-h - - - name: Restore Exasol image - id: cache-docker-exasol - uses: actions/cache@v3 - with: - path: ~/ci/cache/docker - key: cache-docker-exasol-${{ env.EXASOL_VERSION }} - - - name: Store Exasol image if cache miss - if: steps.cache-docker-exasol.outputs.cache-hit != 'true' - run: docker pull exasol/docker-db:${{ env.EXASOL_VERSION }} && mkdir -p ~/ci/cache/docker && docker image save exasol/docker-db:${{ env.EXASOL_VERSION }} --output ~/ci/cache/docker/exasol-${{ env.EXASOL_VERSION }}.tar - - - name: Load Exasol image - run: docker image load --input ~/ci/cache/docker/exasol-${{ env.EXASOL_VERSION }}.tar - - - name: Set up Python 3.10 - uses: actions/setup-python@v3 - with: - python-version: "3.10" + - uses: actions/checkout@v3 - - name: Install dependencies - run: | - python -m pip install --upgrade pip - pip install pipenv + - name: Free disk space + uses: ./.github/actions/free-space - - uses: actions/checkout@v3 + - name: Create Exasol cluster + id: exa-cluster + uses: ./.github/actions/exa-cluster with: - repository: Exasol/docker-db - ref: ${{ env.EXASOL_VERSION }} - - - name: Set up Exasol Cluster - run: | - pipenv install -r exadt_requirements.txt - pipenv run ./exadt create-cluster --root ~/sqlx --create-root sqlx - pipenv run ./exadt init-cluster --image exasol/docker-db:${{ env.EXASOL_VERSION }} --license ./license/license.xml --num-nodes ${{ env.NUM_NODES }} --auto-storage sqlx - pipenv run ./exadt start-cluster sqlx - # docker update --memory=2g --memory-swap=2g $(docker ps | awk 'NR>1 {print $1}') + exasol-version: ${{ env.EXASOL_VERSION }} + num-nodes: ${{ env.NUM_NODES }} - name: Setup Rust toolchain uses: actions-rs/toolchain@v1 @@ -138,59 +97,158 @@ jobs: - uses: Swatinem/rust-cache@v2 - - uses: actions/checkout@v3 - - # The exadt tool always creates the Exasol cluster with this subnet - - name: Create connection strings - run: | - DATABASE_URL="exa://sys:exasol@10.10.10.11..1$NUM_NODES:8563" - echo 
"NO_TLS_URL=$DATABASE_URL?ssl-mode=disabled" >> $GITHUB_ENV - echo "TLS_URL=$DATABASE_URL?ssl-mode=required" >> $GITHUB_ENV - - name: Connection tests run: cargo test --features migrate,rust_decimal,uuid,chrono -- --nocapture env: - DATABASE_URL: ${{ env.NO_TLS_URL }} + DATABASE_URL: ${{ steps.exa-cluster.outputs.no-tls-url }} - name: Connection tests with compression run: cargo test --features migrate,compression -- --ignored --nocapture env: - DATABASE_URL: ${{ env.NO_TLS_URL }} + DATABASE_URL: ${{ steps.exa-cluster.outputs.no-tls-url }} + + tls_connection_tests: + name: TLS connection tests + needs: clippy + runs-on: ubuntu-20.04 + + steps: + - uses: actions/checkout@v3 + + - name: Free disk space + uses: ./.github/actions/free-space + + - name: Create Exasol cluster + id: exa-cluster + uses: ./.github/actions/exa-cluster + with: + exasol-version: ${{ env.EXASOL_VERSION }} + num-nodes: ${{ env.NUM_NODES }} + + - name: Setup Rust toolchain + uses: actions-rs/toolchain@v1 + with: + profile: minimal + toolchain: ${{ env.RUST_TOOLCHAIN }} + override: true + + - uses: Swatinem/rust-cache@v2 - name: TLS connection tests run: cargo test --features migrate,rust_decimal,uuid,chrono -- --nocapture env: - DATABASE_URL: ${{ env.TLS_URL }} + DATABASE_URL: ${{ steps.exa-cluster.outputs.tls-url }} - name: TLS connection tests with compression run: cargo test --features migrate,compression -- --ignored --nocapture env: - DATABASE_URL: ${{ env.TLS_URL }} + DATABASE_URL: ${{ steps.exa-cluster.outputs.tls-url }} - - name: Tests compilation failure if both ETL TLS features are enabled - run: cargo test --features etl_native_tls,etl_rustls || true - env: - DATABASE_URL: ${{ env.TLS_URL }} + etl_tests: + name: ETL tests + needs: clippy + runs-on: ubuntu-20.04 + + steps: + - uses: actions/checkout@v3 + + - name: Free disk space + uses: ./.github/actions/free-space + + - name: Create Exasol cluster + id: exa-cluster + uses: ./.github/actions/exa-cluster + with: + exasol-version: ${{ 
env.EXASOL_VERSION }} + num-nodes: ${{ env.NUM_NODES }} + + - name: Setup Rust toolchain + uses: actions-rs/toolchain@v1 + with: + profile: minimal + toolchain: ${{ env.RUST_TOOLCHAIN }} + override: true + + - uses: Swatinem/rust-cache@v2 - name: ETL tests timeout-minutes: ${{ fromJSON(env.ETL_JOB_TIMEOUT) }} run: cargo test --features migrate,compression,etl -- --ignored --nocapture --test-threads ${{ env.ETL_TEST_THREADS }} env: - DATABASE_URL: ${{ env.NO_TLS_URL }} + DATABASE_URL: ${{ steps.exa-cluster.outputs.no-tls-url }} - name: ETL without TLS feature but TLS connection (should fail) run: cargo test --features migrate,etl -- --ignored --nocapture --test-threads ${{ env.ETL_TEST_THREADS }} || true env: - DATABASE_URL: ${{ env.TLS_URL }} + DATABASE_URL: ${{ steps.exa-cluster.outputs.tls-url }} + + - name: Tests compilation failure if both ETL TLS features are enabled + run: cargo test --features etl_native_tls,etl_rustls || true + env: + DATABASE_URL: ${{ steps.exa-cluster.outputs.tls-url }} + + native_tls_etl_tests: + name: ETL tests with native-tls + needs: clippy + runs-on: ubuntu-20.04 + + steps: + - uses: actions/checkout@v3 + + - name: Free disk space + uses: ./.github/actions/free-space + + - name: Create Exasol cluster + id: exa-cluster + uses: ./.github/actions/exa-cluster + with: + exasol-version: ${{ env.EXASOL_VERSION }} + num-nodes: ${{ env.NUM_NODES }} + + - name: Setup Rust toolchain + uses: actions-rs/toolchain@v1 + with: + profile: minimal + toolchain: ${{ env.RUST_TOOLCHAIN }} + override: true + + - uses: Swatinem/rust-cache@v2 - name: Native-TLS ETL tests timeout-minutes: ${{ fromJSON(env.ETL_JOB_TIMEOUT) }} run: cargo test --features migrate,compression,etl_native_tls -- --ignored --nocapture --test-threads ${{ env.ETL_TEST_THREADS }} env: - DATABASE_URL: ${{ env.TLS_URL }} + DATABASE_URL: ${{ steps.exa-cluster.outputs.tls-url }} + + rustls_etl_tests: + name: ETL tests with rustls + needs: clippy + runs-on: ubuntu-20.04 + + steps: + - uses: 
actions/checkout@v3 + + - name: Free disk space + uses: ./.github/actions/free-space + + - name: Create Exasol cluster + id: exa-cluster + uses: ./.github/actions/exa-cluster + with: + exasol-version: ${{ env.EXASOL_VERSION }} + num-nodes: ${{ env.NUM_NODES }} + + - name: Setup Rust toolchain + uses: actions-rs/toolchain@v1 + with: + profile: minimal + toolchain: ${{ env.RUST_TOOLCHAIN }} + override: true + + - uses: Swatinem/rust-cache@v2 - name: Rustls ETL tests timeout-minutes: ${{ fromJSON(env.ETL_JOB_TIMEOUT) }} run: cargo test --features migrate,compression,etl_rustls -- --ignored --nocapture --test-threads ${{ env.ETL_TEST_THREADS }} env: - DATABASE_URL: ${{ env.TLS_URL }} + DATABASE_URL: ${{ steps.exa-cluster.outputs.tls-url }} diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..6aea046 --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,35 @@ +# Change Log +All notable changes to this project will be documented in this file. + +The format is based on [Keep a Changelog](http://keepachangelog.com/) +and this project adheres to [Semantic Versioning](http://semver.org/). + +## [0.7.1-alpha-3] - 2023-09-13 + +### Added + +- [#5](https://github.com/bobozaur/sqlx-exasol/issues/5): Multi-node CI testing by creating a two node database cluster. + +### Changed + +- [#16](https://github.com/bobozaur/sqlx-exasol/pull/16): Added the `CHANGELOG.md` file and `etl` module level docs +- [#10](https://github.com/bobozaur/sqlx-exasol/issues/10): Improved ExaConnectOptionsBuilder ergonomics by having its methods take `self` +- renamed `EtlWorker` trait to `EtlBufReader`. +- refactored `ExportReader` to implement and rely on `AsyncBufRead`. +- added `buffer_size()` method to `ExportBuilder` to be able to tweak the reader's buffer. + +### Fixed + +- ETL TLS with compression + +## [0.7.1-alpha-2] - 2023-09-04 + +Second alpha release. + +### Added + +- Missing connection string documentation. + +## [0.7.1-alpha-1] - 2023-09-04 + +First alpha release. 
\ No newline at end of file diff --git a/Cargo.toml b/Cargo.toml index 8048303..9d622f7 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "sqlx-exasol" -version = "0.7.1-alpha-2" +version = "0.7.1-alpha-3" edition = "2021" authors = ["bobozaur"] description = "Exasol driver for the SQLx framework." diff --git a/README.md b/README.md index 2e3bfa2..c08ad23 100644 --- a/README.md +++ b/README.md @@ -15,7 +15,7 @@ Inspired by [Py-Exasol](https://github.com/exasol/pyexasol) and based on the (no > With that in mind, please favor using a fixed version of `sqlx` and `sqlx-exasol` in `Cargo.toml` to avoid issues, such as: > ```toml > sqlx = "=0.7.1" -> sqlx-exasol = "=0.7.1-alpha-2" +> sqlx-exasol = "=0.7.1-alpha-3" > ``` diff --git a/src/connection/etl/export/mod.rs b/src/connection/etl/export/mod.rs index 2264082..3d1da4e 100644 --- a/src/connection/etl/export/mod.rs +++ b/src/connection/etl/export/mod.rs @@ -23,6 +23,12 @@ use super::SocketFuture; /// /// The type implements [`AsyncRead`] and is [`Send`] and [`Sync`] so it can be freely used /// in any data pipeline. +/// +/// # IMPORTANT +/// +/// Dropping a reader before it returned EOF will result in the `EXPORT` query returning an error. +/// While not necessarily a problem if you're not interested in the whole export, there's no way to +/// circumvent that other than handling the error in code. #[allow(clippy::large_enum_variant)] #[pin_project(project = ExaExportProj)] pub enum ExaExport { diff --git a/src/connection/etl/export/options.rs b/src/connection/etl/export/options.rs index cdc5652..d3231a0 100644 --- a/src/connection/etl/export/options.rs +++ b/src/connection/etl/export/options.rs @@ -10,6 +10,8 @@ use crate::{ }; /// A builder for an ETL EXPORT job. +/// Calling [`build().await`] will output a future that drives the EXPORT query execution and a +/// [`Vec`] which must be concurrently used to read data from Exasol.
#[derive(Debug)] pub struct ExportBuilder<'a> { num_readers: usize, diff --git a/src/connection/etl/import/mod.rs b/src/connection/etl/import/mod.rs index 4b7b9b6..e1e2204 100644 --- a/src/connection/etl/import/mod.rs +++ b/src/connection/etl/import/mod.rs @@ -27,6 +27,36 @@ use super::SocketFuture; /// The only caveat is that you *MUST* call [`futures_util::AsyncWriteExt::close`] on each worker to /// finalize the import. Otherwise, Exasol keeps on expecting data. /// +/// # Atomicity +/// +/// `IMPORT` jobs are not atomic by themselves. If an error occurs during the data ingestion, +/// some of the data might be already sent and written in the database. However, since +/// `IMPORT` is fundamentally just a query, it *can* be transactional. Therefore, +/// beginning a transaction and passing that to the [`ImportBuilder::build`] method will result in +/// the import job needing to be explicitly committed: +/// +/// ```rust,no_run +/// use std::env; +/// +/// use sqlx_exasol::{etl::*, *}; +/// +/// # async { +/// # +/// let pool = ExaPool::connect(&env::var("DATABASE_URL").unwrap()).await?; +/// let mut con = pool.acquire().await?; +/// let mut tx = con.begin().await?; +/// +/// let (query_fut, writers) = ImportBuilder::new("SOME_TABLE").build(&mut *tx).await?; +/// +/// // concurrently use the writers and await the query future +/// +/// tx.commit().await?; +/// # +/// # let res: anyhow::Result<()> = Ok(()); +/// # res +/// # }; +/// ``` +/// /// # IMPORTANT /// /// In multi-node environments closing a writer without writing any data to it can, and most likely diff --git a/src/connection/etl/import/options.rs b/src/connection/etl/import/options.rs index 717565f..166d348 100644 --- a/src/connection/etl/import/options.rs +++ b/src/connection/etl/import/options.rs @@ -11,6 +11,8 @@ use crate::{ }; /// A builder for an ETL IMPORT job. 
+/// Calling [`build().await`] will output a future that drives the IMPORT query execution and a +/// [`Vec`] which must be concurrently used to ingest data into Exasol. #[derive(Clone, Debug)] pub struct ImportBuilder<'a> { num_writers: usize, diff --git a/src/connection/etl/mod.rs b/src/connection/etl/mod.rs index 5fd1a0e..c03e8d3 100644 --- a/src/connection/etl/mod.rs +++ b/src/connection/etl/mod.rs @@ -1,3 +1,74 @@ +//! This module provides the building blocks for creating IMPORT and EXPORT jobs. +//! These are represented by a query that gets executed concurrently with some ETL workers, both of +//! which are obtained by building the ETL job. The data format is always CSV, but there are some +//! customizations that can be done on the builders such as row or column separator, etc. +//! +//! The query execution is driven by a future obtained from building the job, and will rely +//! on using the workers (also obtained from building the job) to complete and return. The +//! result is of type [`ExaQueryResult`] which can give you the number of affected rows. +//! +//! IMPORT jobs are constructed through the [`ImportBuilder`] type and will generate workers of type +//! [`ExaImport`]. The workers can be used to write data to the database and the query execution +//! ends when all the workers have been closed (by explicitly calling `close().await`). +//! +//! EXPORT jobs are constructed through the [`ExportBuilder`] type and will generate workers of type +//! [`ExaExport`]. The workers can be used to read data from the database and the query execution +//! ends when all the workers receive EOF. They can be dropped afterwards. +//! +//! ETL jobs can use TLS, compression, or both and will do so in a +//! consistent manner with the [`ExaConnection`] they are executed on. +//! That means that if the connection uses TLS / compression, so will the ETL job. +//! +//! **NOTE:** Trying to run ETL jobs with TLS without an ETL TLS feature flag results +//! in a runtime error.
Furthermore, enabling more than one ETL TLS feature results in a +//! compile time error. +//! +//! # Atomicity +//! +//! `IMPORT` jobs are not atomic by themselves. If an error occurs during the data ingestion, +//! some of the data might be already sent and written in the database. However, since +//! `IMPORT` is fundamentally just a query, it *can* be transactional. Therefore, +//! beginning a transaction and passing that to the [`ImportBuilder::build`] method will result in +//! the import job needing to be explicitly committed: +//! +//! ```rust,no_run +//! use std::env; +//! +//! use sqlx_exasol::{etl::*, *}; +//! +//! # async { +//! # +//! let pool = ExaPool::connect(&env::var("DATABASE_URL").unwrap()).await?; +//! let mut con = pool.acquire().await?; +//! let mut tx = con.begin().await?; +//! +//! let (query_fut, writers) = ImportBuilder::new("SOME_TABLE").build(&mut *tx).await?; +//! +//! // concurrently use the writers and await the query future +//! +//! tx.commit().await?; +//! # +//! # let res: anyhow::Result<()> = Ok(()); +//! # res +//! # }; +//! ``` +//! +//! # IMPORTANT +//! +//! Exasol doesn't really like it when [`ExaImport`] workers are closed without ever sending any +//! data. The underlying socket connection to Exasol will be closed, and Exasol will just try to +//! open a new one. However, workers only listen on the designated sockets once, so the connection +//! will be refused (even if it weren't, the cycle might just repeat since we'd still be sending no +//! data). +//! +//! Therefore, it is wise not to build IMPORT jobs with more workers than required, depending on the +//! amount of data to be imported and especially if certain workers won't be written to at all. +//! +//! Additionally, Exasol expects that all [`ExaExport`] are read in their entirety (until EOF is +//! reached). Failing to do so will result in the query execution returning an error. If, for some +//! 
reason, you do not want to exhaust the readers, be prepared to handle the error returned by the +//! `EXPORT` query. + mod error; mod export; mod import; diff --git a/src/lib.rs b/src/lib.rs index 9ec1736..06725f1 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -29,6 +29,7 @@ //! - array-like parameter binding in queries, thanks to the columnar nature of the Exasol //! database //! - performant & parallelizable ETL IMPORT/EXPORT jobs in CSV format through HTTP Transport +//! (see the [etl] module for more details) //! //! ## Connection string //! The connection string is expected to be an URL with the `exa://` scheme, e.g: