From 596f3b054bff5bab37983f9d92ba9ff838075e67 Mon Sep 17 00:00:00 2001 From: Adrian Garcia Badaracco <1755071+adriangb@users.noreply.github.com> Date: Fri, 7 Jun 2024 12:16:59 -0500 Subject: [PATCH 01/21] Example using SQLx + SQLite --- .gitignore | 1 + LICENSE | 201 -- sqlx-sqlite/.gitignore | 2 + sqlx-sqlite/Cargo.lock | 3825 ++++++++++++++++++++++++++++++++++++++ sqlx-sqlite/Cargo.toml | 25 + sqlx-sqlite/README.md | 58 + sqlx-sqlite/index.db | Bin 0 -> 4096 bytes sqlx-sqlite/src/index.rs | 576 ++++++ sqlx-sqlite/src/main.rs | 378 ++++ 9 files changed, 4865 insertions(+), 201 deletions(-) create mode 100644 .gitignore delete mode 100644 LICENSE create mode 100644 sqlx-sqlite/.gitignore create mode 100644 sqlx-sqlite/Cargo.lock create mode 100644 sqlx-sqlite/Cargo.toml create mode 100644 sqlx-sqlite/README.md create mode 100644 sqlx-sqlite/index.db create mode 100644 sqlx-sqlite/src/index.rs create mode 100644 sqlx-sqlite/src/main.rs diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..ea8c4bf --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +/target diff --git a/LICENSE b/LICENSE deleted file mode 100644 index 261eeb9..0000000 --- a/LICENSE +++ /dev/null @@ -1,201 +0,0 @@ - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. 
For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. 
For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. 
If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. 
You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. 
Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. 
- - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. diff --git a/sqlx-sqlite/.gitignore b/sqlx-sqlite/.gitignore new file mode 100644 index 0000000..2e0ed6d --- /dev/null +++ b/sqlx-sqlite/.gitignore @@ -0,0 +1,2 @@ +/.vscode +/target diff --git a/sqlx-sqlite/Cargo.lock b/sqlx-sqlite/Cargo.lock new file mode 100644 index 0000000..f5d87ee --- /dev/null +++ b/sqlx-sqlite/Cargo.lock @@ -0,0 +1,3825 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. 
+version = 3 + +[[package]] +name = "addr2line" +version = "0.22.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6e4503c46a5c0c7844e948c9a4d6acd9f50cccb4de1c48eb9e291ea17470c678" +dependencies = [ + "gimli", +] + +[[package]] +name = "adler" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" + +[[package]] +name = "ahash" +version = "0.8.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e89da841a80418a9b391ebaea17f5c112ffaaa96f621d2c285b5174da76b9011" +dependencies = [ + "cfg-if", + "const-random", + "getrandom", + "once_cell", + "version_check", + "zerocopy", +] + +[[package]] +name = "aho-corasick" +version = "1.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916" +dependencies = [ + "memchr", +] + +[[package]] +name = "alloc-no-stdlib" +version = "2.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cc7bb162ec39d46ab1ca8c77bf72e890535becd1751bb45f64c597edb4c8c6b3" + +[[package]] +name = "alloc-stdlib" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94fb8275041c72129eb51b7d0322c29b8387a0386127718b096429201a5d6ece" +dependencies = [ + "alloc-no-stdlib", +] + +[[package]] +name = "allocator-api2" +version = "0.2.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c6cb57a04249c6480766f7f7cef5467412af1490f8d1e243141daddada3264f" + +[[package]] +name = "android-tzdata" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e999941b234f3131b00bc13c22d06e8c5ff726d1b6318ac7eb276997bbb4fef0" + +[[package]] +name = "android_system_properties" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"819e7219dbd41043ac279b19830f2efc897156490d7fd6ea916720117ee66311" +dependencies = [ + "libc", +] + +[[package]] +name = "anyhow" +version = "1.0.86" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b3d1d046238990b9cf5bcde22a3fb3584ee5cf65fb2765f454ed428c7a0063da" + +[[package]] +name = "arrayref" +version = "0.3.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6b4930d2cb77ce62f89ee5d5289b4ac049559b1c45539271f5ed4fdc7db34545" + +[[package]] +name = "arrayvec" +version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "96d30a06541fbafbc7f82ed10c06164cfbd2c401138f6addd8404629c4b16711" + +[[package]] +name = "arrow" +version = "51.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "219d05930b81663fd3b32e3bde8ce5bff3c4d23052a99f11a8fa50a3b47b2658" +dependencies = [ + "arrow-arith", + "arrow-array", + "arrow-buffer", + "arrow-cast", + "arrow-csv", + "arrow-data", + "arrow-ipc", + "arrow-json", + "arrow-ord", + "arrow-row", + "arrow-schema", + "arrow-select", + "arrow-string", +] + +[[package]] +name = "arrow-arith" +version = "51.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0272150200c07a86a390be651abdd320a2d12e84535f0837566ca87ecd8f95e0" +dependencies = [ + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "chrono", + "half", + "num", +] + +[[package]] +name = "arrow-array" +version = "51.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8010572cf8c745e242d1b632bd97bd6d4f40fefed5ed1290a8f433abaa686fea" +dependencies = [ + "ahash", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "chrono", + "chrono-tz", + "half", + "hashbrown", + "num", +] + +[[package]] +name = "arrow-buffer" +version = "51.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0d0a2432f0cba5692bf4cb757469c66791394bac9ec7ce63c1afe74744c37b27" 
+dependencies = [ + "bytes", + "half", + "num", +] + +[[package]] +name = "arrow-cast" +version = "51.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9abc10cd7995e83505cc290df9384d6e5412b207b79ce6bdff89a10505ed2cba" +dependencies = [ + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "arrow-select", + "atoi", + "base64 0.22.1", + "chrono", + "comfy-table", + "half", + "lexical-core", + "num", + "ryu", +] + +[[package]] +name = "arrow-csv" +version = "51.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "95cbcba196b862270bf2a5edb75927380a7f3a163622c61d40cbba416a6305f2" +dependencies = [ + "arrow-array", + "arrow-buffer", + "arrow-cast", + "arrow-data", + "arrow-schema", + "chrono", + "csv", + "csv-core", + "lazy_static", + "lexical-core", + "regex", +] + +[[package]] +name = "arrow-data" +version = "51.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2742ac1f6650696ab08c88f6dd3f0eb68ce10f8c253958a18c943a68cd04aec5" +dependencies = [ + "arrow-buffer", + "arrow-schema", + "half", + "num", +] + +[[package]] +name = "arrow-ipc" +version = "51.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a42ea853130f7e78b9b9d178cb4cd01dee0f78e64d96c2949dc0a915d6d9e19d" +dependencies = [ + "arrow-array", + "arrow-buffer", + "arrow-cast", + "arrow-data", + "arrow-schema", + "flatbuffers", + "lz4_flex", +] + +[[package]] +name = "arrow-json" +version = "51.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eaafb5714d4e59feae964714d724f880511500e3569cc2a94d02456b403a2a49" +dependencies = [ + "arrow-array", + "arrow-buffer", + "arrow-cast", + "arrow-data", + "arrow-schema", + "chrono", + "half", + "indexmap", + "lexical-core", + "num", + "serde", + "serde_json", +] + +[[package]] +name = "arrow-ord" +version = "51.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"e3e6b61e3dc468f503181dccc2fc705bdcc5f2f146755fa5b56d0a6c5943f412" +dependencies = [ + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "arrow-select", + "half", + "num", +] + +[[package]] +name = "arrow-row" +version = "51.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "848ee52bb92eb459b811fb471175ea3afcf620157674c8794f539838920f9228" +dependencies = [ + "ahash", + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "half", + "hashbrown", +] + +[[package]] +name = "arrow-schema" +version = "51.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "02d9483aaabe910c4781153ae1b6ae0393f72d9ef757d38d09d450070cf2e528" + +[[package]] +name = "arrow-select" +version = "51.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "849524fa70e0e3c5ab58394c770cb8f514d0122d20de08475f7b472ed8075830" +dependencies = [ + "ahash", + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "num", +] + +[[package]] +name = "arrow-string" +version = "51.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9373cb5a021aee58863498c37eb484998ef13377f69989c6c5ccfbd258236cdb" +dependencies = [ + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "arrow-select", + "memchr", + "num", + "regex", + "regex-syntax", +] + +[[package]] +name = "async-compression" +version = "0.4.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cd066d0b4ef8ecb03a55319dc13aa6910616d0f44008a045bb1835af830abff5" +dependencies = [ + "bzip2", + "flate2", + "futures-core", + "futures-io", + "memchr", + "pin-project-lite", + "tokio", + "xz2", + "zstd", + "zstd-safe", +] + +[[package]] +name = "async-trait" +version = "0.1.80" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c6fa2087f2753a7da8cc1c0dbfcf89579dd57458e36769de5ac750b4671737ca" +dependencies = [ + "proc-macro2", 
+ "quote", + "syn 2.0.66", +] + +[[package]] +name = "atoi" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f28d99ec8bfea296261ca1af174f24225171fea9664ba9003cbebee704810528" +dependencies = [ + "num-traits", +] + +[[package]] +name = "autocfg" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c4b4d0bd25bd0b74681c0ad21497610ce1b7c91b1022cd21c80c6fbdd9476b0" + +[[package]] +name = "backtrace" +version = "0.3.72" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "17c6a35df3749d2e8bb1b7b21a976d82b15548788d2735b9d82f329268f71a11" +dependencies = [ + "addr2line", + "cc", + "cfg-if", + "libc", + "miniz_oxide", + "object", + "rustc-demangle", +] + +[[package]] +name = "base64" +version = "0.21.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9d297deb1925b89f2ccc13d7635fa0714f12c87adce1c75356b39ca9b7178567" + +[[package]] +name = "base64" +version = "0.22.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6" + +[[package]] +name = "base64ct" +version = "1.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8c3c1a368f70d6cf7302d78f8f7093da241fb8e8807c05cc9e51a125895a6d5b" + +[[package]] +name = "bitflags" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" + +[[package]] +name = "bitflags" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cf4b9d6a944f767f8e5e0db018570623c85f3d925ac718db4e06d0187adb21c1" +dependencies = [ + "serde", +] + +[[package]] +name = "blake2" +version = "0.10.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "46502ad458c9a52b69d4d4d32775c788b7a1b85e8bc9d482d92250fc0e3f8efe" 
+dependencies = [ + "digest", +] + +[[package]] +name = "blake3" +version = "1.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "30cca6d3674597c30ddf2c587bf8d9d65c9a84d2326d941cc79c9842dfe0ef52" +dependencies = [ + "arrayref", + "arrayvec", + "cc", + "cfg-if", + "constant_time_eq", +] + +[[package]] +name = "block-buffer" +version = "0.10.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3078c7629b62d3f0439517fa394996acacc5cbc91c5a20d8c658e77abd503a71" +dependencies = [ + "generic-array", +] + +[[package]] +name = "brotli" +version = "3.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d640d25bc63c50fb1f0b545ffd80207d2e10a4c965530809b40ba3386825c391" +dependencies = [ + "alloc-no-stdlib", + "alloc-stdlib", + "brotli-decompressor", +] + +[[package]] +name = "brotli-decompressor" +version = "2.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4e2e4afe60d7dd600fdd3de8d0f08c2b7ec039712e3b6137ff98b7004e82de4f" +dependencies = [ + "alloc-no-stdlib", + "alloc-stdlib", +] + +[[package]] +name = "bumpalo" +version = "3.16.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "79296716171880943b8470b5f8d03aa55eb2e645a4874bdbb28adb49162e012c" + +[[package]] +name = "byteorder" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" + +[[package]] +name = "bytes" +version = "1.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "514de17de45fdb8dc022b1a7975556c53c86f9f0aa5f534b98977b171857c2c9" + +[[package]] +name = "bzip2" +version = "0.4.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bdb116a6ef3f6c3698828873ad02c3014b3c85cadb88496095628e3ef1e347f8" +dependencies = [ + "bzip2-sys", + "libc", +] + +[[package]] +name = "bzip2-sys" +version = 
"0.1.11+1.0.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "736a955f3fa7875102d57c82b8cac37ec45224a07fd32d58f9f7a186b6cd4cdc" +dependencies = [ + "cc", + "libc", + "pkg-config", +] + +[[package]] +name = "cc" +version = "1.0.99" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "96c51067fd44124faa7f870b4b1c969379ad32b2ba805aa959430ceaa384f695" +dependencies = [ + "jobserver", + "libc", + "once_cell", +] + +[[package]] +name = "cfg-if" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" + +[[package]] +name = "chrono" +version = "0.4.38" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a21f936df1771bf62b77f047b726c4625ff2e8aa607c01ec06e5a05bd8463401" +dependencies = [ + "android-tzdata", + "iana-time-zone", + "num-traits", + "serde", + "windows-targets 0.52.5", +] + +[[package]] +name = "chrono-tz" +version = "0.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d59ae0466b83e838b81a54256c39d5d7c20b9d7daa10510a242d9b75abd5936e" +dependencies = [ + "chrono", + "chrono-tz-build", + "phf", +] + +[[package]] +name = "chrono-tz-build" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "433e39f13c9a060046954e0592a8d0a4bcb1040125cbf91cb8ee58964cfb350f" +dependencies = [ + "parse-zoneinfo", + "phf", + "phf_codegen", +] + +[[package]] +name = "comfy-table" +version = "7.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b34115915337defe99b2aff5c2ce6771e5fbc4079f4b506301f5cf394c8452f7" +dependencies = [ + "strum", + "strum_macros", + "unicode-width", +] + +[[package]] +name = "const-oid" +version = "0.9.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c2459377285ad874054d797f3ccebf984978aa39129f6eafde5cdc8315b612f8" + +[[package]] 
+name = "const-random" +version = "0.1.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "87e00182fe74b066627d63b85fd550ac2998d4b0bd86bfed477a0ae4c7c71359" +dependencies = [ + "const-random-macro", +] + +[[package]] +name = "const-random-macro" +version = "0.1.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f9d839f2a20b0aee515dc581a6172f2321f96cab76c1a38a4c584a194955390e" +dependencies = [ + "getrandom", + "once_cell", + "tiny-keccak", +] + +[[package]] +name = "constant_time_eq" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f7144d30dcf0fafbce74250a3963025d8d52177934239851c917d29f1df280c2" + +[[package]] +name = "core-foundation" +version = "0.9.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "91e195e091a93c46f7102ec7818a2aa394e1e1771c3ab4825963fa03e45afb8f" +dependencies = [ + "core-foundation-sys", + "libc", +] + +[[package]] +name = "core-foundation-sys" +version = "0.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06ea2b9bc92be3c2baa9334a323ebca2d6f074ff852cd1d7b11064035cd3868f" + +[[package]] +name = "cpufeatures" +version = "0.2.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "53fe5e26ff1b7aef8bca9c6080520cfb8d9333c7568e1829cef191a9723e5504" +dependencies = [ + "libc", +] + +[[package]] +name = "crc" +version = "3.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "69e6e4d7b33a94f0991c26729976b10ebde1d34c3ee82408fb536164fa10d636" +dependencies = [ + "crc-catalog", +] + +[[package]] +name = "crc-catalog" +version = "2.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "19d374276b40fb8bbdee95aef7c7fa6b5316ec764510eb64b8dd0e2ed0d7e7f5" + +[[package]] +name = "crc32fast" +version = "1.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"a97769d94ddab943e4510d138150169a2758b5ef3eb191a9ee688de3e23ef7b3" +dependencies = [ + "cfg-if", +] + +[[package]] +name = "crossbeam-queue" +version = "0.3.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df0346b5d5e76ac2fe4e327c5fd1118d6be7c51dfb18f9b7922923f287471e35" +dependencies = [ + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-utils" +version = "0.8.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "22ec99545bb0ed0ea7bb9b8e1e9122ea386ff8a48c0922e43f36d45ab09e0e80" + +[[package]] +name = "crunchy" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a81dae078cea95a014a339291cec439d2f232ebe854a9d672b796c6afafa9b7" + +[[package]] +name = "crypto-common" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1bfb12502f3fc46cca1bb51ac28df9d618d813cdc3d2f25b9fe775a34af26bb3" +dependencies = [ + "generic-array", + "typenum", +] + +[[package]] +name = "csv" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac574ff4d437a7b5ad237ef331c17ccca63c46479e5b5453eb8e10bb99a759fe" +dependencies = [ + "csv-core", + "itoa", + "ryu", + "serde", +] + +[[package]] +name = "csv-core" +version = "0.1.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5efa2b3d7902f4b634a20cae3c9c4e6209dc4779feb6863329607560143efa70" +dependencies = [ + "memchr", +] + +[[package]] +name = "darling" +version = "0.14.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7b750cb3417fd1b327431a470f388520309479ab0bf5e323505daf0290cd3850" +dependencies = [ + "darling_core", + "darling_macro", +] + +[[package]] +name = "darling_core" +version = "0.14.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "109c1ca6e6b7f82cc233a97004ea8ed7ca123a9af07a8230878fcfda9b158bf0" +dependencies = [ + "fnv", + "ident_case", 
+ "proc-macro2", + "quote", + "syn 1.0.109", +] + +[[package]] +name = "darling_macro" +version = "0.14.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a4aab4dbc9f7611d8b55048a3a16d2d010c2c8334e46304b40ac1cc14bf3b48e" +dependencies = [ + "darling_core", + "quote", + "syn 1.0.109", +] + +[[package]] +name = "dashmap" +version = "5.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "978747c1d849a7d2ee5e8adc0159961c48fb7e5db2f06af6723b80123bb53856" +dependencies = [ + "cfg-if", + "hashbrown", + "lock_api", + "once_cell", + "parking_lot_core", +] + +[[package]] +name = "datafusion" +version = "38.0.0" +source = "git+https://github.com/adriangb/datafusion.git#8c4acac95263a765a6ec5b715955b9911e7d2e0e" +dependencies = [ + "ahash", + "arrow", + "arrow-array", + "arrow-ipc", + "arrow-schema", + "async-compression", + "async-trait", + "bytes", + "bzip2", + "chrono", + "dashmap", + "datafusion-common", + "datafusion-common-runtime", + "datafusion-execution", + "datafusion-expr", + "datafusion-functions", + "datafusion-functions-aggregate", + "datafusion-functions-array", + "datafusion-optimizer", + "datafusion-physical-expr", + "datafusion-physical-expr-common", + "datafusion-physical-plan", + "datafusion-sql", + "flate2", + "futures", + "glob", + "half", + "hashbrown", + "indexmap", + "itertools", + "log", + "num_cpus", + "object_store", + "parking_lot", + "parquet", + "paste", + "pin-project-lite", + "rand", + "sqlparser", + "tempfile", + "tokio", + "tokio-util", + "url", + "uuid", + "xz2", + "zstd", +] + +[[package]] +name = "datafusion-common" +version = "38.0.0" +source = "git+https://github.com/adriangb/datafusion.git#8c4acac95263a765a6ec5b715955b9911e7d2e0e" +dependencies = [ + "ahash", + "arrow", + "arrow-array", + "arrow-buffer", + "arrow-schema", + "chrono", + "half", + "hashbrown", + "instant", + "libc", + "num_cpus", + "object_store", + "parquet", + "sqlparser", +] + +[[package]] +name = 
"datafusion-common-runtime" +version = "38.0.0" +source = "git+https://github.com/adriangb/datafusion.git#8c4acac95263a765a6ec5b715955b9911e7d2e0e" +dependencies = [ + "tokio", +] + +[[package]] +name = "datafusion-execution" +version = "38.0.0" +source = "git+https://github.com/adriangb/datafusion.git#8c4acac95263a765a6ec5b715955b9911e7d2e0e" +dependencies = [ + "arrow", + "chrono", + "dashmap", + "datafusion-common", + "datafusion-expr", + "futures", + "hashbrown", + "log", + "object_store", + "parking_lot", + "rand", + "tempfile", + "url", +] + +[[package]] +name = "datafusion-expr" +version = "38.0.0" +source = "git+https://github.com/adriangb/datafusion.git#8c4acac95263a765a6ec5b715955b9911e7d2e0e" +dependencies = [ + "ahash", + "arrow", + "arrow-array", + "chrono", + "datafusion-common", + "paste", + "serde_json", + "sqlparser", + "strum", + "strum_macros", +] + +[[package]] +name = "datafusion-functions" +version = "38.0.0" +source = "git+https://github.com/adriangb/datafusion.git#8c4acac95263a765a6ec5b715955b9911e7d2e0e" +dependencies = [ + "arrow", + "base64 0.22.1", + "blake2", + "blake3", + "chrono", + "datafusion-common", + "datafusion-execution", + "datafusion-expr", + "datafusion-physical-expr", + "hashbrown", + "hex", + "itertools", + "log", + "md-5", + "rand", + "regex", + "sha2", + "unicode-segmentation", + "uuid", +] + +[[package]] +name = "datafusion-functions-aggregate" +version = "38.0.0" +source = "git+https://github.com/adriangb/datafusion.git#8c4acac95263a765a6ec5b715955b9911e7d2e0e" +dependencies = [ + "ahash", + "arrow", + "arrow-schema", + "datafusion-common", + "datafusion-execution", + "datafusion-expr", + "datafusion-physical-expr-common", + "log", + "paste", + "sqlparser", +] + +[[package]] +name = "datafusion-functions-array" +version = "38.0.0" +source = "git+https://github.com/adriangb/datafusion.git#8c4acac95263a765a6ec5b715955b9911e7d2e0e" +dependencies = [ + "arrow", + "arrow-array", + "arrow-buffer", + "arrow-ord", + 
"arrow-schema", + "datafusion-common", + "datafusion-execution", + "datafusion-expr", + "datafusion-functions", + "itertools", + "log", + "paste", +] + +[[package]] +name = "datafusion-optimizer" +version = "38.0.0" +source = "git+https://github.com/adriangb/datafusion.git#8c4acac95263a765a6ec5b715955b9911e7d2e0e" +dependencies = [ + "arrow", + "async-trait", + "chrono", + "datafusion-common", + "datafusion-expr", + "datafusion-physical-expr", + "hashbrown", + "indexmap", + "itertools", + "log", + "regex-syntax", +] + +[[package]] +name = "datafusion-physical-expr" +version = "38.0.0" +source = "git+https://github.com/adriangb/datafusion.git#8c4acac95263a765a6ec5b715955b9911e7d2e0e" +dependencies = [ + "ahash", + "arrow", + "arrow-array", + "arrow-buffer", + "arrow-ord", + "arrow-schema", + "arrow-string", + "base64 0.22.1", + "chrono", + "datafusion-common", + "datafusion-execution", + "datafusion-expr", + "datafusion-functions-aggregate", + "datafusion-physical-expr-common", + "half", + "hashbrown", + "hex", + "indexmap", + "itertools", + "log", + "paste", + "petgraph", + "regex", +] + +[[package]] +name = "datafusion-physical-expr-common" +version = "38.0.0" +source = "git+https://github.com/adriangb/datafusion.git#8c4acac95263a765a6ec5b715955b9911e7d2e0e" +dependencies = [ + "arrow", + "datafusion-common", + "datafusion-expr", + "rand", +] + +[[package]] +name = "datafusion-physical-plan" +version = "38.0.0" +source = "git+https://github.com/adriangb/datafusion.git#8c4acac95263a765a6ec5b715955b9911e7d2e0e" +dependencies = [ + "ahash", + "arrow", + "arrow-array", + "arrow-buffer", + "arrow-ord", + "arrow-schema", + "async-trait", + "chrono", + "datafusion-common", + "datafusion-common-runtime", + "datafusion-execution", + "datafusion-expr", + "datafusion-functions-aggregate", + "datafusion-physical-expr", + "datafusion-physical-expr-common", + "futures", + "half", + "hashbrown", + "indexmap", + "itertools", + "log", + "once_cell", + "parking_lot", + 
"pin-project-lite", + "rand", + "tokio", +] + +[[package]] +name = "datafusion-sql" +version = "38.0.0" +source = "git+https://github.com/adriangb/datafusion.git#8c4acac95263a765a6ec5b715955b9911e7d2e0e" +dependencies = [ + "arrow", + "arrow-array", + "arrow-schema", + "datafusion-common", + "datafusion-expr", + "log", + "regex", + "sqlparser", + "strum", +] + +[[package]] +name = "der" +version = "0.7.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f55bf8e7b65898637379c1b74eb1551107c8294ed26d855ceb9fd1a09cfc9bc0" +dependencies = [ + "const-oid", + "pem-rfc7468", + "zeroize", +] + +[[package]] +name = "deranged" +version = "0.3.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b42b6fa04a440b495c8b04d0e71b707c585f83cb9cb28cf8cd0d976c315e31b4" +dependencies = [ + "powerfmt", +] + +[[package]] +name = "derivative" +version = "2.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fcc3dd5e9e9c0b295d6e1e4d811fb6f157d5ffd784b8d202fc62eac8035a770b" +dependencies = [ + "proc-macro2", + "quote", + "syn 1.0.109", +] + +[[package]] +name = "digest" +version = "0.10.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292" +dependencies = [ + "block-buffer", + "const-oid", + "crypto-common", + "subtle", +] + +[[package]] +name = "doc-comment" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fea41bba32d969b513997752735605054bc0dfa92b4c56bf1189f2e174be7a10" + +[[package]] +name = "dotenvy" +version = "0.15.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1aaf95b3e5c8f23aa320147307562d361db0ae0d51242340f558153b4eb2439b" + +[[package]] +name = "either" +version = "1.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3dca9240753cf90908d7e4aac30f630662b02aebaa1b58a3cadabdb23385b58b" 
+dependencies = [ + "serde", +] + +[[package]] +name = "encoding_rs" +version = "0.8.34" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b45de904aa0b010bce2ab45264d0631681847fa7b6f2eaa7dab7619943bc4f59" +dependencies = [ + "cfg-if", +] + +[[package]] +name = "equivalent" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5" + +[[package]] +name = "errno" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "534c5cf6194dfab3db3242765c03bbe257cf92f22b38f6bc0c58d59108a820ba" +dependencies = [ + "libc", + "windows-sys 0.52.0", +] + +[[package]] +name = "etcetera" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "136d1b5283a1ab77bd9257427ffd09d8667ced0570b6f938942bc7568ed5b943" +dependencies = [ + "cfg-if", + "home", + "windows-sys 0.48.0", +] + +[[package]] +name = "event-listener" +version = "2.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0206175f82b8d6bf6652ff7d71a1e27fd2e4efde587fd368662814d6ec1d9ce0" + +[[package]] +name = "fastrand" +version = "2.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9fc0510504f03c51ada170672ac806f1f105a88aa97a5281117e1ddc3368e51a" + +[[package]] +name = "fixedbitset" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0ce7134b9999ecaf8bcd65542e436736ef32ddca1b3e06094cb6ec5755203b80" + +[[package]] +name = "flatbuffers" +version = "23.5.26" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4dac53e22462d78c16d64a1cd22371b54cc3fe94aa15e7886a2fa6e5d1ab8640" +dependencies = [ + "bitflags 1.3.2", + "rustc_version", +] + +[[package]] +name = "flate2" +version = "1.0.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"5f54427cfd1c7829e2a139fcefea601bf088ebca651d2bf53ebc600eac295dae" +dependencies = [ + "crc32fast", + "miniz_oxide", +] + +[[package]] +name = "flume" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "55ac459de2512911e4b674ce33cf20befaba382d05b62b008afc1c8b57cbf181" +dependencies = [ + "futures-core", + "futures-sink", + "spin 0.9.8", +] + +[[package]] +name = "fnv" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" + +[[package]] +name = "foreign-types" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f6f339eb8adc052cd2ca78910fda869aefa38d22d5cb648e6485e4d3fc06f3b1" +dependencies = [ + "foreign-types-shared", +] + +[[package]] +name = "foreign-types-shared" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "00b0228411908ca8685dba7fc2cdd70ec9990a6e753e89b6ac91a84c40fbaf4b" + +[[package]] +name = "form_urlencoded" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e13624c2627564efccf4934284bdd98cbaa14e79b0b5a141218e507b3a823456" +dependencies = [ + "percent-encoding", +] + +[[package]] +name = "futures" +version = "0.3.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "645c6916888f6cb6350d2550b80fb63e734897a8498abe35cfb732b6487804b0" +dependencies = [ + "futures-channel", + "futures-core", + "futures-executor", + "futures-io", + "futures-sink", + "futures-task", + "futures-util", +] + +[[package]] +name = "futures-channel" +version = "0.3.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eac8f7d7865dcb88bd4373ab671c8cf4508703796caa2b1985a9ca867b3fcb78" +dependencies = [ + "futures-core", + "futures-sink", +] + +[[package]] +name = "futures-core" +version = "0.3.30" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "dfc6580bb841c5a68e9ef15c77ccc837b40a7504914d52e47b8b0e9bbda25a1d" + +[[package]] +name = "futures-executor" +version = "0.3.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a576fc72ae164fca6b9db127eaa9a9dda0d61316034f33a0a0d4eda41f02b01d" +dependencies = [ + "futures-core", + "futures-task", + "futures-util", +] + +[[package]] +name = "futures-intrusive" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d930c203dd0b6ff06e0201a4a2fe9149b43c684fd4420555b26d21b1a02956f" +dependencies = [ + "futures-core", + "lock_api", + "parking_lot", +] + +[[package]] +name = "futures-io" +version = "0.3.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a44623e20b9681a318efdd71c299b6b222ed6f231972bfe2f224ebad6311f0c1" + +[[package]] +name = "futures-macro" +version = "0.3.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "87750cf4b7a4c0625b1529e4c543c2182106e4dedc60a2a6455e00d212c489ac" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.66", +] + +[[package]] +name = "futures-sink" +version = "0.3.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9fb8e00e87438d937621c1c6269e53f536c14d3fbd6a042bb24879e57d474fb5" + +[[package]] +name = "futures-task" +version = "0.3.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "38d84fa142264698cdce1a9f9172cf383a0c82de1bddcf3092901442c4097004" + +[[package]] +name = "futures-util" +version = "0.3.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3d6401deb83407ab3da39eba7e33987a73c3df0c82b4bb5813ee871c19c41d48" +dependencies = [ + "futures-channel", + "futures-core", + "futures-io", + "futures-macro", + "futures-sink", + "futures-task", + "memchr", + "pin-project-lite", + "pin-utils", + "slab", +] + +[[package]] +name = 
"generic-array" +version = "0.14.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a" +dependencies = [ + "typenum", + "version_check", +] + +[[package]] +name = "getrandom" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c4567c8db10ae91089c99af84c68c38da3ec2f087c3f82960bcdbf3656b6f4d7" +dependencies = [ + "cfg-if", + "libc", + "wasi", +] + +[[package]] +name = "gimli" +version = "0.29.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "40ecd4077b5ae9fd2e9e169b102c6c330d0605168eb0e8bf79952b256dbefffd" + +[[package]] +name = "glob" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b" + +[[package]] +name = "h2" +version = "0.3.26" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "81fe527a889e1532da5c525686d96d4c2e74cdd345badf8dfef9f6b39dd5f5e8" +dependencies = [ + "bytes", + "fnv", + "futures-core", + "futures-sink", + "futures-util", + "http", + "indexmap", + "slab", + "tokio", + "tokio-util", + "tracing", +] + +[[package]] +name = "half" +version = "2.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6dd08c532ae367adf81c312a4580bc67f1d0fe8bc9c460520283f4c0ff277888" +dependencies = [ + "cfg-if", + "crunchy", + "num-traits", +] + +[[package]] +name = "hashbrown" +version = "0.14.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1" +dependencies = [ + "ahash", + "allocator-api2", +] + +[[package]] +name = "hashlink" +version = "0.8.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e8094feaf31ff591f651a2664fb9cfd92bba7a60ce3197265e9482ebe753c8f7" +dependencies = [ + "hashbrown", +] + +[[package]] 
+name = "heck" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8" +dependencies = [ + "unicode-segmentation", +] + +[[package]] +name = "heck" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" + +[[package]] +name = "hermit-abi" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d231dfb89cfffdbc30e7fc41579ed6066ad03abda9e567ccafae602b97ec5024" + +[[package]] +name = "hex" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70" + +[[package]] +name = "hkdf" +version = "0.12.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7b5f8eb2ad728638ea2c7d47a21db23b7b58a72ed6a38256b8a1849f15fbbdf7" +dependencies = [ + "hmac", +] + +[[package]] +name = "hmac" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c49c37c09c17a53d937dfbb742eb3a961d65a994e6bcdcf37e7399d0cc8ab5e" +dependencies = [ + "digest", +] + +[[package]] +name = "home" +version = "0.5.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e3d1354bf6b7235cb4a0576c2619fd4ed18183f689b12b006a0ee7329eeff9a5" +dependencies = [ + "windows-sys 0.52.0", +] + +[[package]] +name = "http" +version = "0.2.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "601cbb57e577e2f5ef5be8e7b83f0f63994f25aa94d673e54a92d5c516d101f1" +dependencies = [ + "bytes", + "fnv", + "itoa", +] + +[[package]] +name = "http-body" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7ceab25649e9960c0311ea418d17bee82c0dcec1bd053b5f9a66e265a693bed2" +dependencies = [ + "bytes", + "http", + 
"pin-project-lite", +] + +[[package]] +name = "httparse" +version = "1.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d897f394bad6a705d5f4104762e116a75639e470d80901eed05a860a95cb1904" + +[[package]] +name = "httpdate" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df3b46402a9d5adb4c86a0cf463f42e19994e3ee891101b1841f30a545cb49a9" + +[[package]] +name = "humantime" +version = "2.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4" + +[[package]] +name = "hyper" +version = "0.14.29" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f361cde2f109281a220d4307746cdfd5ee3f410da58a70377762396775634b33" +dependencies = [ + "bytes", + "futures-channel", + "futures-core", + "futures-util", + "h2", + "http", + "http-body", + "httparse", + "httpdate", + "itoa", + "pin-project-lite", + "socket2", + "tokio", + "tower-service", + "tracing", + "want", +] + +[[package]] +name = "hyper-rustls" +version = "0.24.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec3efd23720e2049821a693cbc7e65ea87c72f1c58ff2f9522ff332b1491e590" +dependencies = [ + "futures-util", + "http", + "hyper", + "rustls", + "tokio", + "tokio-rustls", +] + +[[package]] +name = "iana-time-zone" +version = "0.1.60" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e7ffbb5a1b541ea2561f8c41c087286cc091e21e556a4f09a8f6cbf17b69b141" +dependencies = [ + "android_system_properties", + "core-foundation-sys", + "iana-time-zone-haiku", + "js-sys", + "wasm-bindgen", + "windows-core", +] + +[[package]] +name = "iana-time-zone-haiku" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f31827a206f56af32e590ba56d5d2d085f558508192593743f16b2306495269f" +dependencies = [ + "cc", +] + +[[package]] +name = "ident_case" 
+version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39" + +[[package]] +name = "idna" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "634d9b1461af396cad843f47fdba5597a4f9e6ddd4bfb6ff5d85028c25cb12f6" +dependencies = [ + "unicode-bidi", + "unicode-normalization", +] + +[[package]] +name = "indexmap" +version = "2.2.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "168fb715dda47215e360912c096649d23d58bf392ac62f73919e831745e40f26" +dependencies = [ + "equivalent", + "hashbrown", +] + +[[package]] +name = "inherent" +version = "1.0.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0122b7114117e64a63ac49f752a5ca4624d534c7b1c7de796ac196381cd2d947" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.66", +] + +[[package]] +name = "instant" +version = "0.1.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e0242819d153cba4b4b05a5a8f2a7e9bbf97b6055b2a002b395c96b5ff3c0222" +dependencies = [ + "cfg-if", + "js-sys", + "wasm-bindgen", + "web-sys", +] + +[[package]] +name = "integer-encoding" +version = "3.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8bb03732005da905c88227371639bf1ad885cc712789c011c31c5fb3ab3ccf02" + +[[package]] +name = "ipnet" +version = "2.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f518f335dce6725a761382244631d86cf0ccb2863413590b31338feb467f9c3" + +[[package]] +name = "itertools" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba291022dbbd398a455acf126c1e341954079855bc60dfdda641363bd6922569" +dependencies = [ + "either", +] + +[[package]] +name = "itoa" +version = "1.0.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"49f1f14873335454500d59611f1cf4a4b0f786f9ac11f4312a78e4cf2566695b" + +[[package]] +name = "jobserver" +version = "0.1.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d2b099aaa34a9751c5bf0878add70444e1ed2dd73f347be99003d4577277de6e" +dependencies = [ + "libc", +] + +[[package]] +name = "js-sys" +version = "0.3.69" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "29c15563dc2726973df627357ce0c9ddddbea194836909d655df6a75d2cf296d" +dependencies = [ + "wasm-bindgen", +] + +[[package]] +name = "lazy_static" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" +dependencies = [ + "spin 0.5.2", +] + +[[package]] +name = "lexical-core" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2cde5de06e8d4c2faabc400238f9ae1c74d5412d03a7bd067645ccbc47070e46" +dependencies = [ + "lexical-parse-float", + "lexical-parse-integer", + "lexical-util", + "lexical-write-float", + "lexical-write-integer", +] + +[[package]] +name = "lexical-parse-float" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "683b3a5ebd0130b8fb52ba0bdc718cc56815b6a097e28ae5a6997d0ad17dc05f" +dependencies = [ + "lexical-parse-integer", + "lexical-util", + "static_assertions", +] + +[[package]] +name = "lexical-parse-integer" +version = "0.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6d0994485ed0c312f6d965766754ea177d07f9c00c9b82a5ee62ed5b47945ee9" +dependencies = [ + "lexical-util", + "static_assertions", +] + +[[package]] +name = "lexical-util" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5255b9ff16ff898710eb9eb63cb39248ea8a5bb036bea8085b1a767ff6c4e3fc" +dependencies = [ + "static_assertions", +] + +[[package]] +name = "lexical-write-float" +version = "0.8.5" 
+source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "accabaa1c4581f05a3923d1b4cfd124c329352288b7b9da09e766b0668116862" +dependencies = [ + "lexical-util", + "lexical-write-integer", + "static_assertions", +] + +[[package]] +name = "lexical-write-integer" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e1b6f3d1f4422866b68192d62f77bc5c700bee84f3069f2469d7bc8c77852446" +dependencies = [ + "lexical-util", + "static_assertions", +] + +[[package]] +name = "libc" +version = "0.2.155" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "97b3888a4aecf77e811145cadf6eef5901f4782c53886191b2f693f24761847c" + +[[package]] +name = "libm" +version = "0.2.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4ec2a862134d2a7d32d7983ddcdd1c4923530833c9f2ea1a44fc5fa473989058" + +[[package]] +name = "libsqlite3-sys" +version = "0.27.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cf4e226dcd58b4be396f7bd3c20da8fdee2911400705297ba7d2d7cc2c30f716" +dependencies = [ + "cc", + "pkg-config", + "vcpkg", +] + +[[package]] +name = "linux-raw-sys" +version = "0.4.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "78b3ae25bc7c8c38cec158d1f2757ee79e9b3740fbc7ccf0e59e4b08d793fa89" + +[[package]] +name = "lock_api" +version = "0.4.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "07af8b9cdd281b7915f413fa73f29ebd5d55d0d3f0155584dade1ff18cea1b17" +dependencies = [ + "autocfg", + "scopeguard", +] + +[[package]] +name = "log" +version = "0.4.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "90ed8c1e510134f979dbc4f070f87d4313098b704861a105fe34231c70a3901c" + +[[package]] +name = "lz4_flex" +version = "0.11.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"75761162ae2b0e580d7e7c390558127e5f01b4194debd6221fd8c207fc80e3f5" +dependencies = [ + "twox-hash", +] + +[[package]] +name = "lzma-sys" +version = "0.1.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5fda04ab3764e6cde78b9974eec4f779acaba7c4e84b36eca3cf77c581b85d27" +dependencies = [ + "cc", + "libc", + "pkg-config", +] + +[[package]] +name = "md-5" +version = "0.10.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d89e7ee0cfbedfc4da3340218492196241d89eefb6dab27de5df917a6d2e78cf" +dependencies = [ + "cfg-if", + "digest", +] + +[[package]] +name = "memchr" +version = "2.7.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c8640c5d730cb13ebd907d8d04b52f55ac9a2eec55b440c8892f40d56c76c1d" + +[[package]] +name = "mime" +version = "0.3.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6877bb514081ee2a7ff5ef9de3281f14a4dd4bceac4c09388074a6b5df8a139a" + +[[package]] +name = "minimal-lexical" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" + +[[package]] +name = "miniz_oxide" +version = "0.7.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "87dfd01fe195c66b572b37921ad8803d010623c0aca821bea2302239d155cdae" +dependencies = [ + "adler", +] + +[[package]] +name = "mio" +version = "0.8.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a4a650543ca06a924e8b371db273b2756685faae30f8487da1b56505a8f78b0c" +dependencies = [ + "libc", + "wasi", + "windows-sys 0.48.0", +] + +[[package]] +name = "native-tls" +version = "0.2.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a8614eb2c83d59d1c8cc974dd3f920198647674a0a035e1af1fa58707e317466" +dependencies = [ + "libc", + "log", + "openssl", + "openssl-probe", + "openssl-sys", + "schannel", + 
"security-framework", + "security-framework-sys", + "tempfile", +] + +[[package]] +name = "nom" +version = "7.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a" +dependencies = [ + "memchr", + "minimal-lexical", +] + +[[package]] +name = "num" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "35bd024e8b2ff75562e5f34e7f4905839deb4b22955ef5e73d2fea1b9813cb23" +dependencies = [ + "num-bigint", + "num-complex", + "num-integer", + "num-iter", + "num-rational", + "num-traits", +] + +[[package]] +name = "num-bigint" +version = "0.4.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c165a9ab64cf766f73521c0dd2cfdff64f488b8f0b3e621face3462d3db536d7" +dependencies = [ + "num-integer", + "num-traits", +] + +[[package]] +name = "num-bigint-dig" +version = "0.8.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc84195820f291c7697304f3cbdadd1cb7199c0efc917ff5eafd71225c136151" +dependencies = [ + "byteorder", + "lazy_static", + "libm", + "num-integer", + "num-iter", + "num-traits", + "rand", + "smallvec", + "zeroize", +] + +[[package]] +name = "num-complex" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "73f88a1307638156682bada9d7604135552957b7818057dcef22705b4d509495" +dependencies = [ + "num-traits", +] + +[[package]] +name = "num-conv" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "51d515d32fb182ee37cda2ccdcb92950d6a3c2893aa280e540671c2cd0f3b1d9" + +[[package]] +name = "num-integer" +version = "0.1.46" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7969661fd2958a5cb096e56c8e1ad0444ac2bbcd0061bd28660485a44879858f" +dependencies = [ + "num-traits", +] + +[[package]] +name = "num-iter" +version = "0.1.45" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "1429034a0490724d0075ebb2bc9e875d6503c3cf69e235a8941aa757d83ef5bf" +dependencies = [ + "autocfg", + "num-integer", + "num-traits", +] + +[[package]] +name = "num-rational" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f83d14da390562dca69fc84082e73e548e1ad308d24accdedd2720017cb37824" +dependencies = [ + "num-bigint", + "num-integer", + "num-traits", +] + +[[package]] +name = "num-traits" +version = "0.2.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" +dependencies = [ + "autocfg", + "libm", +] + +[[package]] +name = "num_cpus" +version = "1.16.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4161fcb6d602d4d2081af7c3a45852d875a03dd337a6bfdd6e06407b61342a43" +dependencies = [ + "hermit-abi", + "libc", +] + +[[package]] +name = "object" +version = "0.35.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8ec7ab813848ba4522158d5517a6093db1ded27575b070f4177b8d12b41db5e" +dependencies = [ + "memchr", +] + +[[package]] +name = "object_store" +version = "0.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8718f8b65fdf67a45108d1548347d4af7d71fb81ce727bbf9e3b2535e079db3" +dependencies = [ + "async-trait", + "base64 0.21.7", + "bytes", + "chrono", + "futures", + "humantime", + "hyper", + "itertools", + "parking_lot", + "percent-encoding", + "quick-xml", + "rand", + "reqwest", + "ring", + "rustls-pemfile 2.1.2", + "serde", + "serde_json", + "snafu", + "tokio", + "tracing", + "url", + "walkdir", +] + +[[package]] +name = "once_cell" +version = "1.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92" + +[[package]] +name = "openssl" +version = "0.10.64" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "95a0481286a310808298130d22dd1fef0fa571e05a8f44ec801801e84b216b1f" +dependencies = [ + "bitflags 2.5.0", + "cfg-if", + "foreign-types", + "libc", + "once_cell", + "openssl-macros", + "openssl-sys", +] + +[[package]] +name = "openssl-macros" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a948666b637a0f465e8564c73e89d4dde00d72d4d473cc972f390fc3dcee7d9c" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.66", +] + +[[package]] +name = "openssl-probe" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ff011a302c396a5197692431fc1948019154afc178baf7d8e37367442a4601cf" + +[[package]] +name = "openssl-sys" +version = "0.9.102" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c597637d56fbc83893a35eb0dd04b2b8e7a50c91e64e9493e398b5df4fb45fa2" +dependencies = [ + "cc", + "libc", + "pkg-config", + "vcpkg", +] + +[[package]] +name = "ordered-float" +version = "2.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68f19d67e5a2795c94e73e0bb1cc1a7edeb2e28efd39e2e1c9b7a40c1108b11c" +dependencies = [ + "num-traits", +] + +[[package]] +name = "ordered-float" +version = "3.9.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f1e1c390732d15f1d48471625cd92d154e66db2c56645e29a9cd26f4699f72dc" +dependencies = [ + "num-traits", +] + +[[package]] +name = "parking_lot" +version = "0.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f1bf18183cf54e8d6059647fc3063646a1801cf30896933ec2311622cc4b9a27" +dependencies = [ + "lock_api", + "parking_lot_core", +] + +[[package]] +name = "parking_lot_core" +version = "0.9.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e401f977ab385c9e4e3ab30627d6f26d00e2c73eef317493c4ec6d468726cf8" +dependencies = [ + "cfg-if", + "libc", + 
"redox_syscall 0.5.1", + "smallvec", + "windows-targets 0.52.5", +] + +[[package]] +name = "parquet" +version = "51.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "096795d4f47f65fd3ee1ec5a98b77ab26d602f2cc785b0e4be5443add17ecc32" +dependencies = [ + "ahash", + "arrow-array", + "arrow-buffer", + "arrow-cast", + "arrow-data", + "arrow-ipc", + "arrow-schema", + "arrow-select", + "base64 0.22.1", + "brotli", + "bytes", + "chrono", + "flate2", + "futures", + "half", + "hashbrown", + "lz4_flex", + "num", + "num-bigint", + "object_store", + "paste", + "seq-macro", + "snap", + "thrift", + "tokio", + "twox-hash", + "zstd", +] + +[[package]] +name = "parse-zoneinfo" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1f2a05b18d44e2957b88f96ba460715e295bc1d7510468a2f3d3b44535d26c24" +dependencies = [ + "regex", +] + +[[package]] +name = "paste" +version = "1.0.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a" + +[[package]] +name = "pem-rfc7468" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "88b39c9bfcfc231068454382784bb460aae594343fb030d46e9f50a645418412" +dependencies = [ + "base64ct", +] + +[[package]] +name = "percent-encoding" +version = "2.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e" + +[[package]] +name = "petgraph" +version = "0.6.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b4c5cc86750666a3ed20bdaf5ca2a0344f9c67674cae0515bec2da16fbaa47db" +dependencies = [ + "fixedbitset", + "indexmap", +] + +[[package]] +name = "phf" +version = "0.11.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ade2d8b8f33c7333b51bcf0428d37e217e9f32192ae4772156f65063b8ce03dc" +dependencies = [ + 
"phf_shared", +] + +[[package]] +name = "phf_codegen" +version = "0.11.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e8d39688d359e6b34654d328e262234662d16cc0f60ec8dcbe5e718709342a5a" +dependencies = [ + "phf_generator", + "phf_shared", +] + +[[package]] +name = "phf_generator" +version = "0.11.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "48e4cc64c2ad9ebe670cb8fd69dd50ae301650392e81c05f9bfcb2d5bdbc24b0" +dependencies = [ + "phf_shared", + "rand", +] + +[[package]] +name = "phf_shared" +version = "0.11.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "90fcb95eef784c2ac79119d1dd819e162b5da872ce6f3c3abe1e8ca1c082f72b" +dependencies = [ + "siphasher", +] + +[[package]] +name = "pin-project-lite" +version = "0.2.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bda66fc9667c18cb2758a2ac84d1167245054bcf85d5d1aaa6923f45801bdd02" + +[[package]] +name = "pin-utils" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" + +[[package]] +name = "pkcs1" +version = "0.7.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c8ffb9f10fa047879315e6625af03c164b16962a5368d724ed16323b68ace47f" +dependencies = [ + "der", + "pkcs8", + "spki", +] + +[[package]] +name = "pkcs8" +version = "0.10.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f950b2377845cebe5cf8b5165cb3cc1a5e0fa5cfa3e1f7f55707d8fd82e0a7b7" +dependencies = [ + "der", + "spki", +] + +[[package]] +name = "pkg-config" +version = "0.3.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d231b230927b5e4ad203db57bbcbee2802f6bce620b1e4a9024a07d94e2907ec" + +[[package]] +name = "powerfmt" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"439ee305def115ba05938db6eb1644ff94165c5ab5e9420d1c1bcedbba909391" + +[[package]] +name = "ppv-lite86" +version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de" + +[[package]] +name = "proc-macro2" +version = "1.0.85" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "22244ce15aa966053a896d1accb3a6e68469b97c7f33f284b99f0d576879fc23" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quick-xml" +version = "0.31.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1004a344b30a54e2ee58d66a71b32d2db2feb0a31f9a2d302bf0536f15de2a33" +dependencies = [ + "memchr", + "serde", +] + +[[package]] +name = "quote" +version = "1.0.36" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fa76aaf39101c457836aec0ce2316dbdc3ab723cdda1c6bd4e6ad4208acaca7" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "rand" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" +dependencies = [ + "libc", + "rand_chacha", + "rand_core", +] + +[[package]] +name = "rand_chacha" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" +dependencies = [ + "ppv-lite86", + "rand_core", +] + +[[package]] +name = "rand_core" +version = "0.6.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" +dependencies = [ + "getrandom", +] + +[[package]] +name = "redox_syscall" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4722d768eff46b75989dd134e5c353f0d6296e5aaa3132e776cbdb56be7731aa" +dependencies = [ + "bitflags 1.3.2", +] + +[[package]] +name = 
"redox_syscall" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "469052894dcb553421e483e4209ee581a45100d31b4018de03e5a7ad86374a7e" +dependencies = [ + "bitflags 2.5.0", +] + +[[package]] +name = "regex" +version = "1.10.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c117dbdfde9c8308975b6a18d71f3f385c89461f7b3fb054288ecf2a2058ba4c" +dependencies = [ + "aho-corasick", + "memchr", + "regex-automata", + "regex-syntax", +] + +[[package]] +name = "regex-automata" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "86b83b8b9847f9bf95ef68afb0b8e6cdb80f498442f5179a29fad448fcc1eaea" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] + +[[package]] +name = "regex-syntax" +version = "0.8.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "adad44e29e4c806119491a7f06f03de4d1af22c3a680dd47f1e6e179439d1f56" + +[[package]] +name = "reqwest" +version = "0.11.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dd67538700a17451e7cba03ac727fb961abb7607553461627b97de0b89cf4a62" +dependencies = [ + "base64 0.21.7", + "bytes", + "encoding_rs", + "futures-core", + "futures-util", + "h2", + "http", + "http-body", + "hyper", + "hyper-rustls", + "ipnet", + "js-sys", + "log", + "mime", + "once_cell", + "percent-encoding", + "pin-project-lite", + "rustls", + "rustls-native-certs", + "rustls-pemfile 1.0.4", + "serde", + "serde_json", + "serde_urlencoded", + "sync_wrapper", + "system-configuration", + "tokio", + "tokio-rustls", + "tokio-util", + "tower-service", + "url", + "wasm-bindgen", + "wasm-bindgen-futures", + "wasm-streams", + "web-sys", + "winreg", +] + +[[package]] +name = "ring" +version = "0.17.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c17fa4cb658e3583423e915b9f3acc01cceaee1860e33d59ebae66adc3a2dc0d" +dependencies = [ + "cc", + "cfg-if", + 
"getrandom", + "libc", + "spin 0.9.8", + "untrusted", + "windows-sys 0.52.0", +] + +[[package]] +name = "rsa" +version = "0.9.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5d0e5124fcb30e76a7e79bfee683a2746db83784b86289f6251b54b7950a0dfc" +dependencies = [ + "const-oid", + "digest", + "num-bigint-dig", + "num-integer", + "num-traits", + "pkcs1", + "pkcs8", + "rand_core", + "signature", + "spki", + "subtle", + "zeroize", +] + +[[package]] +name = "rustc-demangle" +version = "0.1.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "719b953e2095829ee67db738b3bfa9fa368c94900df327b3f07fe6e794d2fe1f" + +[[package]] +name = "rustc_version" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bfa0f585226d2e68097d4f95d113b15b83a82e819ab25717ec0590d9584ef366" +dependencies = [ + "semver", +] + +[[package]] +name = "rustix" +version = "0.38.34" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "70dc5ec042f7a43c4a73241207cecc9873a06d45debb38b329f8541d85c2730f" +dependencies = [ + "bitflags 2.5.0", + "errno", + "libc", + "linux-raw-sys", + "windows-sys 0.52.0", +] + +[[package]] +name = "rustls" +version = "0.21.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f56a14d1f48b391359b22f731fd4bd7e43c97f3c50eee276f3aa09c94784d3e" +dependencies = [ + "log", + "ring", + "rustls-webpki", + "sct", +] + +[[package]] +name = "rustls-native-certs" +version = "0.6.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a9aace74cb666635c918e9c12bc0d348266037aa8eb599b5cba565709a8dff00" +dependencies = [ + "openssl-probe", + "rustls-pemfile 1.0.4", + "schannel", + "security-framework", +] + +[[package]] +name = "rustls-pemfile" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1c74cae0a4cf6ccbbf5f359f08efdf8ee7e1dc532573bf0db71968cb56b1448c" 
+dependencies = [ + "base64 0.21.7", +] + +[[package]] +name = "rustls-pemfile" +version = "2.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "29993a25686778eb88d4189742cd713c9bce943bc54251a33509dc63cbacf73d" +dependencies = [ + "base64 0.22.1", + "rustls-pki-types", +] + +[[package]] +name = "rustls-pki-types" +version = "1.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "976295e77ce332211c0d24d92c0e83e50f5c5f046d11082cea19f3df13a3562d" + +[[package]] +name = "rustls-webpki" +version = "0.101.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b6275d1ee7a1cd780b64aca7726599a1dbc893b1e64144529e55c3c2f745765" +dependencies = [ + "ring", + "untrusted", +] + +[[package]] +name = "rustversion" +version = "1.0.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "955d28af4278de8121b7ebeb796b6a45735dc01436d898801014aced2773a3d6" + +[[package]] +name = "ryu" +version = "1.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f3cb5ba0dc43242ce17de99c180e96db90b235b8a9fdc9543c96d2209116bd9f" + +[[package]] +name = "same-file" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502" +dependencies = [ + "winapi-util", +] + +[[package]] +name = "schannel" +version = "0.1.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fbc91545643bcf3a0bbb6569265615222618bdf33ce4ffbbd13c4bbd4c093534" +dependencies = [ + "windows-sys 0.52.0", +] + +[[package]] +name = "scopeguard" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" + +[[package]] +name = "sct" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"da046153aa2352493d6cb7da4b6e5c0c057d8a1d0a9aa8560baffdd945acd414" +dependencies = [ + "ring", + "untrusted", +] + +[[package]] +name = "sea-query" +version = "0.30.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4166a1e072292d46dc91f31617c2a1cdaf55a8be4b5c9f4bf2ba248e3ac4999b" +dependencies = [ + "chrono", + "derivative", + "inherent", + "ordered-float 3.9.2", + "sea-query-attr", + "sea-query-derive", + "serde_json", +] + +[[package]] +name = "sea-query-attr" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "168a31e0ef5a791ad26aa97c502eaed8d2a1ffdc22b3249f9947c1e12be6b477" +dependencies = [ + "darling", + "heck 0.4.1", + "quote", + "syn 1.0.109", +] + +[[package]] +name = "sea-query-binder" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "36bbb68df92e820e4d5aeb17b4acd5cc8b5d18b2c36a4dd6f4626aabfa7ab1b9" +dependencies = [ + "chrono", + "sea-query", + "serde_json", + "sqlx", +] + +[[package]] +name = "sea-query-derive" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "25a82fcb49253abcb45cdcb2adf92956060ec0928635eb21b4f7a6d8f25ab0bc" +dependencies = [ + "heck 0.4.1", + "proc-macro2", + "quote", + "syn 2.0.66", + "thiserror", +] + +[[package]] +name = "security-framework" +version = "2.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c627723fd09706bacdb5cf41499e95098555af3c3c29d014dc3c458ef6be11c0" +dependencies = [ + "bitflags 2.5.0", + "core-foundation", + "core-foundation-sys", + "libc", + "security-framework-sys", +] + +[[package]] +name = "security-framework-sys" +version = "2.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "317936bbbd05227752583946b9e66d7ce3b489f84e11a94a510b4437fef407d7" +dependencies = [ + "core-foundation-sys", + "libc", +] + +[[package]] +name = "semver" +version = "1.0.23" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "61697e0a1c7e512e84a621326239844a24d8207b4669b41bc18b32ea5cbf988b" + +[[package]] +name = "seq-macro" +version = "0.3.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a3f0bf26fd526d2a95683cd0f87bf103b8539e2ca1ef48ce002d67aad59aa0b4" + +[[package]] +name = "serde" +version = "1.0.203" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7253ab4de971e72fb7be983802300c30b5a7f0c2e56fab8abfc6a214307c0094" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.203" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "500cbc0ebeb6f46627f50f3f5811ccf6bf00643be300b4c3eabc0ef55dc5b5ba" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.66", +] + +[[package]] +name = "serde_json" +version = "1.0.117" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "455182ea6142b14f93f4bc5320a2b31c1f266b66a4a5c858b013302a5d8cbfc3" +dependencies = [ + "itoa", + "ryu", + "serde", +] + +[[package]] +name = "serde_urlencoded" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d3491c14715ca2294c4d6a88f15e84739788c1d030eed8c110436aafdaa2f3fd" +dependencies = [ + "form_urlencoded", + "itoa", + "ryu", + "serde", +] + +[[package]] +name = "sha1" +version = "0.10.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e3bf829a2d51ab4a5ddf1352d8470c140cadc8301b2ae1789db023f01cedd6ba" +dependencies = [ + "cfg-if", + "cpufeatures", + "digest", +] + +[[package]] +name = "sha2" +version = "0.10.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "793db75ad2bcafc3ffa7c68b215fee268f537982cd901d132f89c6343f3a3dc8" +dependencies = [ + "cfg-if", + "cpufeatures", + "digest", +] + +[[package]] +name = "signature" +version = "2.2.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "77549399552de45a898a580c1b41d445bf730df867cc44e6c0233bbc4b8329de" +dependencies = [ + "digest", + "rand_core", +] + +[[package]] +name = "siphasher" +version = "0.3.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "38b58827f4464d87d377d175e90bf58eb00fd8716ff0a62f80356b5e61555d0d" + +[[package]] +name = "slab" +version = "0.4.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f92a496fb766b417c996b9c5e57daf2f7ad3b0bebe1ccfca4856390e3d3bb67" +dependencies = [ + "autocfg", +] + +[[package]] +name = "smallvec" +version = "1.13.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3c5e1a9a646d36c3599cd173a41282daf47c44583ad367b8e6837255952e5c67" + +[[package]] +name = "snafu" +version = "0.7.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e4de37ad025c587a29e8f3f5605c00f70b98715ef90b9061a815b9e59e9042d6" +dependencies = [ + "doc-comment", + "snafu-derive", +] + +[[package]] +name = "snafu-derive" +version = "0.7.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "990079665f075b699031e9c08fd3ab99be5029b96f3b78dc0709e8f77e4efebf" +dependencies = [ + "heck 0.4.1", + "proc-macro2", + "quote", + "syn 1.0.109", +] + +[[package]] +name = "snap" +version = "1.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1b6b67fb9a61334225b5b790716f609cd58395f895b3fe8b328786812a40bc3b" + +[[package]] +name = "socket2" +version = "0.5.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ce305eb0b4296696835b71df73eb912e0f1ffd2556a501fcede6e0c50349191c" +dependencies = [ + "libc", + "windows-sys 0.52.0", +] + +[[package]] +name = "spin" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6e63cff320ae2c57904679ba7cb63280a3dc4613885beafb148ee7bf9aa9042d" + +[[package]] 
+name = "spin" +version = "0.9.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67" +dependencies = [ + "lock_api", +] + +[[package]] +name = "spki" +version = "0.7.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d91ed6c858b01f942cd56b37a94b3e0a1798290327d1236e4d9cf4eaca44d29d" +dependencies = [ + "base64ct", + "der", +] + +[[package]] +name = "sqlformat" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ce81b7bd7c4493975347ef60d8c7e8b742d4694f4c49f93e0a12ea263938176c" +dependencies = [ + "itertools", + "nom", + "unicode_categories", +] + +[[package]] +name = "sqlparser" +version = "0.47.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "295e9930cd7a97e58ca2a070541a3ca502b17f5d1fa7157376d0fabd85324f25" +dependencies = [ + "log", + "sqlparser_derive", +] + +[[package]] +name = "sqlparser_derive" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "01b2e185515564f15375f593fb966b5718bc624ba77fe49fa4616ad619690554" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.66", +] + +[[package]] +name = "sqlx" +version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c9a2ccff1a000a5a59cd33da541d9f2fdcd9e6e8229cc200565942bff36d0aaa" +dependencies = [ + "sqlx-core", + "sqlx-macros", + "sqlx-mysql", + "sqlx-postgres", + "sqlx-sqlite", +] + +[[package]] +name = "sqlx-core" +version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "24ba59a9342a3d9bab6c56c118be528b27c9b60e490080e9711a04dccac83ef6" +dependencies = [ + "ahash", + "atoi", + "byteorder", + "bytes", + "chrono", + "crc", + "crossbeam-queue", + "either", + "event-listener", + "futures-channel", + "futures-core", + "futures-intrusive", + "futures-io", + "futures-util", + "hashlink", + "hex", 
+ "indexmap", + "log", + "memchr", + "native-tls", + "once_cell", + "paste", + "percent-encoding", + "serde", + "serde_json", + "sha2", + "smallvec", + "sqlformat", + "thiserror", + "tokio", + "tokio-stream", + "tracing", + "url", + "uuid", +] + +[[package]] +name = "sqlx-macros" +version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4ea40e2345eb2faa9e1e5e326db8c34711317d2b5e08d0d5741619048a803127" +dependencies = [ + "proc-macro2", + "quote", + "sqlx-core", + "sqlx-macros-core", + "syn 1.0.109", +] + +[[package]] +name = "sqlx-macros-core" +version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5833ef53aaa16d860e92123292f1f6a3d53c34ba8b1969f152ef1a7bb803f3c8" +dependencies = [ + "dotenvy", + "either", + "heck 0.4.1", + "hex", + "once_cell", + "proc-macro2", + "quote", + "serde", + "serde_json", + "sha2", + "sqlx-core", + "sqlx-mysql", + "sqlx-postgres", + "sqlx-sqlite", + "syn 1.0.109", + "tempfile", + "tokio", + "url", +] + +[[package]] +name = "sqlx-mysql" +version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1ed31390216d20e538e447a7a9b959e06ed9fc51c37b514b46eb758016ecd418" +dependencies = [ + "atoi", + "base64 0.21.7", + "bitflags 2.5.0", + "byteorder", + "bytes", + "chrono", + "crc", + "digest", + "dotenvy", + "either", + "futures-channel", + "futures-core", + "futures-io", + "futures-util", + "generic-array", + "hex", + "hkdf", + "hmac", + "itoa", + "log", + "md-5", + "memchr", + "once_cell", + "percent-encoding", + "rand", + "rsa", + "serde", + "sha1", + "sha2", + "smallvec", + "sqlx-core", + "stringprep", + "thiserror", + "tracing", + "uuid", + "whoami", +] + +[[package]] +name = "sqlx-postgres" +version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7c824eb80b894f926f89a0b9da0c7f435d27cdd35b8c655b114e58223918577e" +dependencies = [ + "atoi", + "base64 0.21.7", + "bitflags 2.5.0", + 
"byteorder", + "chrono", + "crc", + "dotenvy", + "etcetera", + "futures-channel", + "futures-core", + "futures-io", + "futures-util", + "hex", + "hkdf", + "hmac", + "home", + "itoa", + "log", + "md-5", + "memchr", + "once_cell", + "rand", + "serde", + "serde_json", + "sha2", + "smallvec", + "sqlx-core", + "stringprep", + "thiserror", + "tracing", + "uuid", + "whoami", +] + +[[package]] +name = "sqlx-sqlite" +version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b244ef0a8414da0bed4bb1910426e890b19e5e9bccc27ada6b797d05c55ae0aa" +dependencies = [ + "atoi", + "chrono", + "flume", + "futures-channel", + "futures-core", + "futures-executor", + "futures-intrusive", + "futures-util", + "libsqlite3-sys", + "log", + "percent-encoding", + "serde", + "sqlx-core", + "tracing", + "url", + "urlencoding", + "uuid", +] + +[[package]] +name = "sqlx-sqlite-index-example" +version = "0.1.0" +dependencies = [ + "anyhow", + "arrow", + "arrow-schema", + "async-trait", + "datafusion", + "datafusion-common", + "datafusion-expr", + "datafusion-optimizer", + "datafusion-physical-expr", + "datafusion-sql", + "object_store", + "sea-query", + "sea-query-binder", + "sqlx", + "tempfile", + "thiserror", + "time", + "tokio", + "url", +] + +[[package]] +name = "static_assertions" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" + +[[package]] +name = "stringprep" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7b4df3d392d81bd458a8a621b8bffbd2302a12ffe288a9d931670948749463b1" +dependencies = [ + "unicode-bidi", + "unicode-normalization", + "unicode-properties", +] + +[[package]] +name = "strum" +version = "0.26.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5d8cec3501a5194c432b2b7976db6b7d10ec95c253208b45f83f7136aa985e29" +dependencies = [ + "strum_macros", +] + 
+[[package]] +name = "strum_macros" +version = "0.26.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4c6bee85a5a24955dc440386795aa378cd9cf82acd5f764469152d2270e581be" +dependencies = [ + "heck 0.5.0", + "proc-macro2", + "quote", + "rustversion", + "syn 2.0.66", +] + +[[package]] +name = "subtle" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "81cdd64d312baedb58e21336b31bc043b77e01cc99033ce76ef539f78e965ebc" + +[[package]] +name = "syn" +version = "1.0.109" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "syn" +version = "2.0.66" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c42f3f41a2de00b01c0aaad383c5a45241efc8b2d1eda5661812fda5f3cdcff5" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "sync_wrapper" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2047c6ded9c721764247e62cd3b03c09ffc529b2ba5b10ec482ae507a4a70160" + +[[package]] +name = "system-configuration" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba3a3adc5c275d719af8cb4272ea1c4a6d668a777f37e115f6d11ddbc1c8e0e7" +dependencies = [ + "bitflags 1.3.2", + "core-foundation", + "system-configuration-sys", +] + +[[package]] +name = "system-configuration-sys" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a75fb188eb626b924683e3b95e3a48e63551fcfb51949de2f06a9d91dbee93c9" +dependencies = [ + "core-foundation-sys", + "libc", +] + +[[package]] +name = "tempfile" +version = "3.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85b77fafb263dd9d05cbeac119526425676db3784113aa9295c88498cbf8bff1" 
+dependencies = [ + "cfg-if", + "fastrand", + "rustix", + "windows-sys 0.52.0", +] + +[[package]] +name = "thiserror" +version = "1.0.61" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c546c80d6be4bc6a00c0f01730c08df82eaa7a7a61f11d656526506112cc1709" +dependencies = [ + "thiserror-impl", +] + +[[package]] +name = "thiserror-impl" +version = "1.0.61" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "46c3384250002a6d5af4d114f2845d37b57521033f30d5c3f46c4d70e1197533" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.66", +] + +[[package]] +name = "thrift" +version = "0.17.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7e54bc85fc7faa8bc175c4bab5b92ba8d9a3ce893d0e9f42cc455c8ab16a9e09" +dependencies = [ + "byteorder", + "integer-encoding", + "ordered-float 2.10.1", +] + +[[package]] +name = "time" +version = "0.3.36" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5dfd88e563464686c916c7e46e623e520ddc6d79fa6641390f2e3fa86e83e885" +dependencies = [ + "deranged", + "num-conv", + "powerfmt", + "serde", + "time-core", +] + +[[package]] +name = "time-core" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ef927ca75afb808a4d64dd374f00a2adf8d0fcff8e7b184af886c3c87ec4a3f3" + +[[package]] +name = "tiny-keccak" +version = "2.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2c9d3793400a45f954c52e73d068316d76b6f4e36977e3fcebb13a2721e80237" +dependencies = [ + "crunchy", +] + +[[package]] +name = "tinyvec" +version = "1.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "87cc5ceb3875bb20c2890005a4e226a4651264a5c75edb2421b52861a0a0cb50" +dependencies = [ + "tinyvec_macros", +] + +[[package]] +name = "tinyvec_macros" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" + +[[package]] +name = "tokio" +version = "1.38.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba4f4a02a7a80d6f274636f0aa95c7e383b912d41fe721a31f29e29698585a4a" +dependencies = [ + "backtrace", + "bytes", + "libc", + "mio", + "num_cpus", + "pin-project-lite", + "socket2", + "tokio-macros", + "windows-sys 0.48.0", +] + +[[package]] +name = "tokio-macros" +version = "2.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5f5ae998a069d4b5aba8ee9dad856af7d520c3699e6159b185c2acd48155d39a" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.66", +] + +[[package]] +name = "tokio-rustls" +version = "0.24.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c28327cf380ac148141087fbfb9de9d7bd4e84ab5d2c28fbc911d753de8a7081" +dependencies = [ + "rustls", + "tokio", +] + +[[package]] +name = "tokio-stream" +version = "0.1.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "267ac89e0bec6e691e5813911606935d77c476ff49024f98abcea3e7b15e37af" +dependencies = [ + "futures-core", + "pin-project-lite", + "tokio", +] + +[[package]] +name = "tokio-util" +version = "0.7.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9cf6b47b3771c49ac75ad09a6162f53ad4b8088b76ac60e8ec1455b31a189fe1" +dependencies = [ + "bytes", + "futures-core", + "futures-sink", + "pin-project-lite", + "tokio", +] + +[[package]] +name = "tower-service" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6bc1c9ce2b5135ac7f93c72918fc37feb872bdc6a5533a8b85eb4b86bfdae52" + +[[package]] +name = "tracing" +version = "0.1.40" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c3523ab5a71916ccf420eebdf5521fcef02141234bbc0b8a49f2fdc4544364ef" +dependencies = [ + "log", + "pin-project-lite", + "tracing-attributes", + 
"tracing-core", +] + +[[package]] +name = "tracing-attributes" +version = "0.1.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "34704c8d6ebcbc939824180af020566b01a7c01f80641264eba0999f6c2b6be7" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.66", +] + +[[package]] +name = "tracing-core" +version = "0.1.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c06d3da6113f116aaee68e4d601191614c9053067f9ab7f6edbcb161237daa54" +dependencies = [ + "once_cell", +] + +[[package]] +name = "try-lock" +version = "0.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e421abadd41a4225275504ea4d6566923418b7f05506fbc9c0fe86ba7396114b" + +[[package]] +name = "twox-hash" +version = "1.6.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "97fee6b57c6a41524a810daee9286c02d7752c4253064d0b05472833a438f675" +dependencies = [ + "cfg-if", + "static_assertions", +] + +[[package]] +name = "typenum" +version = "1.17.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42ff0bf0c66b8238c6f3b578df37d0b7848e55df8577b3f74f92a69acceeb825" + +[[package]] +name = "unicode-bidi" +version = "0.3.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08f95100a766bf4f8f28f90d77e0a5461bbdb219042e7679bebe79004fed8d75" + +[[package]] +name = "unicode-ident" +version = "1.0.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" + +[[package]] +name = "unicode-normalization" +version = "0.1.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a56d1686db2308d901306f92a263857ef59ea39678a5458e7cb17f01415101f5" +dependencies = [ + "tinyvec", +] + +[[package]] +name = "unicode-properties" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"e4259d9d4425d9f0661581b804cb85fe66a4c631cadd8f490d1c13a35d5d9291" + +[[package]] +name = "unicode-segmentation" +version = "1.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d4c87d22b6e3f4a18d4d40ef354e97c90fcb14dd91d7dc0aa9d8a1172ebf7202" + +[[package]] +name = "unicode-width" +version = "0.1.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0336d538f7abc86d282a4189614dfaa90810dfc2c6f6427eaf88e16311dd225d" + +[[package]] +name = "unicode_categories" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "39ec24b3121d976906ece63c9daad25b85969647682eee313cb5779fdd69e14e" + +[[package]] +name = "untrusted" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8ecb6da28b8a351d773b68d5825ac39017e680750f980f3a1a85cd8dd28a47c1" + +[[package]] +name = "url" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "31e6302e3bb753d46e83516cae55ae196fc0c309407cf11ab35cc51a4c2a4633" +dependencies = [ + "form_urlencoded", + "idna", + "percent-encoding", +] + +[[package]] +name = "urlencoding" +version = "2.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "daf8dba3b7eb870caf1ddeed7bc9d2a049f3cfdfae7cb521b087cc33ae4c49da" + +[[package]] +name = "uuid" +version = "1.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a183cf7feeba97b4dd1c0d46788634f6221d87fa961b305bed08c851829efcc0" +dependencies = [ + "getrandom", +] + +[[package]] +name = "vcpkg" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426" + +[[package]] +name = "version_check" +version = "0.9.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" + 
+[[package]] +name = "walkdir" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "29790946404f91d9c5d06f9874efddea1dc06c5efe94541a7d6863108e3a5e4b" +dependencies = [ + "same-file", + "winapi-util", +] + +[[package]] +name = "want" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bfa7760aed19e106de2c7c0b581b509f2f25d3dacaf737cb82ac61bc6d760b0e" +dependencies = [ + "try-lock", +] + +[[package]] +name = "wasi" +version = "0.11.0+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" + +[[package]] +name = "wasite" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8dad83b4f25e74f184f64c43b150b91efe7647395b42289f38e50566d82855b" + +[[package]] +name = "wasm-bindgen" +version = "0.2.92" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4be2531df63900aeb2bca0daaaddec08491ee64ceecbee5076636a3b026795a8" +dependencies = [ + "cfg-if", + "wasm-bindgen-macro", +] + +[[package]] +name = "wasm-bindgen-backend" +version = "0.2.92" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "614d787b966d3989fa7bb98a654e369c762374fd3213d212cfc0251257e747da" +dependencies = [ + "bumpalo", + "log", + "once_cell", + "proc-macro2", + "quote", + "syn 2.0.66", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-futures" +version = "0.4.42" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "76bc14366121efc8dbb487ab05bcc9d346b3b5ec0eaa76e46594cabbe51762c0" +dependencies = [ + "cfg-if", + "js-sys", + "wasm-bindgen", + "web-sys", +] + +[[package]] +name = "wasm-bindgen-macro" +version = "0.2.92" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a1f8823de937b71b9460c0c34e25f3da88250760bec0ebac694b49997550d726" +dependencies 
= [ + "quote", + "wasm-bindgen-macro-support", +] + +[[package]] +name = "wasm-bindgen-macro-support" +version = "0.2.92" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e94f17b526d0a461a191c78ea52bbce64071ed5c04c9ffe424dcb38f74171bb7" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.66", + "wasm-bindgen-backend", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-shared" +version = "0.2.92" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "af190c94f2773fdb3729c55b007a722abb5384da03bc0986df4c289bf5567e96" + +[[package]] +name = "wasm-streams" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b65dc4c90b63b118468cf747d8bf3566c1913ef60be765b5730ead9e0a3ba129" +dependencies = [ + "futures-util", + "js-sys", + "wasm-bindgen", + "wasm-bindgen-futures", + "web-sys", +] + +[[package]] +name = "web-sys" +version = "0.3.69" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77afa9a11836342370f4817622a2f0f418b134426d91a82dfb48f532d2ec13ef" +dependencies = [ + "js-sys", + "wasm-bindgen", +] + +[[package]] +name = "whoami" +version = "1.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a44ab49fad634e88f55bf8f9bb3abd2f27d7204172a112c7c9987e01c1c94ea9" +dependencies = [ + "redox_syscall 0.4.1", + "wasite", +] + +[[package]] +name = "winapi-util" +version = "0.1.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4d4cc384e1e73b93bafa6fb4f1df8c41695c8a91cf9c4c64358067d15a7b6c6b" +dependencies = [ + "windows-sys 0.52.0", +] + +[[package]] +name = "windows-core" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "33ab640c8d7e35bf8ba19b884ba838ceb4fba93a4e8c65a9059d08afcfc683d9" +dependencies = [ + "windows-targets 0.52.5", +] + +[[package]] +name = "windows-sys" +version = "0.48.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "677d2418bec65e3338edb076e806bc1ec15693c5d0104683f2efe857f61056a9" +dependencies = [ + "windows-targets 0.48.5", +] + +[[package]] +name = "windows-sys" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" +dependencies = [ + "windows-targets 0.52.5", +] + +[[package]] +name = "windows-targets" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a2fa6e2155d7247be68c096456083145c183cbbbc2764150dda45a87197940c" +dependencies = [ + "windows_aarch64_gnullvm 0.48.5", + "windows_aarch64_msvc 0.48.5", + "windows_i686_gnu 0.48.5", + "windows_i686_msvc 0.48.5", + "windows_x86_64_gnu 0.48.5", + "windows_x86_64_gnullvm 0.48.5", + "windows_x86_64_msvc 0.48.5", +] + +[[package]] +name = "windows-targets" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6f0713a46559409d202e70e28227288446bf7841d3211583a4b53e3f6d96e7eb" +dependencies = [ + "windows_aarch64_gnullvm 0.52.5", + "windows_aarch64_msvc 0.52.5", + "windows_i686_gnu 0.52.5", + "windows_i686_gnullvm", + "windows_i686_msvc 0.52.5", + "windows_x86_64_gnu 0.52.5", + "windows_x86_64_gnullvm 0.52.5", + "windows_x86_64_msvc 0.52.5", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8" + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7088eed71e8b8dda258ecc8bac5fb1153c5cffaf2578fc8ff5d61e23578d3263" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9985fd1504e250c615ca5f281c3f7a6da76213ebd5ccc9561496568a2752afb6" + +[[package]] +name = "windows_i686_gnu" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e" + +[[package]] +name = "windows_i686_gnu" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "88ba073cf16d5372720ec942a8ccbf61626074c6d4dd2e745299726ce8b89670" + +[[package]] +name = "windows_i686_gnullvm" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "87f4261229030a858f36b459e748ae97545d6f1ec60e5e0d6a3d32e0dc232ee9" + +[[package]] +name = "windows_i686_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406" + +[[package]] +name = "windows_i686_msvc" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "db3c2bf3d13d5b658be73463284eaf12830ac9a26a90c717b7f771dfe97487bf" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4e4246f76bdeff09eb48875a0fd3e2af6aada79d409d33011886d3e1581517d9" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc" + +[[package]] +name = "windows_x86_64_gnullvm" 
+version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "852298e482cd67c356ddd9570386e2862b5673c85bd5f88df9ab6802b334c596" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bec47e5bfd1bff0eeaf6d8b485cc1074891a197ab4225d504cb7a1ab88b02bf0" + +[[package]] +name = "winreg" +version = "0.50.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "524e57b2c537c0f9b1e69f1965311ec12182b4122e45035b1508cd24d2adadb1" +dependencies = [ + "cfg-if", + "windows-sys 0.48.0", +] + +[[package]] +name = "xz2" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "388c44dc09d76f1536602ead6d325eb532f5c122f17782bd57fb47baeeb767e2" +dependencies = [ + "lzma-sys", +] + +[[package]] +name = "zerocopy" +version = "0.7.34" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ae87e3fcd617500e5d106f0380cf7b77f3c6092aae37191433159dda23cfb087" +dependencies = [ + "zerocopy-derive", +] + +[[package]] +name = "zerocopy-derive" +version = "0.7.34" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "15e934569e47891f7d9411f1a451d947a60e000ab3bd24fbb970f000387d1b3b" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.66", +] + +[[package]] +name = "zeroize" +version = "1.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ced3678a2879b30306d323f4542626697a464a97c0a07c9aebf7ebca65cd4dde" + +[[package]] +name = "zstd" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2d789b1514203a1120ad2429eae43a7bd32b90976a7bb8a05f7ec02fa88cc23a" +dependencies = [ + 
"zstd-safe", +] + +[[package]] +name = "zstd-safe" +version = "7.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1cd99b45c6bc03a018c8b8a86025678c87e55526064e38f9df301989dce7ec0a" +dependencies = [ + "zstd-sys", +] + +[[package]] +name = "zstd-sys" +version = "2.0.10+zstd.1.5.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c253a4914af5bafc8fa8c86ee400827e83cf6ec01195ec1f1ed8441bf00d65aa" +dependencies = [ + "cc", + "pkg-config", +] diff --git a/sqlx-sqlite/Cargo.toml b/sqlx-sqlite/Cargo.toml new file mode 100644 index 0000000..32a53e2 --- /dev/null +++ b/sqlx-sqlite/Cargo.toml @@ -0,0 +1,25 @@ +[package] +name = "sqlx-sqlite-index-example" +version = "0.1.0" +edition = "2021" + +[dependencies] +anyhow = "~1" +arrow = "51.0.0" +async-trait = "0.1.80" +datafusion = { features = ["backtrace"], git = "https://github.com/adriangb/datafusion.git" } +datafusion-expr = { git = "https://github.com/adriangb/datafusion.git" } +datafusion-optimizer = { git = "https://github.com/adriangb/datafusion.git" } +datafusion-common = { git = "https://github.com/adriangb/datafusion.git" } +datafusion-physical-expr = { git = "https://github.com/adriangb/datafusion.git" } +datafusion-sql = { git = "https://github.com/adriangb/datafusion.git" } +sqlx = { version = "~0", features = [ "runtime-tokio", "tls-native-tls", "sqlite", "json", "uuid", "chrono", "macros" ] } +tempfile = "3.10.1" +thiserror = "~1" +tokio = { version = "~1", features = [ "rt-multi-thread", "macros" ] } +url = "2.5.0" +arrow-schema = "51.0.0" +object_store = { version = "0.9.1", features = ["gcp", "http"] } +time = "0.3.36" +sea-query = { version = "0", features = ["with-chrono", "backend-sqlite", "hashable-value", "with-json", "with-chrono", "derive", "attr", "thread-safe"] } +sea-query-binder = { version = "0", features = ["sqlx-sqlite", "with-json", "with-chrono", ] } diff --git a/sqlx-sqlite/README.md b/sqlx-sqlite/README.md new file mode 100644 
index 0000000..6135e60 --- /dev/null +++ b/sqlx-sqlite/README.md @@ -0,0 +1,58 @@ +# DataFusion secondary index example using SQLx + +This example demonstrates how to integrate a secondary index built using SQLite via SQLx with DataFusion. + +SQLite is used as a stand-in for an external remote relational database, it should be easy to adapt this example to use another database. + +This examples should be considered incomplete: it does not try to handle **many** edge cases or push down filters as much as possible. +It is meant to sketch out the basic idea, not be a complete implementation. + +## Running the example + +To run the example you just need to have Rust and Cargo installed. Then you can run just `cargo run`. + +You'll see output like this: + +```text +** Table Provider: +IndexTableProvider +---- Index ---- +SQLiteIndex() + + +** Select data, no predicates: ++---------------+-------+ +| file_name | value | ++---------------+-------+ +| file2.parquet | 100 | +| file2.parquet | 101 | +| file2.parquet | 102 | +| file2.parquet | 103 | +| file2.parquet | 104 | +| file2.parquet | 105 | +| file2.parquet | 106 | +| file2.parquet | 107 | +| file2.parquet | 108 | +| file2.parquet | 109 | ++---------------+-------+ +Files scanned: [("file2.parquet", ParquetAccessPlan { row_groups: [Scan, Scan] }), ("file1.parquet", ParquetAccessPlan { row_groups: [Scan, Scan] }), ("file3.parquet", ParquetAccessPlan { row_groups: [Scan, Scan, Scan, Scan, Scan, Scan, Scan, Scan, Scan, Scan, Scan, Scan, Scan, Scan, Scan, Scan, Scan, Scan, Scan, Scan, Scan, Scan, Scan, Scan, Scan, Scan, Scan, Scan, Scan, Scan, Scan, Scan, Scan, Scan, Scan, Scan, Scan, Scan, Scan, Scan, Scan, Scan, Scan, Scan, Scan, Scan, Scan, Scan, Scan, Scan, Scan, Scan, Scan, Scan, Scan, Scan] })] + +** Select data, predicate `value = 150` ++---------------+-------+ +| file_name | value | ++---------------+-------+ +| file2.parquet | 150 | ++---------------+-------+ +Files scanned: [("file2.parquet", ParquetAccessPlan 
{ row_groups: [Skip, Scan] })] + +** Select data, predicate `value < 20 OR value > 500` ++---------------+--------------------------+ +| file_name | COUNT(index_table.value) | ++---------------+--------------------------+ +| file1.parquet | 20 | +| file3.parquet | 2499 | ++---------------+--------------------------+ +Files scanned: [("file1.parquet", ParquetAccessPlan { row_groups: [Scan, Skip] }), ("file3.parquet", ParquetAccessPlan { row_groups: [Skip, Skip, Skip, Skip, Skip, Skip, Scan, Scan, Scan, Scan, Scan, Scan, Scan, Scan, Scan, Scan, Scan, Scan, Scan, Scan, Scan, Scan, Scan, Scan, Scan, Scan, Scan, Scan, Scan, Scan, Scan, Scan, Scan, Scan, Scan, Scan, Scan, Scan, Scan, Scan, Scan, Scan, Scan, Scan, Scan, Scan, Scan, Scan, Scan, Scan, Scan, Scan, Scan, Scan, Scan, Scan] })] +``` + +As you can see the index is being used to select which row groups to read from the Parquet files. diff --git a/sqlx-sqlite/index.db b/sqlx-sqlite/index.db new file mode 100644 index 0000000000000000000000000000000000000000..4ebf78cf96088c7148cb47caf804cc036de75a34 GIT binary patch literal 4096 zcmWFz^vNtqRY=P(%1ta$FlG>7U}9o$P*7lCU|@t|AVoG{WY8;GzzfnYK(-m98b?E5 nGz3ONU^E0qLtr!nMnhmU1V%$(Gz3ONU^E0qLtr!nC=3ArfDQ*E literal 0 HcmV?d00001 diff --git a/sqlx-sqlite/src/index.rs b/sqlx-sqlite/src/index.rs new file mode 100644 index 0000000..d4a7d84 --- /dev/null +++ b/sqlx-sqlite/src/index.rs @@ -0,0 +1,576 @@ +use std::{ + collections::HashMap, fmt::Display, fs::File, path::Path +}; + +use arrow::{ + array::AsArray, + datatypes::{Int16Type, Int32Type, Int64Type, UInt16Type, UInt32Type, UInt64Type}, +}; +use datafusion::{ + datasource::physical_plan::parquet::{ParquetAccessPlan, RequestedStatistics, RowGroupAccess, StatisticsConverter}, + parquet::arrow::arrow_reader::ParquetRecordBatchReaderBuilder, + prelude::*, +}; +use datafusion_common::{internal_datafusion_err, DataFusionError, Result, ScalarValue}; +use datafusion_expr::Operator; +use sqlx::SqlitePool; +use sea_query::{Expr as 
SeaQExpr, Iden, OnConflict, Query, SimpleExpr, SqliteQueryBuilder, Value as SqlValue}; +use sea_query_binder::SqlxBinder; + +/// SQLite secondary index for a set of parquet files +/// +/// It stores file-level data (filename and file size) as well as statistics for each column +/// in each row group of each file. +/// +/// When we scan a table we push down filters to the index to get a list of row groups that match +/// and hence the files that need to be read. +/// +/// It is possible for the index to store finer grained statistics or a complete row oriented index +/// that filters down to individual rows within row groups. +/// For example, if you have a table with an `id` column and you want to enable fast point lookups +/// you could store the entire `id` column in the secondary index as a key/value map from `id` to +/// (file_name, row_group, row_number) and use that to enable fast point lookups on parquet files. +/// This is not implemented in this example. +#[derive(Debug)] +pub struct SQLiteIndex { + pool: SqlitePool, +} + +impl Display for SQLiteIndex { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + writeln!(f, "SQLiteIndex()")?; + Ok(()) + } +} + +impl SQLiteIndex { + pub fn new(pool: SqlitePool) -> Self { + Self { pool } + } + + /// Return all the files matching the predicate + /// + /// Returns a tuple `(file_name, file_size)` + pub async fn get_files(&self, filter: Option) -> Result> { + let (sql, values) = Query::select() + .columns(vec![ + FileStatistics::FileName, + FileStatistics::FileSizeBytes, + FileStatistics::RowGroupCount, + ]) + .column(ColumnStatistics::RowGroup) + .distinct() // could be distinct_on(vec![ColumnStatistics::FileId, ColumnStatistics::RowGroup]) if the backing store supports it + .from(FileStatistics::Table) + .inner_join( + ColumnStatistics::Table, + SeaQExpr::col((FileStatistics::Table, FileStatistics::FileId)).equals((ColumnStatistics::Table, ColumnStatistics::FileId)), + ) + 
.and_where_option(filter.map(|f| push_down_filter(&f)).flatten()) + .build_sqlx(SqliteQueryBuilder); + + // TODO: we could aggregate the row groups into an array in the query to transmit less data over the wire + // (and maybe avoid the join), leaving that as a TODO since it introduces more complexity and coupling to the index's backing store + // Result is in the form of (file_name, file_size, row_group_count, row_group_to_scan) + let row_groups: Vec<(String, i64, i64, i64)> = sqlx::query_as_with(&sql, values) + .fetch_all(&self.pool) + .await.unwrap(); // TODO: handle error, possibly failing gracefully by scanning all files? + + let mut file_scans: HashMap = HashMap::new(); // file_name -> (file_size, row_groups) + + for (file_name, file_size, file_row_group_counts, row_group_to_scan) in row_groups { + let (_, access_plan) = file_scans.entry(file_name).or_insert((file_size, ParquetAccessPlan::new_none(file_row_group_counts as usize))); + // Here we could do finer grained row-level filtering, but this example does not implement that + access_plan.set(row_group_to_scan as usize, RowGroupAccess::Scan) + } + + Ok( + file_scans.into_iter().map(|(file_name, (file_size, access_plan))| { + ( + file_name, + FileScanPlan { + file_size: file_size as u64, + access_plan, + } + ) + }).collect() + ) + + } + + /// Add a new file to the index + pub async fn add_file(&mut self, file: &Path) -> anyhow::Result<()> { + let file_name = file + .file_name() + .ok_or_else(|| internal_datafusion_err!("No filename"))? 
+ .to_str() + .ok_or_else(|| internal_datafusion_err!("Invalid filename"))?; + let file_size = file.metadata()?.len(); + + let file = File::open(file).map_err(|e| { + DataFusionError::from(e).context(format!("Error opening file {file:?}")) + })?; + + let reader = ParquetRecordBatchReaderBuilder::try_new(file)?; + + // extract the parquet statistics from the file's footer + let metadata = reader.metadata(); + + let mut column_statistics: Vec = Vec::with_capacity(reader.schema().fields().len() * metadata.num_row_groups()); + + for column in 0..reader.schema().fields().len() { + let column_name = reader.schema().field(column).name().clone(); + + let row_counts = StatisticsConverter::row_counts(reader.metadata())?; + let null_counts_array = StatisticsConverter::try_new(&column_name, RequestedStatistics::NullCount, reader.schema())?.extract(reader.metadata())?; + let null_counts = null_counts_array.as_primitive::(); + + let min_values = + StatisticsConverter::try_new(&column_name.clone(), RequestedStatistics::Min, reader.schema())? + .extract(reader.metadata())?; + let max_values = + StatisticsConverter::try_new(&column_name.clone(), RequestedStatistics::Max, reader.schema())? 
+ .extract(reader.metadata())?; + + for row_group in 0..reader.metadata().num_row_groups() { + let stats = ColumnStatisticsInsertBuilder::new( + column_name.clone(), + row_group as i64, + null_counts.value(row_group) as i64, + row_counts.value(row_group) as i64, + ); + // match on the data type of the column, downcast the array and extract the min/max values and build the statistics + match reader.schema().field(column).data_type() { + arrow::datatypes::DataType::Int8 => { + let min_values = min_values.as_primitive::(); + let max_values = max_values.as_primitive::(); + let min = min_values.value(row_group) as i64; + let max = max_values.value(row_group) as i64; + let stats = stats.build(MinMaxStats::Int(min, max)); + column_statistics.push(stats); + } + arrow::datatypes::DataType::UInt8 => { + let min_values = min_values.as_primitive::(); + let max_values = max_values.as_primitive::(); + let min = min_values.value(row_group) as i64; + let max = max_values.value(row_group) as i64; + let stats = stats.build(MinMaxStats::Int(min, max)); + column_statistics.push(stats); + } + arrow::datatypes::DataType::Int16 => { + let min_values = min_values.as_primitive::(); + let max_values = max_values.as_primitive::(); + let min = min_values.value(row_group) as i64; + let max = max_values.value(row_group) as i64; + let stats = stats.build(MinMaxStats::Int(min, max)); + column_statistics.push(stats); + } + arrow::datatypes::DataType::UInt16 => { + let min_values = min_values.as_primitive::(); + let max_values = max_values.as_primitive::(); + let min = min_values.value(row_group) as i64; + let max = max_values.value(row_group) as i64; + let stats = stats.build(MinMaxStats::Int(min, max)); + column_statistics.push(stats); + } + arrow::datatypes::DataType::Int32 => { + let min_values = min_values.as_primitive::(); + let max_values = max_values.as_primitive::(); + let min = min_values.value(row_group) as i64; + let max = max_values.value(row_group) as i64; + let stats = 
stats.build(MinMaxStats::Int(min, max)); + column_statistics.push(stats); + } + arrow::datatypes::DataType::UInt32 => { + let min_values = min_values.as_primitive::(); + let max_values = max_values.as_primitive::(); + let min = min_values.value(row_group) as i64; + let max = max_values.value(row_group) as i64; + let stats = stats.build(MinMaxStats::Int(min, max)); + column_statistics.push(stats); + } + arrow::datatypes::DataType::Int64 => { + let min_values = min_values.as_primitive::(); + let max_values = max_values.as_primitive::(); + let min = min_values.value(row_group) as i64; + let max = max_values.value(row_group) as i64; + let stats = stats.build(MinMaxStats::Int(min, max)); + column_statistics.push(stats); + } + arrow::datatypes::DataType::Utf8 => { + let min_values = min_values.as_string::(); + let max_values = max_values.as_string::(); + let min = min_values.value(row_group).to_string(); + let max = max_values.value(row_group).to_string(); + let stats = stats.build(MinMaxStats::String(min, max)); + column_statistics.push(stats); + } + arrow::datatypes::DataType::LargeUtf8 => { + let min_values = min_values.as_string::(); + let max_values = max_values.as_string::(); + let min = min_values.value(row_group).to_string(); + let max = max_values.value(row_group).to_string(); + let stats = stats.build(MinMaxStats::String(min, max)); + column_statistics.push(stats); + } + _ => {} // ignore other types, we just don't put them in the index and filters will not be pushed down + } + } + } + + let file_statistics = FileStatisticsInsert { + file_name: file_name.to_string(), + file_size_bytes: file_size as i64, + row_group_count: metadata.num_row_groups() as i64, + row_count: metadata.file_metadata().num_rows() as i64, + }; + + self.add_row(file_statistics, column_statistics).await?; + Ok(()) + } + + async fn add_row( + &self, + file_statistics: FileStatisticsInsert, + column_statistics: Vec, + ) -> anyhow::Result<()> { + self.initialize().await?; + + let mut 
transaction = self.pool.begin().await?; + + let (sql, values) = Query::insert() + .into_table(FileStatistics::Table) + .columns(vec![ + FileStatistics::FileName, + FileStatistics::FileSizeBytes, + FileStatistics::RowGroupCount, + FileStatistics::RowCount, + ]) + .values_panic(vec![ + file_statistics.file_name.into(), + file_statistics.file_size_bytes.into(), + file_statistics.row_group_count.into(), + file_statistics.row_count.into(), + ]) + .on_conflict( + OnConflict::columns(vec![FileStatistics::FileName]).update_columns( + vec![ + FileStatistics::FileSizeBytes, + FileStatistics::RowGroupCount, + FileStatistics::RowCount, + ] + ).to_owned() + ) + .returning(Query::returning().column(FileStatistics::FileId)) + .build_sqlx(SqliteQueryBuilder); + let (file_id, ): (i64, ) = sqlx::query_as_with(&sql, values).fetch_one(&mut *transaction).await?; + + // Delete any existing column statistics for this file + let (sql, values) = Query::delete() + .from_table(ColumnStatistics::Table) + .and_where(SeaQExpr::col(ColumnStatistics::FileId).eq(file_id)) + .build_sqlx(SqliteQueryBuilder); + sqlx::query_with(&sql, values).execute(&mut *transaction).await?; + + for row_group_statistics in column_statistics { + let (sql, values) = Query::insert() + .into_table(ColumnStatistics::Table) + .columns(vec![ + ColumnStatistics::FileId, + ColumnStatistics::ColumnName, + ColumnStatistics::RowGroup, + ColumnStatistics::NullCount, + ColumnStatistics::RowCount, + ColumnStatistics::IntMinValue, + ColumnStatistics::IntMaxValue, + ColumnStatistics::StringMinValue, + ColumnStatistics::StringMaxValue, + ]) + .values_panic({ + match row_group_statistics.stats { + MinMaxStats::Int(min, max) => vec![ + file_id.into(), + row_group_statistics.column_name.into(), + row_group_statistics.row_group.into(), + row_group_statistics.null_count.into(), + row_group_statistics.row_count.into(), + min.into(), + max.into(), + SqlValue::String(None).into(), + SqlValue::String(None).into(), + ], + 
MinMaxStats::String(min, max) => vec![ + file_id.into(), + row_group_statistics.column_name.into(), + row_group_statistics.row_group.into(), + row_group_statistics.null_count.into(), + row_group_statistics.row_count.into(), + SqlValue::Int(None).into(), + SqlValue::Int(None).into(), + min.into(), + max.into(), + ], + }}) + .build_sqlx(SqliteQueryBuilder); + sqlx::query_with(&sql, values).execute(&mut *transaction).await?; + } + + transaction.commit().await?; + + Ok(()) + } + + /// Simple migration function that idempotently creates the table for the index + pub async fn initialize(&self) -> anyhow::Result<()> { + let query = sea_query::Table::create() + .table(FileStatistics::Table) + .if_not_exists() + .col(sea_query::ColumnDef::new(FileStatistics::FileId).big_integer().auto_increment().primary_key()) + .col(sea_query::ColumnDef::new(FileStatistics::FileName).string().not_null().unique_key()) + .col(sea_query::ColumnDef::new(FileStatistics::FileSizeBytes).big_integer().not_null()) + .col(sea_query::ColumnDef::new(FileStatistics::RowGroupCount).big_integer().not_null()) + .col(sea_query::ColumnDef::new(FileStatistics::RowCount).big_integer().not_null()) + .build(SqliteQueryBuilder); + + sqlx::query(&query).execute(&self.pool).await?; + + let query = sea_query::Table::create() + .table(ColumnStatistics::Table) + .if_not_exists() + .col(sea_query::ColumnDef::new(ColumnStatistics::FileId).big_integer().not_null()) + .col(sea_query::ColumnDef::new(ColumnStatistics::ColumnName).string().not_null()) + .col(sea_query::ColumnDef::new(ColumnStatistics::RowGroup).big_integer().not_null()) + .col(sea_query::ColumnDef::new(ColumnStatistics::NullCount).big_integer()) + .col(sea_query::ColumnDef::new(ColumnStatistics::RowCount).big_integer()) + .col(sea_query::ColumnDef::new(ColumnStatistics::IntMinValue).big_integer()) + .col(sea_query::ColumnDef::new(ColumnStatistics::IntMaxValue).big_integer()) + .col(sea_query::ColumnDef::new(ColumnStatistics::StringMinValue).string()) + 
.col(sea_query::ColumnDef::new(ColumnStatistics::StringMaxValue).string()) + .build(SqliteQueryBuilder); + + sqlx::query(&query).execute(&self.pool).await?; + + Ok(()) + } +} + + +#[derive(Debug, Clone)] +pub struct FileScanPlan { + pub file_size: u64, + pub access_plan: ParquetAccessPlan, +} + +#[derive(Debug, Clone, Iden)] +enum FileStatistics { + Table, + FileId, + FileName, + FileSizeBytes, + RowGroupCount, + RowCount, +} + +#[derive(Debug, Clone, Iden)] +enum ColumnStatistics { + Table, + FileId, + ColumnName, + RowGroup, + NullCount, + RowCount, + IntMinValue, + IntMaxValue, + StringMinValue, + StringMaxValue, + // Extend with other types as needed +} + +#[derive(Debug, Clone)] +pub enum MinMaxStats { + Int(i64, i64), + String(String, String), +} + +#[derive(Debug, Clone)] +pub struct ColumnStatisticsInsert { + pub column_name: String, + pub row_group: i64, + pub null_count: i64, + pub row_count: i64, + stats: MinMaxStats, +} + + +#[derive(Debug, Clone)] +pub struct ColumnStatisticsInsertBuilder { + column_name: String, + row_group: i64, + null_count: i64, + row_count: i64, +} + +impl ColumnStatisticsInsertBuilder { + pub fn new(column_name: String, row_group: i64, null_count: i64, row_count: i64) -> Self { + Self { + column_name, + row_group, + null_count, + row_count, + } + } + + pub fn build(self, stats: MinMaxStats) -> ColumnStatisticsInsert { + ColumnStatisticsInsert { + column_name: self.column_name, + row_group: self.row_group, + null_count: self.null_count, + row_count: self.row_count, + stats, + } + } +} + +#[derive(Debug, Clone)] +struct FileStatisticsInsert { + file_name: String, + file_size_bytes: i64, + row_group_count: i64, + row_count: i64, +} + + +pub fn push_down_filter(filter: &Expr) -> Option { + match filter { + Expr::BinaryExpr(binary_expr) => { + match (*binary_expr.left.clone(), *binary_expr.right.clone()) { + (Expr::Column(column), Expr::Literal(value)) => { + // This is something we can push down! 
+ let column_name = column.name; + let filter = push_down_binary_filter(&value, &binary_expr.op); + if let Some(filter) = filter { + Some(SeaQExpr::col(ColumnStatistics::ColumnName).eq(column_name).and(filter)) + } else { + None + } + } + (left, right) => { + let left_pushdown = push_down_filter(&left); + let right_pushdown = push_down_filter(&right); + match (left_pushdown, right_pushdown) { + (Some(left_pushdown), Some(right_pushdown)) => { + match binary_expr.op { + Operator::And => { + Some(left_pushdown.and(right_pushdown)) + }, + Operator::Or => { + Some(left_pushdown.or(right_pushdown)) + }, + _ => { + None + } + } + } + _ => None + } + } + } + }, + Expr::Not(inner) => { + let inner_pushdown = push_down_filter(&*inner); + match inner_pushdown { + Some(inner_pushdown) => { + Some(inner_pushdown.not()) + }, + None => None + } + }, + // We could handle more cases here, at least simple ones involving nulls, negations, etc. + // But this example does not implement that + _ => None + } +} + +/// Push down a simple binary expression to the index +/// Only a subset of expressions are supported since `a = 1` has to be rewritten as `a_int_max_value >= 1 AND a_int_min_value <= 1` +fn push_down_binary_filter(value: &ScalarValue, op: &Operator) -> Option { + let (min_col, max_col, sql_value) = match value { + ScalarValue::Int8(v) => { + match v { + Some(v) => (ColumnStatistics::IntMinValue, ColumnStatistics::IntMaxValue, SqlValue::Int(Some(*v as i32))), + None => (ColumnStatistics::IntMinValue, ColumnStatistics::IntMaxValue, SqlValue::Int(None)), + } + }, + ScalarValue::UInt8(v) => { + match v { + Some(v) => (ColumnStatistics::IntMinValue, ColumnStatistics::IntMaxValue, SqlValue::Int(Some(*v as i32))), + None => (ColumnStatistics::IntMinValue, ColumnStatistics::IntMaxValue, SqlValue::Int(None)), + } + }, + ScalarValue::Int16(v) => { + match v { + Some(v) => (ColumnStatistics::IntMinValue, ColumnStatistics::IntMaxValue, SqlValue::Int(Some(*v as i32))), + None => 
(ColumnStatistics::IntMinValue, ColumnStatistics::IntMaxValue, SqlValue::Int(None)), + } + }, + ScalarValue::UInt16(v) => { + match v { + Some(v) => (ColumnStatistics::IntMinValue, ColumnStatistics::IntMaxValue, SqlValue::Int(Some(*v as i32))), + None => (ColumnStatistics::IntMinValue, ColumnStatistics::IntMaxValue, SqlValue::Int(None)), + } + }, + ScalarValue::Int32(v) => { + match v { + Some(v) => (ColumnStatistics::IntMinValue, ColumnStatistics::IntMaxValue, SqlValue::Int(Some(*v))), + None => (ColumnStatistics::IntMinValue, ColumnStatistics::IntMaxValue, SqlValue::Int(None)), + } + }, + ScalarValue::UInt32(v) => { + match v { + Some(v) => (ColumnStatistics::IntMinValue, ColumnStatistics::IntMaxValue, SqlValue::BigInt(Some(*v as i64))), + None => (ColumnStatistics::IntMinValue, ColumnStatistics::IntMaxValue, SqlValue::BigInt(None)), + } + }, + ScalarValue::Int64(v) => { + match v { + Some(v) => (ColumnStatistics::IntMinValue, ColumnStatistics::IntMaxValue, SqlValue::BigInt(Some(*v))), + None => (ColumnStatistics::IntMinValue, ColumnStatistics::IntMaxValue, SqlValue::BigInt(None)), + } + }, + ScalarValue::Utf8(v) => { + match v { + Some(v) => (ColumnStatistics::StringMinValue, ColumnStatistics::StringMaxValue, SqlValue::String(Some(Box::new(v.clone())))), + None => (ColumnStatistics::StringMinValue, ColumnStatistics::StringMaxValue, SqlValue::String(None)), + } + }, + ScalarValue::LargeUtf8(v) => { + match v { + Some(v) => (ColumnStatistics::StringMinValue, ColumnStatistics::StringMaxValue, SqlValue::String(Some(Box::new(v.clone())))), + None => (ColumnStatistics::StringMinValue, ColumnStatistics::StringMaxValue, SqlValue::String(None)), + } + }, + _ => return None, + }; + let min_col = SeaQExpr::col(min_col); + let max_col = SeaQExpr::col(max_col); + let expr = match op { + Operator::Eq => { + min_col.lte(sql_value.clone()).and(max_col.gte(sql_value)) + }, + Operator::Gt => { + max_col.gt(sql_value) + }, + Operator::Lt => { + min_col.lt(sql_value) + }, + 
Operator::GtEq => { + max_col.gte(sql_value) + }, + Operator::LtEq => { + min_col.lte(sql_value) + }, + // In theory we could handle LIKE for the limited but common case of a prefix match + // and maybe other operators, but this example does not implement that + _ => return None + }; + Some(expr) +} \ No newline at end of file diff --git a/sqlx-sqlite/src/main.rs b/sqlx-sqlite/src/main.rs new file mode 100644 index 0000000..daa5f97 --- /dev/null +++ b/sqlx-sqlite/src/main.rs @@ -0,0 +1,378 @@ +use std::{ + any::Any, cell::RefCell, fmt::Display, fs::{self, DirEntry, File}, ops::Range, path::{Path, PathBuf}, sync::{Arc, Mutex} +}; + +use arrow::array::{ArrayRef, Int32Array, RecordBatch, StringArray}; +use arrow_schema::SchemaRef; +use async_trait::async_trait; +use datafusion::{ + datasource::{ + listing::PartitionedFile, + physical_plan::{parquet::ParquetAccessPlan, FileScanConfig, ParquetExec}, + TableProvider, + }, + execution::{context::SessionState, object_store::ObjectStoreUrl}, + parquet::{arrow::{arrow_reader::ParquetRecordBatchReaderBuilder, ArrowWriter}, file::properties::WriterProperties}, + physical_plan::ExecutionPlan, + prelude::*, +}; +use datafusion_common::{internal_datafusion_err, DFSchema, DataFusionError, Result}; +use datafusion_expr::{utils::conjunction, TableProviderFilterPushDown, TableType}; +use sqlx::SqlitePool; +use tempfile::TempDir; +use url::Url; + +use crate::index::SQLiteIndex; + +mod index; + +/// This example demonstrates building a secondary index over multiple Parquet +/// files and using that index during query to skip ("prune") files that do not +/// contain relevant data. +/// +/// This example rules out relevant data using min/max values of a column +/// extracted from the Parquet metadata. In a real system, the index could be +/// more sophisticated, e.g. using inverted indices, bloom filters or other +/// techniques. +/// +/// Note this is a low level example for people who want to build their own +/// custom indexes. 
To read a directory of parquet files as a table, you can use +/// a higher level API such as [`SessionContext::read_parquet`] or +/// [`ListingTable`], which also do file pruning based on parquet statistics +/// (using the same underlying APIs) +/// +/// For a more advanced example of using an index to prune row groups within a +/// file, see the (forthcoming) `advanced_parquet_index` example. +/// +/// # Diagram +/// +/// ```text +/// ┏━━━━━━━━━━━━━━━━━━━━━━━━┓ +/// ┃ Index ┃ +/// ┃ ┃ +/// step 1: predicate is ┌ ─ ─ ─ ─▶┃ (sometimes referred to ┃ +/// evaluated against ┃ as a "catalog" or ┃ +/// data in the index │ ┃ "metastore") ┃ +/// (using ┗━━━━━━━━━━━━━━━━━━━━━━━━┛ +/// PruningPredicate) │ │ +/// +/// │ │ +/// ┌──────────────┐ +/// │ value = 150 │─ ─ ─ ─ ┘ │ +/// └──────────────┘ ┌─────────────┐ +/// Predicate from query │ │ │ +/// └─────────────┘ +/// │ ┌─────────────┐ +/// step 2: Index returns only ─ ▶│ │ +/// parquet files that might have └─────────────┘ +/// matching data. ... +/// ┌─────────────┐ +/// Thus some parquet files are │ │ +/// "pruned" and thus are not └─────────────┘ +/// scanned at all Parquet Files +/// +/// ``` +/// +/// [`ListingTable`]: datafusion::datasource::listing::ListingTable +#[tokio::main] +async fn main() -> anyhow::Result<()> { + // We use a local SQLite database file (`index.db`) to store the index for this example + // But the index could be stored in any database that SQLx supports, including a remote Postgres database + let pool = SqlitePool::connect("sqlite:index.db").await?; + + // Demo data has three files, each with schema + // * file_name (string) + // * value (int32) + // + // The files are as follows: + // * file1.parquet (value: 0..100) + // * file2.parquet (value: 100..200) + // * file3.parquet (value: 200..3000) + let data = DemoData::try_new()?; + + // Create a table provider with our special index.
+ let index = SQLiteIndex::new(pool); + let provider = Arc::new(IndexTableProvider::try_new(data.path(), index).await?); + println!("** Table Provider:"); + println!("{provider}\n"); + + // Create a SessionContext for running queries that has the table provider + // registered as "index_table" + let ctx = SessionContext::new(); + ctx.register_table("index_table", Arc::clone(&provider) as _)?; + + // register an object store provider so that URLs like `file://` work + let url = Url::try_from("file://").unwrap(); + let object_store = object_store::local::LocalFileSystem::new(); + ctx.register_object_store(&url, Arc::new(object_store)); + + // Select data from the table without any predicates (and thus no pruning) + println!("** Select data, no predicates:"); + ctx.sql("SELECT file_name, value FROM index_table LIMIT 10") + .await? + .show() + .await?; + println!("Files scanned: {:?}\n", provider.last_execution()); + + // Run a query that uses the index to prune files. + // + // Using the predicate "value = 150", the IndexTableProvider can skip reading file 1 + // (max value 99) and file 3 (min value of 200) + println!("** Select data, predicate `value = 150`"); + ctx.sql("SELECT file_name, value FROM index_table WHERE value = 150") + .await? + .show() + .await?; + println!("Files scanned: {:?}\n", provider.last_execution()); + + // likewise, we can use a more complicated predicate like + // "value < 20 OR value > 500" to read only file 1 and file 3 + println!("** Select data, predicate `value < 20 OR value > 500`"); + ctx.sql( + "SELECT file_name, count(value) FROM index_table \ + WHERE value < 20 OR value > 500 GROUP BY file_name", + ) + .await? + .show() + .await?; + println!("Files scanned: {:?}\n", provider.last_execution()); + + Ok(()) +} + +/// DataFusion `TableProvider` that uses [`SQLiteIndex`], a secondary +/// index to decide which Parquet files and row groups to read.
+#[derive(Debug)] +pub struct IndexTableProvider { + /// The index of the parquet files in the directory + index: SQLiteIndex, + /// the directory in which the files are stored + dir: PathBuf, + /// The schema of the table + schema: SchemaRef, + /// A simple log of the last execution + last_execution: Mutex> +} + +impl Display for IndexTableProvider { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + writeln!(f, "IndexTableProvider")?; + writeln!(f, "---- Index ----")?; + write!(f, "{}", self.index) + } +} + +impl IndexTableProvider { + /// Create a new IndexTableProvider + pub async fn try_new( + dir: impl Into, + mut index: SQLiteIndex, + ) -> anyhow::Result { + let dir = dir.into(); + + let files = read_dir(&dir)?; + for file in &files { + index.add_file(&file.path()).await?; + } + + // Get the schema of the first file, assume they all have the same schema + let file = files.first().ok_or_else(|| { + internal_datafusion_err!("No files found in directory {dir:?}") + })?; + let file = File::open(file.path()).map_err(|e| { + DataFusionError::from(e).context(format!("Error opening file {file:?}")) + })?; + let reader = ParquetRecordBatchReaderBuilder::try_new(file)?; + let schema = reader.schema().clone(); + + Ok(Self { index, dir, schema, last_execution: Mutex::new(RefCell::new(SimpleExecutionLog::new())) }) + } +} + +#[async_trait] +impl TableProvider for IndexTableProvider { + fn as_any(&self) -> &dyn Any { + self + } + + fn schema(&self) -> SchemaRef { + self.schema.clone() + } + + fn table_type(&self) -> TableType { + TableType::Base + } + + async fn scan( + &self, + state: &SessionState, + projection: Option<&Vec>, + filters: &[Expr], + limit: Option, + ) -> Result> { + let df_schema = DFSchema::try_from(self.schema())?; + + // convert filters like [`a = 1`, `b = 2`] to a single filter like `a = 1 AND b = 2` + let predicate = conjunction(filters.to_vec()); + + // Use the index to find the files that might have data that matches the + // 
predicate. Any file that can not have data that matches the predicate + // will not be returned. + let files = self.index.get_files(predicate.clone()).await?; + + // Record the last execution for debugging + self.last_execution.lock().unwrap().get_mut().record(files.iter().map(|(filename, plan)| (filename.clone(), plan.access_plan.clone())).collect()); + + let object_store_url = ObjectStoreUrl::parse("file://")?; + let mut file_scan_config = FileScanConfig::new(object_store_url, self.schema()) + .with_projection(projection.cloned()) + .with_limit(limit); + + // Transform to the format needed to pass to ParquetExec + // Create one file group per file (default to scanning them all in parallel) + for (file_name, file_scan_plan) in files { + let path = self.dir.join(file_name); + let canonical_path = fs::canonicalize(path)?; + file_scan_config = file_scan_config.with_file( + PartitionedFile::new( + canonical_path.display().to_string(), + file_scan_plan.file_size, + ).with_extensions(Arc::new(file_scan_plan.access_plan)) + ); + } + + let predicate = predicate + .map(|predicate| state.create_physical_expr(predicate, &df_schema)) + .transpose()? 
+ // if there are no filters, use a literal true to have a predicate + // that always evaluates to true we can pass to the index + .unwrap_or_else(|| datafusion_physical_expr::expressions::lit(true)); + + let exec = ParquetExec::builder(file_scan_config) + .with_predicate(predicate) + .build_arc(); + + Ok(exec) + } + + /// Tell DataFusion to push filters down to the scan method + fn supports_filters_pushdown( + &self, + filters: &[&Expr], + ) -> Result> { + // Inexact because the pruning can't handle all expressions and pruning + // is not done at the row level -- there may be rows in returned files + // that do not pass the filter + Ok(vec![TableProviderFilterPushDown::Inexact; filters.len()]) + } +} + +impl IndexTableProvider { + pub fn last_execution(&self) -> Vec<(String, ParquetAccessPlan)> { + self.last_execution.lock().unwrap().borrow().last_execution() + } +} + +/// Demonstration Data +/// +/// Makes a directory with three parquet files +/// +/// The schema of the files is +/// * file_name (string) +/// * value (int32) +/// +/// The files are as follows: +/// * file1.parquet (values 0..100) +/// * file2.parquet (values 100..200) +/// * file3.parquet (values 200..3000) +struct DemoData { + tmpdir: TempDir, +} + +impl DemoData { + fn try_new() -> Result { + let tmpdir = TempDir::new()?; + make_demo_file(tmpdir.path().join("file1.parquet"), 0..100)?; + make_demo_file(tmpdir.path().join("file2.parquet"), 100..200)?; + make_demo_file(tmpdir.path().join("file3.parquet"), 200..3000)?; + + Ok(Self { tmpdir }) + } + + fn path(&self) -> PathBuf { + self.tmpdir.path().into() + } +} + +/// Creates a new parquet file at the specified path. 
+/// +/// The `value` column contains each element of `value_range`, in increasing +/// order; the file has the following schema: +/// +/// * file_name: Utf8 +/// * value: Int32 +fn make_demo_file(path: impl AsRef, value_range: Range) -> Result<()> { + let path = path.as_ref(); + let file = File::create(path)?; + let filename = path + .file_name() + .ok_or_else(|| internal_datafusion_err!("No filename"))? + .to_str() + .ok_or_else(|| internal_datafusion_err!("Invalid filename"))?; + + let num_values = value_range.len(); + let file_names = StringArray::from_iter_values(std::iter::repeat(&filename).take(num_values)); + let values = Int32Array::from_iter_values(value_range); + let batch = RecordBatch::try_from_iter(vec![ + ("file_name", Arc::new(file_names) as ArrayRef), + ("value", Arc::new(values) as ArrayRef), + ])?; + + let schema = batch.schema(); + + // write the actual values to the file + let props = WriterProperties::builder().set_max_row_group_size(50).build(); + let mut writer = ArrowWriter::try_new(file, schema, Some(props))?; + writer.write(&batch)?; + writer.finish()?; + + Ok(()) +} + +/// Return a list of the directory entries in the given directory, sorted by name +fn read_dir(dir: &Path) -> Result> { + let mut files = dir + .read_dir() + .map_err(|e| DataFusionError::from(e).context(format!("Error reading directory {dir:?}")))? 
+ .map(|entry| { + entry.map_err(|e| { + DataFusionError::from(e) + .context(format!("Error reading directory entry in {dir:?}")) + }) + }) + .collect::>>()?; + files.sort_by_key(|entry| entry.file_name()); + Ok(files) +} + + +#[derive(Debug, Clone)] +pub struct SimpleExecutionLog { + last_execution: Vec<(String, ParquetAccessPlan)>, +} + +impl SimpleExecutionLog { + fn new() -> Self { + Self { + last_execution: vec![], + } + } + + fn record(&mut self, plan: Vec<(String, ParquetAccessPlan)>) { + self.last_execution = plan; + } + + fn last_execution(&self) -> Vec<(String, ParquetAccessPlan)> { + self.last_execution.clone() + } +} From 3ebe0aa8accc381c30563c5191f448ac4c07637e Mon Sep 17 00:00:00 2001 From: Adrian Garcia Badaracco <1755071+adriangb@users.noreply.github.com> Date: Fri, 7 Jun 2024 12:18:02 -0500 Subject: [PATCH 02/21] Example using SQLx + SQLite --- sqlx-sqlite/src/index.rs | 23 +++++++---------------- 1 file changed, 7 insertions(+), 16 deletions(-) diff --git a/sqlx-sqlite/src/index.rs b/sqlx-sqlite/src/index.rs index d4a7d84..304fb1b 100644 --- a/sqlx-sqlite/src/index.rs +++ b/sqlx-sqlite/src/index.rs @@ -65,7 +65,7 @@ impl SQLiteIndex { ColumnStatistics::Table, SeaQExpr::col((FileStatistics::Table, FileStatistics::FileId)).equals((ColumnStatistics::Table, ColumnStatistics::FileId)), ) - .and_where_option(filter.map(|f| push_down_filter(&f)).flatten()) + .and_where_option(filter.and_then(|f| push_down_filter(&f))) .build_sqlx(SqliteQueryBuilder); // TODO: we could aggregate the row groups into an array in the query to transmit less data over the wire @@ -191,8 +191,8 @@ impl SQLiteIndex { arrow::datatypes::DataType::Int64 => { let min_values = min_values.as_primitive::(); let max_values = max_values.as_primitive::(); - let min = min_values.value(row_group) as i64; - let max = max_values.value(row_group) as i64; + let min = min_values.value(row_group); + let max = max_values.value(row_group); let stats = stats.build(MinMaxStats::Int(min, max)); 
column_statistics.push(stats); } @@ -221,7 +221,7 @@ impl SQLiteIndex { file_name: file_name.to_string(), file_size_bytes: file_size as i64, row_group_count: metadata.num_row_groups() as i64, - row_count: metadata.file_metadata().num_rows() as i64, + row_count: metadata.file_metadata().num_rows(), }; self.add_row(file_statistics, column_statistics).await?; @@ -447,11 +447,7 @@ pub fn push_down_filter(filter: &Expr) -> Option { // This is something we can push down! let column_name = column.name; let filter = push_down_binary_filter(&value, &binary_expr.op); - if let Some(filter) = filter { - Some(SeaQExpr::col(ColumnStatistics::ColumnName).eq(column_name).and(filter)) - } else { - None - } + filter.map(|filter| SeaQExpr::col(ColumnStatistics::ColumnName).eq(column_name).and(filter)) } (left, right) => { let left_pushdown = push_down_filter(&left); @@ -476,13 +472,8 @@ pub fn push_down_filter(filter: &Expr) -> Option { } }, Expr::Not(inner) => { - let inner_pushdown = push_down_filter(&*inner); - match inner_pushdown { - Some(inner_pushdown) => { - Some(inner_pushdown.not()) - }, - None => None - } + let inner_pushdown = push_down_filter(inner); + inner_pushdown.map(|inner_pushdown| inner_pushdown.not()) }, // We could handle more cases here, at least simple ones involving nulls, negations, etc. // But this example does not implement that From 57c46e86255bd4bf80ed46abf500612c771bc85c Mon Sep 17 00:00:00 2001 From: Adrian Garcia Badaracco <1755071+adriangb@users.noreply.github.com> Date: Fri, 7 Jun 2024 12:25:08 -0500 Subject: [PATCH 03/21] don't delete license --- LICENSE | 201 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 201 insertions(+) create mode 100644 LICENSE diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..261eeb9 --- /dev/null +++ b/LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. 
Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. 
+ + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of 
the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
From 55ebd615a1d5d12e47eb8aceb775a898f9aa3a6d Mon Sep 17 00:00:00 2001 From: Adrian Garcia Badaracco <1755071+adriangb@users.noreply.github.com> Date: Fri, 7 Jun 2024 12:26:28 -0500 Subject: [PATCH 04/21] remove data --- sqlx-sqlite/index.db | Bin 4096 -> 28672 bytes 1 file changed, 0 insertions(+), 0 deletions(-) diff --git a/sqlx-sqlite/index.db b/sqlx-sqlite/index.db index 4ebf78cf96088c7148cb47caf804cc036de75a34..d2876a7d3b0f4e48392a99f95cd1e9b0041b5572 100644 GIT binary patch literal 28672 zcmeI4d2Ae49mnU|Id^BBIDXFU@#e4{J3gLsCvD>tw+V5a*s)2I&~6-OZ8ovjj=j!B zFOkk03Xuv$K#@oaZIOT?afm`8rBa~?Xi=L&TO^=J9HLxlA{B}#MPg?CcI|!d@o4|4 z3VIu@oq6B+%*=jg=DYsQ{&x2EbY=4yZ)j{{bRh4wOBIr$NLxKmk|aBP8sSq9pRw?% z;b-`(@Q{oD;rzCx<}>37_fbi9ha`6>`j=?q%r7M2fN(%KARG`52nU1%!U5rca6mZl z&T-%aP&2AqTI9Pb^8<%RGJ|6yQ=_@`WPTu@oy=zkC#iwh^}U@PeVtxk$F{CcFGcAo zuii@yWk)jU?2(jrI6Iup<-P9ReO`BeS65RB$PJ8UQeHlDs!W_1JDDDy7@Hbrk-4dn zk@Vo$R4!js!^K4eITo%lJ(|s>PYjGqg)5ofhH&1(_`oS@d@?_g%?-~+qU{QSqAGsBq)Z%^-zT^+sqy`7!=y^j9A-8;I&h3x9=?h9XLVeyO| z;`Mj$=;`k)&M}!ilSv;wozF}bJ)ON@OS`UocaXD`?W>aLHC&_U)jf@}l+7K|Ch9{I6om7eFM};IMFC<@1{y3Tt58;4tKsX>A5Do|jgag6>;ec>J zI3OGl4hRR{UI(gWU8+-LIjY{K3V?sF+Zbns|Vkwc(UKAFn;oPcRJ}5nW z=LY*~wKB6HSe!pvQ1l^xq5i*;d@1?#+xzOo+Jpnb0pWmfKsX>A5Do|jgag6>;ec>J zI3OH&%MQ#@>k1bNO4kF_;z+6DS1QyxtL$Pz>GS_TzvcA_KsX>A5Do|jgag6>;ec>J zI3OGl4hRQ?1HyrKo&%M#+E}__Ga6(Ua%gp z#;h%tWxi-WX!II2`d{>K=}6zJd$G&0Z^wdIU#wPpMZ2hdQ0v!H>J{}d^&$0~I;kE| zJJfnLsl2AVq&%rStlX=dRt_mQD$Poj{D%Bn`6>CU@_q6h@{qhsUL!A(73q)CrP)>k zbgL~_%cdOO#z0aFUuanZ^$Gn!fZk6Ei=n<%KMv3tQdk6aSHBLR)1?A-_qyS)SUaS+KJSkK`?KiPXfF?;{9@M^~ z{TiSNQmBO5?b??CI!+36q1L6{2GBSu%z@fMZ6`ouq)-9XKWGa8%8^16s^3sw256KN zT&UiqJ_67PDI}oUtDXhu7%9Y|>Zv^d9VG<^DzB(D0A)$RhRS2gWq{sC3Kmp8q+A4O zm=sK?+@#zM&=4sYP+6($11LiZI+XvatN`c;Da4@sUHMgj21!AK@<-$+06I(xDwJ=L z?*V9l6ci{o$v0P_L!n^JK6at>y4+ZW(&4ySKK?`LAFb#XTdoO@bxEqLQw!hliN2Jy z9+q_ll})swn_0*@F7jR$vX+Y+WFc#~$N?78&PDdK5XePtVj*o@WFHG@+bMIRsIV=ij>TxW zG|`M|LI7{en*mzUPL`p|Ifk~Pce9XXTx16eNpX?wEToo;+{i*aE^-44so^4>EMzGc 
zc^3;=!bPrUA&a@lHWspoi*&G%gz-4i{-- zAvPCjWg!+9X<;EI7inf81{Yb)LUb+VaTx1msQMpJ13sE>o3#tzh{60VP z_dS=W3km$*`d^5!YSz)MxOR^2?HAqaFS^fPbZ-Df)c>OXpIvZ@`d`%lWmg$Q{XhGw zS5X~P>bq)1qW-5ZONsh__Ayd+z=-;P`r<|D8bR3w15y98{%{oa|9?E4T9K&#>HHP- zKj(lE^*?>Ui29#%!2Ivm|I;@Uh(7;cm^dMYcmI1P`AGPg|JmfxWKVKKvL>m!m)+;x zi|zyN-R`)%&%M@N;l>lMCVrlHBJugeJ&6+h4OkW#4HJ+uinBd$Fxrf3lvl9<}bbkTq)cS(~j?*duV&{F(W<`8o5P zdC**C0#i1g4LcB!kqb9XJcI+n0pWmfKsfM!;D9Lfd4)beOT%`DvfjP*&_T{O0a`)| z>!8!;_y8>?g|*O0IeP(GL<(!5ebuQ2Xdx-IL;G?23P1};0Ydwn{T+bjlR_J`57-|D zsG1a7pj0`Eg=T0yY5fhLd8Du!TK8Jt1E`V|nxJ*a`Y1qiNud#1 z%~l$qIi#=(nr~RE0jeN{253HI{u7`iDbz#rKJ!Naxuj4B%^~xX03}FaB{bKV8Gz!X zumYNj*$$9H3d^B!$&>-INnsf@E*Q@OWRXG&8Y9MefJ{=Tg~lf17(fOoc+fD7jR5JS zPy_WB4FjMUDJ+HhL;4EB2`JD*hQ+4La~cfCWT@bsYD96i`2h^6mS=*e>*AQ zE>izCQovoLKGhA2yGVVi8y0tw`cyY8?jrT6Zdlw!>QmjYxQo=Mx?%CBNd5_Ob8#1` zKSc_-i`1vOVR09!Pj$oME>fTBhQ(c^KGhA2yGZ?Ua^<*-)Tg>(aTlpib;IH=QlILE z#a*QSbT=#rsD!{t7f=a-lP;hV0w-NSB?L~ofJz9QbODtRIOzf^A#lVxv7bBZFSi0$w1GL143>!ZdyWg5(DG From 63f398fac1f282c97ca0db956bcd93bd1474dcfd Mon Sep 17 00:00:00 2001 From: Adrian Garcia Badaracco <1755071+adriangb@users.noreply.github.com> Date: Sun, 9 Jun 2024 13:19:11 -0500 Subject: [PATCH 05/21] doc updates --- sqlx-sqlite/index.db | Bin 28672 -> 28672 bytes sqlx-sqlite/src/index.rs | 102 +++++++++++++++++++++++++++++++++++---- sqlx-sqlite/src/main.rs | 57 +++++++++++++++------- 3 files changed, 133 insertions(+), 26 deletions(-) diff --git a/sqlx-sqlite/index.db b/sqlx-sqlite/index.db index d2876a7d3b0f4e48392a99f95cd1e9b0041b5572..e45f642b322398e0e8dd91c3dac1b7febec90950 100644 GIT binary patch literal 28672 zcmeHPdvIJ;8Q=SU?5jxlZS_8@H2a};ek2( z1n#v3kN?}l%2@N{J<0SxV`efJOC3smFa3RZpl-?m<$!WPIiMU+4k!nd1IhvAfO6n; zaNtwecFJ2?wChW!hYpMth9|~m4v*)jriZ3Sr=~}TrxqQs&f3-2JJ^>U?A^MfFI%?= zugf-M>kf^M74oA8>#_$%M@Gk|v;8{}Z-3j1L_y)Mcj&#~E;P;#huUa$@F) zNI5<;HkKcrm>Hj*MIl_E;E%y8<`0jK=dTzVn}HXJ&H=dL@uB03j!#Wbj*gEklmzDm zf>~f*6Q1ze^9{Sau~FL>S~hib3|h|oRN?4MVSKp2JvAA-IItmbp$U|0ue-){f{-WM 
z!kD3d^OL|m)tQ))+4E#}0$|LAk-}v5{9W75>)o|4yS;B;ws+6q&Tah=tSo+QMAK*^ilmp5E<$!WPIiMU+4k!nd1IhvAfO0@Npd9#cJFwKK z4;%lvH34Jx$lOwZzQm~ag2jRPH2@fsmVPY;|I|%6pd3&RCXa1*5am?-{3zyN&CNokq1`>u>14(7yrS3b-RZ zn(j;+si#wSrY=i$rOf0r$-9!5C%cnY;@6406Jv>f7bt!Kkjev zUGF*XUT?xX)AQWt-TT}l?nc*le(T)t9CbE1ar*`P%l4$b*-lu$v%X?YSzD~6`J(xl z`Jj2TdECsK=jgZSC-foxT)jm{+WXqe+Ed!MwcE8%Y9m^|)}gJ?jM%HOXSj($q{7$A zHCKbbV-PdKZ?r7O)^Y1=h%D!Ua%`Pr9Yds?2N1TFS$&8g9w@`+U#wC@%6MQIHos;5 z8Iff?P>RiK%x@x6$^#|X++kjgNC^)t#b%|s9g(Fxuml@_Gb<2T!UJh+eAjpdku(pa zuyMWd5F#laNMd7`aT1Xv4(v~qwht;;Q6K3 zAh?SNtr4NUr>uAmgoX$~a_C`l;MnJ5Pb@3;!Eq33^T!XyZfz-^?Q5Cv1?%g>&)ejK zZ~b8A<2B5amf~5U2+!aHDyoWGeJ>hRMaA5hq9OJ4R1|vw4da!U&!ORPdP<60d@mS` z28Kjq!NW_7o8ce`0u!Qxx#r>~-34dSm0*ds<@|(cn0o?ED)Z$9@)j zx0S?TDncLX>y5clbcM4ww?-NqpL^0;yjCRXluE7@RIWUW+k zR3vGTN{)ym^-{@%NU}yM85c=bOC^Uzl2ua4m`JixD!E)Fsgp`B6G>{Nl2MT)E0ug) zB*{o6BO*zSRB}ipsg_C#BFPG=m;aND`Myc8eswRI*DX@uZRgk;Iis&KF4>sbr@}VoN3cB8erHoF|f)QppaH z#E?q1izK>4(o*~=NND>pU%^Th+dx4J_y3`QzS$ua!T107S{tJ8|33u1{>k)Z>49{2 zI+M0jucw|(ol4!Cx-oSmwKugXwK|nZzMXtA`DpU1$zt+~c5Jd(7SKwzzTU73Xp1Zs#gzuhZtF?bqxl?ECC%?ThU$yVQEqddhmhI%(yt4c1-O z3F`t2TPgEZ^QY#$&|+U?cAA#)tnp*x>qgNyYHT)=`aAj`^+)tOxD^8R^fYlm_4{;% z<3rc)*A~x&g_f|luC6wMb&0NUDF$~%3m3Snr>C~Kf#Y;iPP80@yV`+s2FK}$IGr?3 zXAq}{E<|X5vPO3=?LOh&Z(b8YYnm zJ(S!7WC9%$!=Wbh&=>827bVaVF&t__H;vOB#DN}&;ZPI0X`JpL4m3Rshnlc(QV4W8 z42PP~MdJjMLZGc-IMjrNlR}`6VK}XeCxt-c!f;v^PYQufh2bPE$|t-4TiIha%h~kAP%$_3}Hv<7jY z31B!YLr#w7Gn@`^pz3Ehb(9mu$%Xk0Wj@2HjW{%)b74M1UC(f`ltbf$`3!|T!^uP( zn$Nl5B152ZXE-&KL*s<`45d25sg5`_pL4-QhCpr3a8^(bjT2mC2o&QCrz+ymi%c%K z$PlQ$8BQhT&^W8BSTmq4}H(^BHPnhO>-vXq+&gp(tiJr4eU#fj-PsvPGX!=G7=|A)q8O0F8M2jQ zcp-zPFl2Cs^l}V0Vni_@e=}qY$8bUhO<%~~4B5;v?1&M?fZWZHO&r4t88mevb2DTk z$1o#C6a(@$L*|MHIxj}ZplJ(Pn;~hzTSJ1vA#9B(BP7gTfnh6%1XV-W z2{D8y0%R*9P%^Olgk3oP>G;ElK%Kw>oj5)izX1^_4p^WA$Lr#|5rK+;1=_LyR=gIG zb{=TM{tx{(5ozNAjQyMZM-ah0ki-5({$~-%@jxr~8~lBUwDLd;_TKU95ozIpX6*gM z`v)S;Jg^>npYt9^WIYcwVeeA!W<;8Jpb>k`ULKJ~9$1Ine|zf@S;qrwvHNrPUx=*b 
zfd=e;!F>{u1|F!#?jiR!MCy594R+hz0wQa8U^RAiw+)fiJg^EoPrDi-t9W1~cJ6Y1 zg~&=CsKd^fb0;EoJWz|BGn~s2spWwzc3cPYD$4^IY(MWfh-7%62HW@B&mmI71JxLI z#_vO>3dy9P(krg~ph3%BR8IdX;sKnOq?Ia?VJWzqH2d$Ta{(nDgqMK#L%f{2E z`~E-l|7!n#!A7Xs{|`SI5MA`4v{3v1^>Z8H;WDDy|5y9}3qJmELbOD!_Wu{=2={>z zwg12PBQ|RPUy?AeZnoGlwf`@1Ozrtw+V5a*s)2I&~6-OZ8ovjj=j!B zFOkk03Xuv$K#@oaZIOT?afm`8rBa~?Xi=L&TO^=J9HLxlA{B}#MPg?CcI|!d@o4|4 z3VIu@oq6B+%*=jg=DYsQ{&x2EbY=4yZ)j{{bRh4wOBIr$NLxKmk|aBP8sSq9pRw?% z;b-`(@Q{oD;rzCx<}>37_fbi9ha`6>`j=?q%r7M2fN(%KARG`52nU1%!U5rca6mZl z&T-%aP&2AqTI9Pb^8<%RGJ|6yQ=_@`WPTu@oy=zkC#iwh^}U@PeVtxk$F{CcFGcAo zuii@yWk)jU?2(jrI6Iup<-P9ReO`BeS65RB$PJ8UQeHlDs!W_1JDDDy7@Hbrk-4dn zk@Vo$R4!js!^K4eITo%lJ(|s>PYjGqg)5ofhH&1(_`oS@d@?_g%?-~+qU{QSqAGsBq)Z%^-zT^+sqy`7!=y^j9A-8;I&h3x9=?h9XLVeyO| z;`Mj$=;`k)&M}!ilSv;wozF}bJ)ON@OS`UocaXD`?W>aLHC&_U)jf@}l+7K|Ch9{I6om7eFM};IMFC<@1{y3Tt58;4tKsX>A5Do|jgag6>;ec>J zI3OGl4hRR{UI(gWU8+-LIjY{K3V?sF+Zbns|Vkwc(UKAFn;oPcRJ}5nW z=LY*~wKB6HSe!pvQ1l^xq5i*;d@1?#+xzOo+Jpnb0pWmfKsX>A5Do|jgag6>;ec>J zI3OH&%MQ#@>k1bNO4kF_;z+6DS1QyxtL$Pz>GS_TzvcA_KsX>A5Do|jgag6>;ec>J zI3OGl4hRQ?1HyrKo&%M#+E}__Ga6(Ua%gp z#;h%tWxi-WX!II2`d{>K=}6zJd$G&0Z^wdIU#wPpMZ2hdQ0v!H>J{}d^&$0~I;kE| zJJfnLsl2AVq&%rStlX=dRt_mQD$Poj{D%Bn`6>CU@_q6h@{qhsUL!A(73q)CrP)>k zbgL~_%cdOO#z0aFUuanZ^$Gn!fZk6Ei=n<%KMv3tQdk6aSHBLR)1?A-_qyS)SUaS+KJSkK`?KiPXfF?;{9@M^~ z{TiSNQmBO5?b??CI!+36q1L6{2GBSu%z@fMZ6`ouq)-9XKWGa8%8^16s^3sw256KN zT&UiqJ_67PDI}oUtDXhu7%9Y|>Zv^d9VG<^DzB(D0A)$RhRS2gWq{sC3Kmp8q+A4O zm=sK?+@#zM&=4sYP+6($11LiZI+XvatN`c;Da4@sUHMgj21!AK@<-$+06I(xDwJ=L z?*V9l6ci{o$v0P_L!n^JK6at>y4+ZW(&4ySKK?`LAFb#XTdoO@bxEqLQw!hliN2Jy z9+q_ll})swn_0*@F7jR$vX+Y+WFc#~$N?78&PDdK5XePtVj*o@WFHG@+bMIRsIV=ij>TxW zG|`M|LI7{en*mzUPL`p|Ifk~Pce9XXTx16eNpX?wEToo;+{i*aE^-44so^4>EMzGc zc^3;=!bPrUA&a@lHWspoi*&G%gz-4i{-- zAvPCjWg!+9X<;EI7inf81{Yb)LUb+VaTx1msQMpJ13sE>o3#tzh{60VP z_dS=W3km$*`d^5!YSz)MxOR^2?HAqaFS^fPbZ-Df)c>OXpIvZ@`d`%lWmg$Q{XhGw zS5X~P>bq)1qW-5ZONsh__Ayd+z=-;P`r<|D8bR3w15y98{%{oa|9?E4T9K&#>HHP- 
zKj(lE^*?>Ui29#%!2Ivm|I;@Uh(7;cm^dMYcmI1P`AGPg|JmfxWKVKKvL>m!m)+;x zi|zyN-R`)%&%M@N;l>lMCVrlHBJugeJ&6+h4OkW#4HJ+uinBd$Fxrf3lvl9<}bbkTq)cS(~j?*duV&{F(W<`8o5P zdC**C0#i1g4LcB!kqb9XJcI+n0pWmfKsfM!;D9Lfd4)beOT%`DvfjP*&_T{O0a`)| z>!8!;_y8>?g|*O0IeP(GL<(!5ebuQ2Xdx-IL;G?23P1};0Ydwn{T+bjlR_J`57-|D zsG1a7pj0`Eg=T0yY5fhLd8Du!TK8Jt1E`V|nxJ*a`Y1qiNud#1 z%~l$qIi#=(nr~RE0jeN{253HI{u7`iDbz#rKJ!Naxuj4B%^~xX03}FaB{bKV8Gz!X zumYNj*$$9H3d^B!$&>-INnsf@E*Q@OWRXG&8Y9MefJ{=Tg~lf17(fOoc+fD7jR5JS zPy_WB4FjMUDJ+HhL;4EB2`JD*hQ+4La~cfCWT@bsYD96i`2h^6mS=*e>*AQ zE>izCQovoLKGhA2yGVVi8y0tw`cyY8?jrT6Zdlw!>QmjYxQo=Mx?%CBNd5_Ob8#1` zKSc_-i`1vOVR09!Pj$oME>fTBhQ(c^KGhA2yGZ?Ua^<*-)Tg>(aTlpib;IH=QlILE z#a*QSbT=#rsD!{t7f=a-lP;hV0w-NSB?L~ofJz9QbODtRIOzf^A#l) -> Result> { let (sql, values) = Query::select() .columns(vec![ @@ -74,6 +103,7 @@ impl SQLiteIndex { let row_groups: Vec<(String, i64, i64, i64)> = sqlx::query_as_with(&sql, values) .fetch_all(&self.pool) .await.unwrap(); // TODO: handle error, possibly failing gracefully by scanning all files? 
+ let mut file_scans: HashMap = HashMap::new(); // file_name -> (file_size, row_groups) @@ -82,7 +112,7 @@ impl SQLiteIndex { // Here we could do finer grained row-level filtering, but this example does not implement that access_plan.set(row_group_to_scan as usize, RowGroupAccess::Scan) } - + Ok( file_scans.into_iter().map(|(file_name, (file_size, access_plan))| { ( @@ -452,9 +482,9 @@ pub fn push_down_filter(filter: &Expr) -> Option { (left, right) => { let left_pushdown = push_down_filter(&left); let right_pushdown = push_down_filter(&right); - match (left_pushdown, right_pushdown) { - (Some(left_pushdown), Some(right_pushdown)) => { - match binary_expr.op { + match (left_pushdown, right_pushdown, binary_expr.op) { + (Some(left_pushdown), Some(right_pushdown), op) => { + match op { Operator::And => { Some(left_pushdown.and(right_pushdown)) }, @@ -466,6 +496,11 @@ pub fn push_down_filter(filter: &Expr) -> Option { } } } + // If we have A AND B but we can't push down B we can still push down A + // because A must be true for the whole expression to be true + (Some(left_pushdown), None, Operator::And) => Some(left_pushdown), + // Same for the other side + (None, Some(right_pushdown), Operator::And) => Some(right_pushdown), _ => None } } @@ -475,6 +510,44 @@ pub fn push_down_filter(filter: &Expr) -> Option { let inner_pushdown = push_down_filter(inner); inner_pushdown.map(|inner_pushdown| inner_pushdown.not()) }, + Expr::Like(inner) => { + let negated = inner.negated; + let expr = *inner.expr.clone(); + let pattern = *inner.pattern.clone(); + let escape_char = inner.escape_char; + let case_insensitive = inner.case_insensitive; + + let column = match &expr { + Expr::Column(column) => column.clone(), + _ => return None + }; + let pattern = match &pattern { + Expr::Literal(ScalarValue::Utf8(Some(pattern))) => pattern.clone(), + _ => return None + }; + // We don't support escape characters in this example + if escape_char.is_some() { + return None; + } + // Find the 
prefix in pattern by looking for the first `%` and truncate + let prefix_len = pattern.chars().position(|c| c == '%').unwrap_or_else(|| pattern.len()); + let mut prefix = pattern.chars().take(prefix_len).collect::(); + let mut min_val_col = SeaQExpr::col(ColumnStatistics::StringMinValue); + // If this is a case insensitive match we need to convert the prefix to lowercase + if case_insensitive { + prefix = prefix.to_lowercase(); + min_val_col = SeaQExpr::expr(sea_query::Func::lower(min_val_col)); + }; + + let filter = SeaQExpr::col(ColumnStatistics::ColumnName).eq(column.name).and( + min_val_col.gte(SqlValue::String(Some(Box::new(prefix.clone())))) + ); + if negated { + Some(filter.not()) + } else { + Some(filter) + } + }, // We could handle more cases here, at least simple ones involving nulls, negations, etc. // But this example does not implement that _ => None @@ -559,8 +632,19 @@ fn push_down_binary_filter(value: &ScalarValue, op: &Operator) -> Option { min_col.lte(sql_value) }, - // In theory we could handle LIKE for the limited but common case of a prefix match - // and maybe other operators, but this example does not implement that + Operator::LikeMatch => { + // Find a prefix in the LIKE pattern and use it to filter + match sql_value { + SqlValue::String(Some(pattern)) => { + let mut prefix = pattern.clone(); + let prefix_len = prefix.chars().position(|c| c == '%').unwrap_or_else(|| prefix.len()); + prefix.truncate(prefix_len); + min_col.lte(SqlValue::String(Some(prefix.clone()))).and(max_col.gte(SqlValue::String(Some(prefix)))) + }, + _ => return None + } + }, + // In theory we could handle other operators, but this example does not implement that _ => return None }; Some(expr) diff --git a/sqlx-sqlite/src/main.rs b/sqlx-sqlite/src/main.rs index daa5f97..b7443f9 100644 --- a/sqlx-sqlite/src/main.rs +++ b/sqlx-sqlite/src/main.rs @@ -27,10 +27,10 @@ use crate::index::SQLiteIndex; mod index; /// This example demonstrates building a secondary index over 
multiple Parquet -/// files and using that index during query to skip ("prune") files that do not -/// contain relevant data. +/// files and using that index during query to skip ("prune") files and row groups +/// that do not contain relevant data. /// -/// This example rules out relevant data using min/max values of a column +/// This example rules out irrelevant data using min/max values of a column /// extracted from the Parquet metadata. In a real system, the index could be /// more sophisticated, e.g. using inverted indices, bloom filters or other /// techniques. @@ -41,9 +41,6 @@ mod index; /// [`ListingTable`], which also do file pruning based on parquet statistics /// (using the same underlying APIs) /// -/// For a more advanced example of using an index to prune row groups within a -/// file, see the (forthcoming) `advanced_parquet_index` example. -/// /// # Diagram /// /// ```text @@ -53,27 +50,39 @@ mod index; /// step 1: predicate is ┌ ─ ─ ─ ─▶┃ (sometimes referred to ┃ /// evaluated against ┃ as a "catalog" or ┃ /// data in the index │ ┃ "metastore") ┃ -/// (using ┗━━━━━━━━━━━━━━━━━━━━━━━━┛ -/// PruningPredicate) │ │ +/// ┗━━━━━━━━━━━━━━━━━━━━━━━━┛ +/// │ │ /// /// │ │ /// ┌──────────────┐ /// │ value = 150 │─ ─ ─ ─ ┘ │ /// └──────────────┘ ┌─────────────┐ /// Predicate from query │ │ │ +/// │ skip file │ +/// │ │ │ /// └─────────────┘ /// │ ┌─────────────┐ -/// step 2: Index returns only ─ ▶│ │ -/// parquet files that might have └─────────────┘ -/// matching data. ... +/// step 2: Index returns only │ ┌────────┐ │ +/// parquet files that might │ │ │ scan │ │ +/// have matching data. │ │ rg 0 │ │ +/// │ │ └────────┘ │ +/// │ ┌────────┐ │ +/// │ │ │ scan │ │ +/// ─ ▶ │ │ rg 3 │ │ +/// The index can choose to │ └────────┘ │ +/// scan entire files, │ ... 
│ +/// only some row-groups within │ ┌────────┐ │ +/// each file, or even │ │ scan │ │ +/// individual rows within each │ │ rg n │ │ +/// row group (not shown in this) │ └────────┘ │ +/// example └─────────────┘ +/// ... /// ┌─────────────┐ -/// Thus some parquet files are │ │ -/// "pruned" and thus are not └─────────────┘ -/// scanned at all Parquet Files -/// +/// │ │ +/// └─────────────┘ +/// Parquet Files /// ``` /// -/// [`ListingTable`]: datafusion::datasource::listing::ListingTable #[tokio::main] async fn main() -> anyhow::Result<()> { // We use an in-memory SQLite database to store the index for this example @@ -137,6 +146,14 @@ async fn main() -> anyhow::Result<()> { .await?; println!("Files scanned: {:?}\n", provider.last_execution()); + // it's even possible to get LIKE pushed down to the index + println!("** Select data, predicate `text LIKE 'text2%0'`"); + ctx.sql("SELECT file_name, count(text) FROM index_table WHERE text LIKE 'text2%0' GROUP BY file_name") + .await? + .show() + .await?; + println!("Files scanned: {:?}\n", provider.last_execution()); + Ok(()) } @@ -311,6 +328,7 @@ impl DemoData { /// /// * file_name: Utf8 /// * value: Int32 +/// * text: Utf8 fn make_demo_file(path: impl AsRef, value_range: Range) -> Result<()> { let path = path.as_ref(); let file = File::create(path)?; @@ -322,10 +340,15 @@ fn make_demo_file(path: impl AsRef, value_range: Range) -> Result<()> let num_values = value_range.len(); let file_names = StringArray::from_iter_values(std::iter::repeat(&filename).take(num_values)); - let values = Int32Array::from_iter_values(value_range); + let values = Int32Array::from_iter_values(value_range.clone()); + let texts: StringArray = value_range + .map(|i| format!("text{}", i)) + .collect::>() + .into(); let batch = RecordBatch::try_from_iter(vec![ ("file_name", Arc::new(file_names) as ArrayRef), ("value", Arc::new(values) as ArrayRef), + ("text", Arc::new(texts) as ArrayRef), ])?; let schema = batch.schema(); From 
b22b77e0e86aba9a1aeff46669f03bdadb70f705 Mon Sep 17 00:00:00 2001 From: Adrian Garcia Badaracco <1755071+adriangb@users.noreply.github.com> Date: Sun, 9 Jun 2024 13:19:44 -0500 Subject: [PATCH 06/21] doc updates --- sqlx-sqlite/index.db | Bin 28672 -> 4096 bytes 1 file changed, 0 insertions(+), 0 deletions(-) diff --git a/sqlx-sqlite/index.db b/sqlx-sqlite/index.db index e45f642b322398e0e8dd91c3dac1b7febec90950..4ebf78cf96088c7148cb47caf804cc036de75a34 100644 GIT binary patch delta 33 ncmZp8z}TQLL7J73fq{W>Vxv7bBZFSi0$w1GL143>z%+gUg0u$b literal 28672 zcmeHPdvIJ;8Q=SU?5jxlZS_8@H2a};ek2( z1n#v3kN?}l%2@N{J<0SxV`efJOC3smFa3RZpl-?m<$!WPIiMU+4k!nd1IhvAfO6n; zaNtwecFJ2?wChW!hYpMth9|~m4v*)jriZ3Sr=~}TrxqQs&f3-2JJ^>U?A^MfFI%?= zugf-M>kf^M74oA8>#_$%M@Gk|v;8{}Z-3j1L_y)Mcj&#~E;P;#huUa$@F) zNI5<;HkKcrm>Hj*MIl_E;E%y8<`0jK=dTzVn}HXJ&H=dL@uB03j!#Wbj*gEklmzDm zf>~f*6Q1ze^9{Sau~FL>S~hib3|h|oRN?4MVSKp2JvAA-IItmbp$U|0ue-){f{-WM z!kD3d^OL|m)tQ))+4E#}0$|LAk-}v5{9W75>)o|4yS;B;ws+6q&Tah=tSo+QMAK*^ilmp5E<$!WPIiMU+4k!nd1IhvAfO0@Npd9#cJFwKK z4;%lvH34Jx$lOwZzQm~ag2jRPH2@fsmVPY;|I|%6pd3&RCXa1*5am?-{3zyN&CNokq1`>u>14(7yrS3b-RZ zn(j;+si#wSrY=i$rOf0r$-9!5C%cnY;@6406Jv>f7bt!Kkjev zUGF*XUT?xX)AQWt-TT}l?nc*le(T)t9CbE1ar*`P%l4$b*-lu$v%X?YSzD~6`J(xl z`Jj2TdECsK=jgZSC-foxT)jm{+WXqe+Ed!MwcE8%Y9m^|)}gJ?jM%HOXSj($q{7$A zHCKbbV-PdKZ?r7O)^Y1=h%D!Ua%`Pr9Yds?2N1TFS$&8g9w@`+U#wC@%6MQIHos;5 z8Iff?P>RiK%x@x6$^#|X++kjgNC^)t#b%|s9g(Fxuml@_Gb<2T!UJh+eAjpdku(pa zuyMWd5F#laNMd7`aT1Xv4(v~qwht;;Q6K3 zAh?SNtr4NUr>uAmgoX$~a_C`l;MnJ5Pb@3;!Eq33^T!XyZfz-^?Q5Cv1?%g>&)ejK zZ~b8A<2B5amf~5U2+!aHDyoWGeJ>hRMaA5hq9OJ4R1|vw4da!U&!ORPdP<60d@mS` z28Kjq!NW_7o8ce`0u!Qxx#r>~-34dSm0*ds<@|(cn0o?ED)Z$9@)j zx0S?TDncLX>y5clbcM4ww?-NqpL^0;yjCRXluE7@RIWUW+k zR3vGTN{)ym^-{@%NU}yM85c=bOC^Uzl2ua4m`JixD!E)Fsgp`B6G>{Nl2MT)E0ug) zB*{o6BO*zSRB}ipsg_C#BFPG=m;aND`Myc8eswRI*DX@uZRgk;Iis&KF4>sbr@}VoN3cB8erHoF|f)QppaH z#E?q1izK>4(o*~=NND>pU%^Th+dx4J_y3`QzS$ua!T107S{tJ8|33u1{>k)Z>49{2 
zI+M0jucw|(ol4!Cx-oSmwKugXwK|nZzMXtA`DpU1$zt+~c5Jd(7SKwzzTU73Xp1Zs#gzuhZtF?bqxl?ECC%?ThU$yVQEqddhmhI%(yt4c1-O z3F`t2TPgEZ^QY#$&|+U?cAA#)tnp*x>qgNyYHT)=`aAj`^+)tOxD^8R^fYlm_4{;% z<3rc)*A~x&g_f|luC6wMb&0NUDF$~%3m3Snr>C~Kf#Y;iPP80@yV`+s2FK}$IGr?3 zXAq}{E<|X5vPO3=?LOh&Z(b8YYnm zJ(S!7WC9%$!=Wbh&=>827bVaVF&t__H;vOB#DN}&;ZPI0X`JpL4m3Rshnlc(QV4W8 z42PP~MdJjMLZGc-IMjrNlR}`6VK}XeCxt-c!f;v^PYQufh2bPE$|t-4TiIha%h~kAP%$_3}Hv<7jY z31B!YLr#w7Gn@`^pz3Ehb(9mu$%Xk0Wj@2HjW{%)b74M1UC(f`ltbf$`3!|T!^uP( zn$Nl5B152ZXE-&KL*s<`45d25sg5`_pL4-QhCpr3a8^(bjT2mC2o&QCrz+ymi%c%K z$PlQ$8BQhT&^W8BSTmq4}H(^BHPnhO>-vXq+&gp(tiJr4eU#fj-PsvPGX!=G7=|A)q8O0F8M2jQ zcp-zPFl2Cs^l}V0Vni_@e=}qY$8bUhO<%~~4B5;v?1&M?fZWZHO&r4t88mevb2DTk z$1o#C6a(@$L*|MHIxj}ZplJ(Pn;~hzTSJ1vA#9B(BP7gTfnh6%1XV-W z2{D8y0%R*9P%^Olgk3oP>G;ElK%Kw>oj5)izX1^_4p^WA$Lr#|5rK+;1=_LyR=gIG zb{=TM{tx{(5ozNAjQyMZM-ah0ki-5({$~-%@jxr~8~lBUwDLd;_TKU95ozIpX6*gM z`v)S;Jg^>npYt9^WIYcwVeeA!W<;8Jpb>k`ULKJ~9$1Ine|zf@S;qrwvHNrPUx=*b zfd=e;!F>{u1|F!#?jiR!MCy594R+hz0wQa8U^RAiw+)fiJg^EoPrDi-t9W1~cJ6Y1 zg~&=CsKd^fb0;EoJWz|BGn~s2spWwzc3cPYD$4^IY(MWfh-7%62HW@B&mmI71JxLI z#_vO>3dy9P(krg~ph3%BR8IdX;sKnOq?Ia?VJWzqH2d$Ta{(nDgqMK#L%f{2E z`~E-l|7!n#!A7Xs{|`SI5MA`4v{3v1^>Z8H;WDDy|5y9}3qJmELbOD!_Wu{=2={>z zwg12PBQ|RPUy?AeZnoGlwf`@1Ozr Date: Mon, 10 Jun 2024 11:44:27 -0500 Subject: [PATCH 07/21] Use datafusion @ main, clean up deps, fix flushing of WAL to DB --- sqlx-sqlite/Cargo.lock | 994 +++++++++++++-------------------------- sqlx-sqlite/Cargo.toml | 33 +- sqlx-sqlite/index.db | Bin 4096 -> 28672 bytes sqlx-sqlite/src/index.rs | 52 +- sqlx-sqlite/src/main.rs | 10 +- 5 files changed, 367 insertions(+), 722 deletions(-) diff --git a/sqlx-sqlite/Cargo.lock b/sqlx-sqlite/Cargo.lock index f5d87ee..ec7cee3 100644 --- a/sqlx-sqlite/Cargo.lock +++ b/sqlx-sqlite/Cargo.lock @@ -96,9 +96,9 @@ checksum = "96d30a06541fbafbc7f82ed10c06164cfbd2c401138f6addd8404629c4b16711" [[package]] name = "arrow" -version = "51.0.0" +version = "52.0.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "219d05930b81663fd3b32e3bde8ce5bff3c4d23052a99f11a8fa50a3b47b2658" +checksum = "7ae9728f104939be6d8d9b368a354b4929b0569160ea1641f0721b55a861ce38" dependencies = [ "arrow-arith", "arrow-array", @@ -117,9 +117,9 @@ dependencies = [ [[package]] name = "arrow-arith" -version = "51.0.0" +version = "52.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0272150200c07a86a390be651abdd320a2d12e84535f0837566ca87ecd8f95e0" +checksum = "a7029a5b3efbeafbf4a12d12dc16b8f9e9bff20a410b8c25c5d28acc089e1043" dependencies = [ "arrow-array", "arrow-buffer", @@ -132,9 +132,9 @@ dependencies = [ [[package]] name = "arrow-array" -version = "51.0.0" +version = "52.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8010572cf8c745e242d1b632bd97bd6d4f40fefed5ed1290a8f433abaa686fea" +checksum = "d33238427c60271710695f17742f45b1a5dc5bcfc5c15331c25ddfe7abf70d97" dependencies = [ "ahash", "arrow-buffer", @@ -149,9 +149,9 @@ dependencies = [ [[package]] name = "arrow-buffer" -version = "51.0.0" +version = "52.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0d0a2432f0cba5692bf4cb757469c66791394bac9ec7ce63c1afe74744c37b27" +checksum = "fe9b95e825ae838efaf77e366c00d3fc8cca78134c9db497d6bda425f2e7b7c1" dependencies = [ "bytes", "half", @@ -160,9 +160,9 @@ dependencies = [ [[package]] name = "arrow-cast" -version = "51.0.0" +version = "52.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9abc10cd7995e83505cc290df9384d6e5412b207b79ce6bdff89a10505ed2cba" +checksum = "87cf8385a9d5b5fcde771661dd07652b79b9139fea66193eda6a88664400ccab" dependencies = [ "arrow-array", "arrow-buffer", @@ -181,9 +181,9 @@ dependencies = [ [[package]] name = "arrow-csv" -version = "51.0.0" +version = "52.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"95cbcba196b862270bf2a5edb75927380a7f3a163622c61d40cbba416a6305f2" +checksum = "cea5068bef430a86690059665e40034625ec323ffa4dd21972048eebb0127adc" dependencies = [ "arrow-array", "arrow-buffer", @@ -200,9 +200,9 @@ dependencies = [ [[package]] name = "arrow-data" -version = "51.0.0" +version = "52.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2742ac1f6650696ab08c88f6dd3f0eb68ce10f8c253958a18c943a68cd04aec5" +checksum = "cb29be98f987bcf217b070512bb7afba2f65180858bca462edf4a39d84a23e10" dependencies = [ "arrow-buffer", "arrow-schema", @@ -212,9 +212,9 @@ dependencies = [ [[package]] name = "arrow-ipc" -version = "51.0.0" +version = "52.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a42ea853130f7e78b9b9d178cb4cd01dee0f78e64d96c2949dc0a915d6d9e19d" +checksum = "ffc68f6523970aa6f7ce1dc9a33a7d9284cfb9af77d4ad3e617dbe5d79cc6ec8" dependencies = [ "arrow-array", "arrow-buffer", @@ -227,9 +227,9 @@ dependencies = [ [[package]] name = "arrow-json" -version = "51.0.0" +version = "52.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eaafb5714d4e59feae964714d724f880511500e3569cc2a94d02456b403a2a49" +checksum = "2041380f94bd6437ab648e6c2085a045e45a0c44f91a1b9a4fe3fed3d379bfb1" dependencies = [ "arrow-array", "arrow-buffer", @@ -247,9 +247,9 @@ dependencies = [ [[package]] name = "arrow-ord" -version = "51.0.0" +version = "52.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e3e6b61e3dc468f503181dccc2fc705bdcc5f2f146755fa5b56d0a6c5943f412" +checksum = "fcb56ed1547004e12203652f12fe12e824161ff9d1e5cf2a7dc4ff02ba94f413" dependencies = [ "arrow-array", "arrow-buffer", @@ -262,9 +262,9 @@ dependencies = [ [[package]] name = "arrow-row" -version = "51.0.0" +version = "52.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "848ee52bb92eb459b811fb471175ea3afcf620157674c8794f539838920f9228" +checksum = 
"575b42f1fc588f2da6977b94a5ca565459f5ab07b60545e17243fb9a7ed6d43e" dependencies = [ "ahash", "arrow-array", @@ -277,15 +277,15 @@ dependencies = [ [[package]] name = "arrow-schema" -version = "51.0.0" +version = "52.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "02d9483aaabe910c4781153ae1b6ae0393f72d9ef757d38d09d450070cf2e528" +checksum = "32aae6a60458a2389c0da89c9de0b7932427776127da1a738e2efc21d32f3393" [[package]] name = "arrow-select" -version = "51.0.0" +version = "52.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "849524fa70e0e3c5ab58394c770cb8f514d0122d20de08475f7b472ed8075830" +checksum = "de36abaef8767b4220d7b4a8c2fe5ffc78b47db81b03d77e2136091c3ba39102" dependencies = [ "ahash", "arrow-array", @@ -297,9 +297,9 @@ dependencies = [ [[package]] name = "arrow-string" -version = "51.0.0" +version = "52.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9373cb5a021aee58863498c37eb484998ef13377f69989c6c5ccfbd258236cdb" +checksum = "e435ada8409bcafc910bc3e0077f532a4daa20e99060a496685c0e3e53cc2597" dependencies = [ "arrow-array", "arrow-buffer", @@ -437,9 +437,9 @@ dependencies = [ [[package]] name = "brotli" -version = "3.5.0" +version = "6.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d640d25bc63c50fb1f0b545ffd80207d2e10a4c965530809b40ba3386825c391" +checksum = "74f7971dbd9326d58187408ab83117d8ac1bb9c17b085fdacd1cf2f598719b6b" dependencies = [ "alloc-no-stdlib", "alloc-stdlib", @@ -448,9 +448,9 @@ dependencies = [ [[package]] name = "brotli-decompressor" -version = "2.5.1" +version = "4.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4e2e4afe60d7dd600fdd3de8d0f08c2b7ec039712e3b6137ff98b7004e82de4f" +checksum = "9a45bd2e4095a8b518033b128020dd4a55aab1c0a381ba4404a472630f4bc362" dependencies = [ "alloc-no-stdlib", "alloc-stdlib", @@ -521,15 +521,14 @@ dependencies = [ "android-tzdata", 
"iana-time-zone", "num-traits", - "serde", "windows-targets 0.52.5", ] [[package]] name = "chrono-tz" -version = "0.8.6" +version = "0.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d59ae0466b83e838b81a54256c39d5d7c20b9d7daa10510a242d9b75abd5936e" +checksum = "93698b29de5e97ad0ae26447b344c482a7284c737d9ddc5f9e52b74a336671bb" dependencies = [ "chrono", "chrono-tz-build", @@ -538,9 +537,9 @@ dependencies = [ [[package]] name = "chrono-tz-build" -version = "0.2.1" +version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "433e39f13c9a060046954e0592a8d0a4bcb1040125cbf91cb8ee58964cfb350f" +checksum = "0c088aee841df9c3041febbb73934cfc39708749bf96dc827e3359cd39ef11b1" dependencies = [ "parse-zoneinfo", "phf", @@ -590,16 +589,6 @@ version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f7144d30dcf0fafbce74250a3963025d8d52177934239851c917d29f1df280c2" -[[package]] -name = "core-foundation" -version = "0.9.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "91e195e091a93c46f7102ec7818a2aa394e1e1771c3ab4825963fa03e45afb8f" -dependencies = [ - "core-foundation-sys", - "libc", -] - [[package]] name = "core-foundation-sys" version = "0.8.6" @@ -691,40 +680,6 @@ dependencies = [ "memchr", ] -[[package]] -name = "darling" -version = "0.14.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7b750cb3417fd1b327431a470f388520309479ab0bf5e323505daf0290cd3850" -dependencies = [ - "darling_core", - "darling_macro", -] - -[[package]] -name = "darling_core" -version = "0.14.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "109c1ca6e6b7f82cc233a97004ea8ed7ca123a9af07a8230878fcfda9b158bf0" -dependencies = [ - "fnv", - "ident_case", - "proc-macro2", - "quote", - "syn 1.0.109", -] - -[[package]] -name = "darling_macro" -version = "0.14.4" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "a4aab4dbc9f7611d8b55048a3a16d2d010c2c8334e46304b40ac1cc14bf3b48e" -dependencies = [ - "darling_core", - "quote", - "syn 1.0.109", -] - [[package]] name = "dashmap" version = "5.5.3" @@ -740,8 +695,8 @@ dependencies = [ [[package]] name = "datafusion" -version = "38.0.0" -source = "git+https://github.com/adriangb/datafusion.git#8c4acac95263a765a6ec5b715955b9911e7d2e0e" +version = "39.0.0" +source = "git+https://github.com/apache/datafusion.git#59120255916bcd624161ce8f5df255f2cc838406" dependencies = [ "ahash", "arrow", @@ -793,8 +748,8 @@ dependencies = [ [[package]] name = "datafusion-common" -version = "38.0.0" -source = "git+https://github.com/adriangb/datafusion.git#8c4acac95263a765a6ec5b715955b9911e7d2e0e" +version = "39.0.0" +source = "git+https://github.com/apache/datafusion.git#59120255916bcd624161ce8f5df255f2cc838406" dependencies = [ "ahash", "arrow", @@ -814,16 +769,16 @@ dependencies = [ [[package]] name = "datafusion-common-runtime" -version = "38.0.0" -source = "git+https://github.com/adriangb/datafusion.git#8c4acac95263a765a6ec5b715955b9911e7d2e0e" +version = "39.0.0" +source = "git+https://github.com/apache/datafusion.git#59120255916bcd624161ce8f5df255f2cc838406" dependencies = [ "tokio", ] [[package]] name = "datafusion-execution" -version = "38.0.0" -source = "git+https://github.com/adriangb/datafusion.git#8c4acac95263a765a6ec5b715955b9911e7d2e0e" +version = "39.0.0" +source = "git+https://github.com/apache/datafusion.git#59120255916bcd624161ce8f5df255f2cc838406" dependencies = [ "arrow", "chrono", @@ -842,12 +797,13 @@ dependencies = [ [[package]] name = "datafusion-expr" -version = "38.0.0" -source = "git+https://github.com/adriangb/datafusion.git#8c4acac95263a765a6ec5b715955b9911e7d2e0e" +version = "39.0.0" +source = "git+https://github.com/apache/datafusion.git#59120255916bcd624161ce8f5df255f2cc838406" dependencies = [ "ahash", "arrow", "arrow-array", + "arrow-buffer", 
"chrono", "datafusion-common", "paste", @@ -859,8 +815,8 @@ dependencies = [ [[package]] name = "datafusion-functions" -version = "38.0.0" -source = "git+https://github.com/adriangb/datafusion.git#8c4acac95263a765a6ec5b715955b9911e7d2e0e" +version = "39.0.0" +source = "git+https://github.com/apache/datafusion.git#59120255916bcd624161ce8f5df255f2cc838406" dependencies = [ "arrow", "base64 0.22.1", @@ -885,8 +841,8 @@ dependencies = [ [[package]] name = "datafusion-functions-aggregate" -version = "38.0.0" -source = "git+https://github.com/adriangb/datafusion.git#8c4acac95263a765a6ec5b715955b9911e7d2e0e" +version = "39.0.0" +source = "git+https://github.com/apache/datafusion.git#59120255916bcd624161ce8f5df255f2cc838406" dependencies = [ "ahash", "arrow", @@ -902,8 +858,8 @@ dependencies = [ [[package]] name = "datafusion-functions-array" -version = "38.0.0" -source = "git+https://github.com/adriangb/datafusion.git#8c4acac95263a765a6ec5b715955b9911e7d2e0e" +version = "39.0.0" +source = "git+https://github.com/apache/datafusion.git#59120255916bcd624161ce8f5df255f2cc838406" dependencies = [ "arrow", "arrow-array", @@ -921,8 +877,8 @@ dependencies = [ [[package]] name = "datafusion-optimizer" -version = "38.0.0" -source = "git+https://github.com/adriangb/datafusion.git#8c4acac95263a765a6ec5b715955b9911e7d2e0e" +version = "39.0.0" +source = "git+https://github.com/apache/datafusion.git#59120255916bcd624161ce8f5df255f2cc838406" dependencies = [ "arrow", "async-trait", @@ -934,13 +890,14 @@ dependencies = [ "indexmap", "itertools", "log", + "paste", "regex-syntax", ] [[package]] name = "datafusion-physical-expr" -version = "38.0.0" -source = "git+https://github.com/adriangb/datafusion.git#8c4acac95263a765a6ec5b715955b9911e7d2e0e" +version = "39.0.0" +source = "git+https://github.com/apache/datafusion.git#59120255916bcd624161ce8f5df255f2cc838406" dependencies = [ "ahash", "arrow", @@ -969,8 +926,8 @@ dependencies = [ [[package]] name = "datafusion-physical-expr-common" 
-version = "38.0.0" -source = "git+https://github.com/adriangb/datafusion.git#8c4acac95263a765a6ec5b715955b9911e7d2e0e" +version = "39.0.0" +source = "git+https://github.com/apache/datafusion.git#59120255916bcd624161ce8f5df255f2cc838406" dependencies = [ "arrow", "datafusion-common", @@ -980,8 +937,8 @@ dependencies = [ [[package]] name = "datafusion-physical-plan" -version = "38.0.0" -source = "git+https://github.com/adriangb/datafusion.git#8c4acac95263a765a6ec5b715955b9911e7d2e0e" +version = "39.0.0" +source = "git+https://github.com/apache/datafusion.git#59120255916bcd624161ce8f5df255f2cc838406" dependencies = [ "ahash", "arrow", @@ -1013,8 +970,8 @@ dependencies = [ [[package]] name = "datafusion-sql" -version = "38.0.0" -source = "git+https://github.com/adriangb/datafusion.git#8c4acac95263a765a6ec5b715955b9911e7d2e0e" +version = "39.0.0" +source = "git+https://github.com/apache/datafusion.git#59120255916bcd624161ce8f5df255f2cc838406" dependencies = [ "arrow", "arrow-array", @@ -1038,15 +995,6 @@ dependencies = [ "zeroize", ] -[[package]] -name = "deranged" -version = "0.3.11" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b42b6fa04a440b495c8b04d0e71b707c585f83cb9cb28cf8cd0d976c315e31b4" -dependencies = [ - "powerfmt", -] - [[package]] name = "derivative" version = "2.2.0" @@ -1070,6 +1018,17 @@ dependencies = [ "subtle", ] +[[package]] +name = "displaydoc" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "487585f4d0c6655fe74905e2504d8ad6908e4db67f744eb140876906c2f3175d" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.66", +] + [[package]] name = "doc-comment" version = "0.3.3" @@ -1091,15 +1050,6 @@ dependencies = [ "serde", ] -[[package]] -name = "encoding_rs" -version = "0.8.34" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b45de904aa0b010bce2ab45264d0631681847fa7b6f2eaa7dab7619943bc4f59" -dependencies = [ - "cfg-if", -] - [[package]] 
name = "equivalent" version = "1.0.1" @@ -1147,9 +1097,9 @@ checksum = "0ce7134b9999ecaf8bcd65542e436736ef32ddca1b3e06094cb6ec5755203b80" [[package]] name = "flatbuffers" -version = "23.5.26" +version = "24.3.25" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4dac53e22462d78c16d64a1cd22371b54cc3fe94aa15e7886a2fa6e5d1ab8640" +checksum = "8add37afff2d4ffa83bc748a70b4b1370984f6980768554182424ef71447c35f" dependencies = [ "bitflags 1.3.2", "rustc_version", @@ -1176,27 +1126,6 @@ dependencies = [ "spin 0.9.8", ] -[[package]] -name = "fnv" -version = "1.0.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" - -[[package]] -name = "foreign-types" -version = "0.3.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f6f339eb8adc052cd2ca78910fda869aefa38d22d5cb648e6485e4d3fc06f3b1" -dependencies = [ - "foreign-types-shared", -] - -[[package]] -name = "foreign-types-shared" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "00b0228411908ca8685dba7fc2cdd70ec9990a6e753e89b6ac91a84c40fbaf4b" - [[package]] name = "form_urlencoded" version = "1.2.1" @@ -1339,25 +1268,6 @@ version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b" -[[package]] -name = "h2" -version = "0.3.26" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "81fe527a889e1532da5c525686d96d4c2e74cdd345badf8dfef9f6b39dd5f5e8" -dependencies = [ - "bytes", - "fnv", - "futures-core", - "futures-sink", - "futures-util", - "http", - "indexmap", - "slab", - "tokio", - "tokio-util", - "tracing", -] - [[package]] name = "half" version = "2.4.1" @@ -1443,120 +1353,162 @@ dependencies = [ ] [[package]] -name = "http" -version = "0.2.12" +name = "humantime" +version = "2.1.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "601cbb57e577e2f5ef5be8e7b83f0f63994f25aa94d673e54a92d5c516d101f1" -dependencies = [ - "bytes", - "fnv", - "itoa", -] +checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4" [[package]] -name = "http-body" -version = "0.4.6" +name = "iana-time-zone" +version = "0.1.60" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7ceab25649e9960c0311ea418d17bee82c0dcec1bd053b5f9a66e265a693bed2" +checksum = "e7ffbb5a1b541ea2561f8c41c087286cc091e21e556a4f09a8f6cbf17b69b141" dependencies = [ - "bytes", - "http", - "pin-project-lite", + "android_system_properties", + "core-foundation-sys", + "iana-time-zone-haiku", + "js-sys", + "wasm-bindgen", + "windows-core", ] [[package]] -name = "httparse" -version = "1.8.0" +name = "iana-time-zone-haiku" +version = "0.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d897f394bad6a705d5f4104762e116a75639e470d80901eed05a860a95cb1904" +checksum = "f31827a206f56af32e590ba56d5d2d085f558508192593743f16b2306495269f" +dependencies = [ + "cc", +] [[package]] -name = "httpdate" -version = "1.0.3" +name = "icu_collections" +version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "df3b46402a9d5adb4c86a0cf463f42e19994e3ee891101b1841f30a545cb49a9" +checksum = "db2fa452206ebee18c4b5c2274dbf1de17008e874b4dc4f0aea9d01ca79e4526" +dependencies = [ + "displaydoc", + "yoke", + "zerofrom", + "zerovec", +] [[package]] -name = "humantime" -version = "2.1.0" +name = "icu_locid" +version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4" +checksum = "13acbb8371917fc971be86fc8057c41a64b521c184808a698c02acc242dbf637" +dependencies = [ + "displaydoc", + "litemap", + "tinystr", + "writeable", + "zerovec", +] [[package]] -name = "hyper" -version = "0.14.29" +name = 
"icu_locid_transform" +version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f361cde2f109281a220d4307746cdfd5ee3f410da58a70377762396775634b33" +checksum = "01d11ac35de8e40fdeda00d9e1e9d92525f3f9d887cdd7aa81d727596788b54e" dependencies = [ - "bytes", - "futures-channel", - "futures-core", - "futures-util", - "h2", - "http", - "http-body", - "httparse", - "httpdate", - "itoa", - "pin-project-lite", - "socket2", - "tokio", - "tower-service", - "tracing", - "want", + "displaydoc", + "icu_locid", + "icu_locid_transform_data", + "icu_provider", + "tinystr", + "zerovec", ] [[package]] -name = "hyper-rustls" -version = "0.24.2" +name = "icu_locid_transform_data" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fdc8ff3388f852bede6b579ad4e978ab004f139284d7b28715f773507b946f6e" + +[[package]] +name = "icu_normalizer" +version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ec3efd23720e2049821a693cbc7e65ea87c72f1c58ff2f9522ff332b1491e590" +checksum = "19ce3e0da2ec68599d193c93d088142efd7f9c5d6fc9b803774855747dc6a84f" dependencies = [ - "futures-util", - "http", - "hyper", - "rustls", - "tokio", - "tokio-rustls", + "displaydoc", + "icu_collections", + "icu_normalizer_data", + "icu_properties", + "icu_provider", + "smallvec", + "utf16_iter", + "utf8_iter", + "write16", + "zerovec", ] [[package]] -name = "iana-time-zone" -version = "0.1.60" +name = "icu_normalizer_data" +version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e7ffbb5a1b541ea2561f8c41c087286cc091e21e556a4f09a8f6cbf17b69b141" +checksum = "f8cafbf7aa791e9b22bec55a167906f9e1215fd475cd22adfcf660e03e989516" + +[[package]] +name = "icu_properties" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1f8ac670d7422d7f76b32e17a5db556510825b29ec9154f235977c9caba61036" dependencies = [ - 
"android_system_properties", - "core-foundation-sys", - "iana-time-zone-haiku", - "js-sys", - "wasm-bindgen", - "windows-core", + "displaydoc", + "icu_collections", + "icu_locid_transform", + "icu_properties_data", + "icu_provider", + "tinystr", + "zerovec", ] [[package]] -name = "iana-time-zone-haiku" -version = "0.1.2" +name = "icu_properties_data" +version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f31827a206f56af32e590ba56d5d2d085f558508192593743f16b2306495269f" +checksum = "67a8effbc3dd3e4ba1afa8ad918d5684b8868b3b26500753effea8d2eed19569" + +[[package]] +name = "icu_provider" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ed421c8a8ef78d3e2dbc98a973be2f3770cb42b606e3ab18d6237c4dfde68d9" dependencies = [ - "cc", + "displaydoc", + "icu_locid", + "icu_provider_macros", + "stable_deref_trait", + "tinystr", + "writeable", + "yoke", + "zerofrom", + "zerovec", ] [[package]] -name = "ident_case" -version = "1.0.1" +name = "icu_provider_macros" +version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39" +checksum = "1ec89e9337638ecdc08744df490b221a7399bf8d164eb52a665454e60e075ad6" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.66", +] [[package]] name = "idna" -version = "0.5.0" +version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "634d9b1461af396cad843f47fdba5597a4f9e6ddd4bfb6ff5d85028c25cb12f6" +checksum = "4716a3a0933a1d01c2f72450e89596eb51dd34ef3c211ccd875acdf1f8fe47ed" dependencies = [ - "unicode-bidi", - "unicode-normalization", + "icu_normalizer", + "icu_properties", + "smallvec", + "utf8_iter", ] [[package]] @@ -1598,12 +1550,6 @@ version = "3.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8bb03732005da905c88227371639bf1ad885cc712789c011c31c5fb3ab3ccf02" -[[package]] -name = 
"ipnet" -version = "2.9.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8f518f335dce6725a761382244631d86cf0ccb2863413590b31338feb467f9c3" - [[package]] name = "itertools" version = "0.12.1" @@ -1739,6 +1685,12 @@ version = "0.4.14" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "78b3ae25bc7c8c38cec158d1f2757ee79e9b3740fbc7ccf0e59e4b08d793fa89" +[[package]] +name = "litemap" +version = "0.7.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "643cb0b8d4fcc284004d5fd0d67ccf61dfffadb7f75e1e71bc420f4688a3a704" + [[package]] name = "lock_api" version = "0.4.12" @@ -1791,12 +1743,6 @@ version = "2.7.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6c8640c5d730cb13ebd907d8d04b52f55ac9a2eec55b440c8892f40d56c76c1d" -[[package]] -name = "mime" -version = "0.3.17" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6877bb514081ee2a7ff5ef9de3281f14a4dd4bceac4c09388074a6b5df8a139a" - [[package]] name = "minimal-lexical" version = "0.2.1" @@ -1823,23 +1769,6 @@ dependencies = [ "windows-sys 0.48.0", ] -[[package]] -name = "native-tls" -version = "0.2.12" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a8614eb2c83d59d1c8cc974dd3f920198647674a0a035e1af1fa58707e317466" -dependencies = [ - "libc", - "log", - "openssl", - "openssl-probe", - "openssl-sys", - "schannel", - "security-framework", - "security-framework-sys", - "tempfile", -] - [[package]] name = "nom" version = "7.1.3" @@ -1900,12 +1829,6 @@ dependencies = [ "num-traits", ] -[[package]] -name = "num-conv" -version = "0.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "51d515d32fb182ee37cda2ccdcb92950d6a3c2893aa280e540671c2cd0f3b1d9" - [[package]] name = "num-integer" version = "0.1.46" @@ -1968,27 +1891,18 @@ dependencies = [ [[package]] name = "object_store" -version = "0.9.1" +version = "0.10.1" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "b8718f8b65fdf67a45108d1548347d4af7d71fb81ce727bbf9e3b2535e079db3" +checksum = "fbebfd32c213ba1907fa7a9c9138015a8de2b43e30c5aa45b18f7deb46786ad6" dependencies = [ "async-trait", - "base64 0.21.7", "bytes", "chrono", "futures", "humantime", - "hyper", "itertools", "parking_lot", "percent-encoding", - "quick-xml", - "rand", - "reqwest", - "ring", - "rustls-pemfile 2.1.2", - "serde", - "serde_json", "snafu", "tokio", "tracing", @@ -2002,50 +1916,6 @@ version = "1.19.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92" -[[package]] -name = "openssl" -version = "0.10.64" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "95a0481286a310808298130d22dd1fef0fa571e05a8f44ec801801e84b216b1f" -dependencies = [ - "bitflags 2.5.0", - "cfg-if", - "foreign-types", - "libc", - "once_cell", - "openssl-macros", - "openssl-sys", -] - -[[package]] -name = "openssl-macros" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a948666b637a0f465e8564c73e89d4dde00d72d4d473cc972f390fc3dcee7d9c" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.66", -] - -[[package]] -name = "openssl-probe" -version = "0.1.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ff011a302c396a5197692431fc1948019154afc178baf7d8e37367442a4601cf" - -[[package]] -name = "openssl-sys" -version = "0.9.102" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c597637d56fbc83893a35eb0dd04b2b8e7a50c91e64e9493e398b5df4fb45fa2" -dependencies = [ - "cc", - "libc", - "pkg-config", - "vcpkg", -] - [[package]] name = "ordered-float" version = "2.10.1" @@ -2089,9 +1959,9 @@ dependencies = [ [[package]] name = "parquet" -version = "51.0.0" +version = "52.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"096795d4f47f65fd3ee1ec5a98b77ab26d602f2cc785b0e4be5443add17ecc32" +checksum = "29c3b5322cc1bbf67f11c079c42be41a55949099b78732f7dba9e15edde40eab" dependencies = [ "ahash", "arrow-array", @@ -2120,6 +1990,7 @@ dependencies = [ "tokio", "twox-hash", "zstd", + "zstd-sys", ] [[package]] @@ -2239,12 +2110,6 @@ version = "0.3.30" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d231b230927b5e4ad203db57bbcbee2802f6bce620b1e4a9024a07d94e2907ec" -[[package]] -name = "powerfmt" -version = "0.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "439ee305def115ba05938db6eb1644ff94165c5ab5e9420d1c1bcedbba909391" - [[package]] name = "ppv-lite86" version = "0.2.17" @@ -2260,16 +2125,6 @@ dependencies = [ "unicode-ident", ] -[[package]] -name = "quick-xml" -version = "0.31.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1004a344b30a54e2ee58d66a71b32d2db2feb0a31f9a2d302bf0536f15de2a33" -dependencies = [ - "memchr", - "serde", -] - [[package]] name = "quote" version = "1.0.36" @@ -2329,9 +2184,9 @@ dependencies = [ [[package]] name = "regex" -version = "1.10.4" +version = "1.10.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c117dbdfde9c8308975b6a18d71f3f385c89461f7b3fb054288ecf2a2058ba4c" +checksum = "b91213439dad192326a0d7c6ee3955910425f441d7038e0d6933b0aec5c4517f" dependencies = [ "aho-corasick", "memchr", @@ -2341,9 +2196,9 @@ dependencies = [ [[package]] name = "regex-automata" -version = "0.4.6" +version = "0.4.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "86b83b8b9847f9bf95ef68afb0b8e6cdb80f498442f5179a29fad448fcc1eaea" +checksum = "38caf58cc5ef2fed281f89292ef23f6365465ed9a41b7a7754eb4e26496c92df" dependencies = [ "aho-corasick", "memchr", @@ -2352,67 +2207,9 @@ dependencies = [ [[package]] name = "regex-syntax" -version = "0.8.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"adad44e29e4c806119491a7f06f03de4d1af22c3a680dd47f1e6e179439d1f56" - -[[package]] -name = "reqwest" -version = "0.11.27" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dd67538700a17451e7cba03ac727fb961abb7607553461627b97de0b89cf4a62" -dependencies = [ - "base64 0.21.7", - "bytes", - "encoding_rs", - "futures-core", - "futures-util", - "h2", - "http", - "http-body", - "hyper", - "hyper-rustls", - "ipnet", - "js-sys", - "log", - "mime", - "once_cell", - "percent-encoding", - "pin-project-lite", - "rustls", - "rustls-native-certs", - "rustls-pemfile 1.0.4", - "serde", - "serde_json", - "serde_urlencoded", - "sync_wrapper", - "system-configuration", - "tokio", - "tokio-rustls", - "tokio-util", - "tower-service", - "url", - "wasm-bindgen", - "wasm-bindgen-futures", - "wasm-streams", - "web-sys", - "winreg", -] - -[[package]] -name = "ring" -version = "0.17.8" +version = "0.8.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c17fa4cb658e3583423e915b9f3acc01cceaee1860e33d59ebae66adc3a2dc0d" -dependencies = [ - "cc", - "cfg-if", - "getrandom", - "libc", - "spin 0.9.8", - "untrusted", - "windows-sys 0.52.0", -] +checksum = "7a66a03ae7c801facd77a29370b4faec201768915ac14a721ba36f20bc9c209b" [[package]] name = "rsa" @@ -2462,65 +2259,6 @@ dependencies = [ "windows-sys 0.52.0", ] -[[package]] -name = "rustls" -version = "0.21.12" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3f56a14d1f48b391359b22f731fd4bd7e43c97f3c50eee276f3aa09c94784d3e" -dependencies = [ - "log", - "ring", - "rustls-webpki", - "sct", -] - -[[package]] -name = "rustls-native-certs" -version = "0.6.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a9aace74cb666635c918e9c12bc0d348266037aa8eb599b5cba565709a8dff00" -dependencies = [ - "openssl-probe", - "rustls-pemfile 1.0.4", - "schannel", - "security-framework", -] - -[[package]] -name = "rustls-pemfile" -version = "1.0.4" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "1c74cae0a4cf6ccbbf5f359f08efdf8ee7e1dc532573bf0db71968cb56b1448c" -dependencies = [ - "base64 0.21.7", -] - -[[package]] -name = "rustls-pemfile" -version = "2.1.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "29993a25686778eb88d4189742cd713c9bce943bc54251a33509dc63cbacf73d" -dependencies = [ - "base64 0.22.1", - "rustls-pki-types", -] - -[[package]] -name = "rustls-pki-types" -version = "1.7.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "976295e77ce332211c0d24d92c0e83e50f5c5f046d11082cea19f3df13a3562d" - -[[package]] -name = "rustls-webpki" -version = "0.101.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8b6275d1ee7a1cd780b64aca7726599a1dbc893b1e64144529e55c3c2f745765" -dependencies = [ - "ring", - "untrusted", -] - [[package]] name = "rustversion" version = "1.0.17" @@ -2542,56 +2280,22 @@ dependencies = [ "winapi-util", ] -[[package]] -name = "schannel" -version = "0.1.23" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fbc91545643bcf3a0bbb6569265615222618bdf33ce4ffbbd13c4bbd4c093534" -dependencies = [ - "windows-sys 0.52.0", -] - [[package]] name = "scopeguard" version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" -[[package]] -name = "sct" -version = "0.7.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "da046153aa2352493d6cb7da4b6e5c0c057d8a1d0a9aa8560baffdd945acd414" -dependencies = [ - "ring", - "untrusted", -] - [[package]] name = "sea-query" version = "0.30.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4166a1e072292d46dc91f31617c2a1cdaf55a8be4b5c9f4bf2ba248e3ac4999b" dependencies = [ - "chrono", "derivative", "inherent", "ordered-float 3.9.2", - "sea-query-attr", "sea-query-derive", - 
"serde_json", -] - -[[package]] -name = "sea-query-attr" -version = "0.1.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "168a31e0ef5a791ad26aa97c502eaed8d2a1ffdc22b3249f9947c1e12be6b477" -dependencies = [ - "darling", - "heck 0.4.1", - "quote", - "syn 1.0.109", ] [[package]] @@ -2600,9 +2304,7 @@ version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "36bbb68df92e820e4d5aeb17b4acd5cc8b5d18b2c36a4dd6f4626aabfa7ab1b9" dependencies = [ - "chrono", "sea-query", - "serde_json", "sqlx", ] @@ -2619,29 +2321,6 @@ dependencies = [ "thiserror", ] -[[package]] -name = "security-framework" -version = "2.11.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c627723fd09706bacdb5cf41499e95098555af3c3c29d014dc3c458ef6be11c0" -dependencies = [ - "bitflags 2.5.0", - "core-foundation", - "core-foundation-sys", - "libc", - "security-framework-sys", -] - -[[package]] -name = "security-framework-sys" -version = "2.11.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "317936bbbd05227752583946b9e66d7ce3b489f84e11a94a510b4437fef407d7" -dependencies = [ - "core-foundation-sys", - "libc", -] - [[package]] name = "semver" version = "1.0.23" @@ -2685,18 +2364,6 @@ dependencies = [ "serde", ] -[[package]] -name = "serde_urlencoded" -version = "0.7.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d3491c14715ca2294c4d6a88f15e84739788c1d030eed8c110436aafdaa2f3fd" -dependencies = [ - "form_urlencoded", - "itoa", - "ryu", - "serde", -] - [[package]] name = "sha1" version = "0.10.6" @@ -2868,7 +2535,6 @@ dependencies = [ "atoi", "byteorder", "bytes", - "chrono", "crc", "crossbeam-queue", "either", @@ -2883,7 +2549,6 @@ dependencies = [ "indexmap", "log", "memchr", - "native-tls", "once_cell", "paste", "percent-encoding", @@ -2897,7 +2562,6 @@ dependencies = [ "tokio-stream", "tracing", "url", - "uuid", ] [[package]] @@ -2931,7 +2595,6 
@@ dependencies = [ "sha2", "sqlx-core", "sqlx-mysql", - "sqlx-postgres", "sqlx-sqlite", "syn 1.0.109", "tempfile", @@ -2950,7 +2613,6 @@ dependencies = [ "bitflags 2.5.0", "byteorder", "bytes", - "chrono", "crc", "digest", "dotenvy", @@ -2979,7 +2641,6 @@ dependencies = [ "stringprep", "thiserror", "tracing", - "uuid", "whoami", ] @@ -2993,7 +2654,6 @@ dependencies = [ "base64 0.21.7", "bitflags 2.5.0", "byteorder", - "chrono", "crc", "dotenvy", "etcetera", @@ -3019,7 +2679,6 @@ dependencies = [ "stringprep", "thiserror", "tracing", - "uuid", "whoami", ] @@ -3030,7 +2689,6 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b244ef0a8414da0bed4bb1910426e890b19e5e9bccc27ada6b797d05c55ae0aa" dependencies = [ "atoi", - "chrono", "flume", "futures-channel", "futures-core", @@ -3045,7 +2703,6 @@ dependencies = [ "tracing", "url", "urlencoding", - "uuid", ] [[package]] @@ -3053,8 +2710,6 @@ name = "sqlx-sqlite-index-example" version = "0.1.0" dependencies = [ "anyhow", - "arrow", - "arrow-schema", "async-trait", "datafusion", "datafusion-common", @@ -3068,11 +2723,16 @@ dependencies = [ "sqlx", "tempfile", "thiserror", - "time", "tokio", "url", ] +[[package]] +name = "stable_deref_trait" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3" + [[package]] name = "static_assertions" version = "1.1.0" @@ -3141,30 +2801,14 @@ dependencies = [ ] [[package]] -name = "sync_wrapper" -version = "0.1.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2047c6ded9c721764247e62cd3b03c09ffc529b2ba5b10ec482ae507a4a70160" - -[[package]] -name = "system-configuration" -version = "0.5.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ba3a3adc5c275d719af8cb4272ea1c4a6d668a777f37e115f6d11ddbc1c8e0e7" -dependencies = [ - "bitflags 1.3.2", - "core-foundation", - "system-configuration-sys", -] - 
-[[package]] -name = "system-configuration-sys" -version = "0.5.0" +name = "synstructure" +version = "0.13.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a75fb188eb626b924683e3b95e3a48e63551fcfb51949de2f06a9d91dbee93c9" +checksum = "c8af7666ab7b6390ab78131fb5b0fce11d6b7a6951602017c35fa82800708971" dependencies = [ - "core-foundation-sys", - "libc", + "proc-macro2", + "quote", + "syn 2.0.66", ] [[package]] @@ -3211,31 +2855,22 @@ dependencies = [ ] [[package]] -name = "time" -version = "0.3.36" +name = "tiny-keccak" +version = "2.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5dfd88e563464686c916c7e46e623e520ddc6d79fa6641390f2e3fa86e83e885" +checksum = "2c9d3793400a45f954c52e73d068316d76b6f4e36977e3fcebb13a2721e80237" dependencies = [ - "deranged", - "num-conv", - "powerfmt", - "serde", - "time-core", + "crunchy", ] [[package]] -name = "time-core" -version = "0.1.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ef927ca75afb808a4d64dd374f00a2adf8d0fcff8e7b184af886c3c87ec4a3f3" - -[[package]] -name = "tiny-keccak" -version = "2.0.2" +name = "tinystr" +version = "0.7.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2c9d3793400a45f954c52e73d068316d76b6f4e36977e3fcebb13a2721e80237" +checksum = "9117f5d4db391c1cf6927e7bea3db74b9a1c1add8f7eda9ffd5364f40f57b82f" dependencies = [ - "crunchy", + "displaydoc", + "zerovec", ] [[package]] @@ -3281,16 +2916,6 @@ dependencies = [ "syn 2.0.66", ] -[[package]] -name = "tokio-rustls" -version = "0.24.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c28327cf380ac148141087fbfb9de9d7bd4e84ab5d2c28fbc911d753de8a7081" -dependencies = [ - "rustls", - "tokio", -] - [[package]] name = "tokio-stream" version = "0.1.15" @@ -3315,12 +2940,6 @@ dependencies = [ "tokio", ] -[[package]] -name = "tower-service" -version = "0.3.2" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "b6bc1c9ce2b5135ac7f93c72918fc37feb872bdc6a5533a8b85eb4b86bfdae52" - [[package]] name = "tracing" version = "0.1.40" @@ -3353,12 +2972,6 @@ dependencies = [ "once_cell", ] -[[package]] -name = "try-lock" -version = "0.2.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e421abadd41a4225275504ea4d6566923418b7f05506fbc9c0fe86ba7396114b" - [[package]] name = "twox-hash" version = "1.6.3" @@ -3420,17 +3033,11 @@ version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "39ec24b3121d976906ece63c9daad25b85969647682eee313cb5779fdd69e14e" -[[package]] -name = "untrusted" -version = "0.9.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8ecb6da28b8a351d773b68d5825ac39017e680750f980f3a1a85cd8dd28a47c1" - [[package]] name = "url" -version = "2.5.0" +version = "2.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "31e6302e3bb753d46e83516cae55ae196fc0c309407cf11ab35cc51a4c2a4633" +checksum = "f7c25da092f0a868cdf09e8674cd3b7ef3a7d92a24253e663a2fb85e2496de56" dependencies = [ "form_urlencoded", "idna", @@ -3443,6 +3050,18 @@ version = "2.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "daf8dba3b7eb870caf1ddeed7bc9d2a049f3cfdfae7cb521b087cc33ae4c49da" +[[package]] +name = "utf16_iter" +version = "1.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c8232dd3cdaed5356e0f716d285e4b40b932ac434100fe9b7e0e8e935b9e6246" + +[[package]] +name = "utf8_iter" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6c140620e7ffbb22c2dee59cafe6084a59b5ffc27a8859a5f0d494b5d52b6be" + [[package]] name = "uuid" version = "1.8.0" @@ -3474,15 +3093,6 @@ dependencies = [ "winapi-util", ] -[[package]] -name = "want" -version = "0.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" 
-checksum = "bfa7760aed19e106de2c7c0b581b509f2f25d3dacaf737cb82ac61bc6d760b0e" -dependencies = [ - "try-lock", -] - [[package]] name = "wasi" version = "0.11.0+wasi-snapshot-preview1" @@ -3520,18 +3130,6 @@ dependencies = [ "wasm-bindgen-shared", ] -[[package]] -name = "wasm-bindgen-futures" -version = "0.4.42" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "76bc14366121efc8dbb487ab05bcc9d346b3b5ec0eaa76e46594cabbe51762c0" -dependencies = [ - "cfg-if", - "js-sys", - "wasm-bindgen", - "web-sys", -] - [[package]] name = "wasm-bindgen-macro" version = "0.2.92" @@ -3561,19 +3159,6 @@ version = "0.2.92" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "af190c94f2773fdb3729c55b007a722abb5384da03bc0986df4c289bf5567e96" -[[package]] -name = "wasm-streams" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b65dc4c90b63b118468cf747d8bf3566c1913ef60be765b5730ead9e0a3ba129" -dependencies = [ - "futures-util", - "js-sys", - "wasm-bindgen", - "wasm-bindgen-futures", - "web-sys", -] - [[package]] name = "web-sys" version = "0.3.69" @@ -3752,14 +3337,16 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bec47e5bfd1bff0eeaf6d8b485cc1074891a197ab4225d504cb7a1ab88b02bf0" [[package]] -name = "winreg" -version = "0.50.0" +name = "write16" +version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "524e57b2c537c0f9b1e69f1965311ec12182b4122e45035b1508cd24d2adadb1" -dependencies = [ - "cfg-if", - "windows-sys 0.48.0", -] +checksum = "d1890f4022759daae28ed4fe62859b1236caebfc61ede2f63ed4e695f3f6d936" + +[[package]] +name = "writeable" +version = "0.5.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e9df38ee2d2c3c5948ea468a8406ff0db0b29ae1ffde1bcf20ef305bcc95c51" [[package]] name = "xz2" @@ -3770,6 +3357,30 @@ dependencies = [ "lzma-sys", ] +[[package]] +name = "yoke" +version = "0.7.4" 
+source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c5b1314b079b0930c31e3af543d8ee1757b1951ae1e1565ec704403a7240ca5" +dependencies = [ + "serde", + "stable_deref_trait", + "yoke-derive", + "zerofrom", +] + +[[package]] +name = "yoke-derive" +version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "28cc31741b18cb6f1d5ff12f5b7523e3d6eb0852bbbad19d73905511d9849b95" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.66", + "synstructure", +] + [[package]] name = "zerocopy" version = "0.7.34" @@ -3790,35 +3401,78 @@ dependencies = [ "syn 2.0.66", ] +[[package]] +name = "zerofrom" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "91ec111ce797d0e0784a1116d0ddcdbea84322cd79e5d5ad173daeba4f93ab55" +dependencies = [ + "zerofrom-derive", +] + +[[package]] +name = "zerofrom-derive" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0ea7b4a3637ea8669cedf0f1fd5c286a17f3de97b8dd5a70a6c167a1730e63a5" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.66", + "synstructure", +] + [[package]] name = "zeroize" version = "1.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ced3678a2879b30306d323f4542626697a464a97c0a07c9aebf7ebca65cd4dde" +[[package]] +name = "zerovec" +version = "0.10.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bb2cc8827d6c0994478a15c53f374f46fbd41bea663d809b14744bc42e6b109c" +dependencies = [ + "yoke", + "zerofrom", + "zerovec-derive", +] + +[[package]] +name = "zerovec-derive" +version = "0.10.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "97cf56601ee5052b4417d90c8755c6683473c926039908196cf35d99f893ebe7" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.66", +] + [[package]] name = "zstd" -version = "0.13.1" +version = "0.13.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "2d789b1514203a1120ad2429eae43a7bd32b90976a7bb8a05f7ec02fa88cc23a" +checksum = "bffb3309596d527cfcba7dfc6ed6052f1d39dfbd7c867aa2e865e4a449c10110" dependencies = [ "zstd-safe", ] [[package]] name = "zstd-safe" -version = "7.1.0" +version = "7.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1cd99b45c6bc03a018c8b8a86025678c87e55526064e38f9df301989dce7ec0a" +checksum = "43747c7422e2924c11144d5229878b98180ef8b06cca4ab5af37afc8a8d8ea3e" dependencies = [ "zstd-sys", ] [[package]] name = "zstd-sys" -version = "2.0.10+zstd.1.5.6" +version = "2.0.9+zstd.1.5.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c253a4914af5bafc8fa8c86ee400827e83cf6ec01195ec1f1ed8441bf00d65aa" +checksum = "9e16efa8a874a0481a574084d34cc26fdb3b99627480f785888deb6386506656" dependencies = [ "cc", "pkg-config", diff --git a/sqlx-sqlite/Cargo.toml b/sqlx-sqlite/Cargo.toml index 32a53e2..a796f9a 100644 --- a/sqlx-sqlite/Cargo.toml +++ b/sqlx-sqlite/Cargo.toml @@ -5,21 +5,18 @@ edition = "2021" [dependencies] anyhow = "~1" -arrow = "51.0.0" -async-trait = "0.1.80" -datafusion = { features = ["backtrace"], git = "https://github.com/adriangb/datafusion.git" } -datafusion-expr = { git = "https://github.com/adriangb/datafusion.git" } -datafusion-optimizer = { git = "https://github.com/adriangb/datafusion.git" } -datafusion-common = { git = "https://github.com/adriangb/datafusion.git" } -datafusion-physical-expr = { git = "https://github.com/adriangb/datafusion.git" } -datafusion-sql = { git = "https://github.com/adriangb/datafusion.git" } -sqlx = { version = "~0", features = [ "runtime-tokio", "tls-native-tls", "sqlite", "json", "uuid", "chrono", "macros" ] } -tempfile = "3.10.1" -thiserror = "~1" -tokio = { version = "~1", features = [ "rt-multi-thread", "macros" ] } -url = "2.5.0" -arrow-schema = "51.0.0" -object_store = { version = "0.9.1", features = ["gcp", "http"] } 
-time = "0.3.36" -sea-query = { version = "0", features = ["with-chrono", "backend-sqlite", "hashable-value", "with-json", "with-chrono", "derive", "attr", "thread-safe"] } -sea-query-binder = { version = "0", features = ["sqlx-sqlite", "with-json", "with-chrono", ] } +async-trait = "0" +datafusion = { features = ["backtrace"], git = "https://github.com/apache/datafusion.git" } +datafusion-expr = { git = "https://github.com/apache/datafusion.git" } +datafusion-optimizer = { git = "https://github.com/apache/datafusion.git" } +datafusion-common = { git = "https://github.com/apache/datafusion.git" } +datafusion-physical-expr = { git = "https://github.com/apache/datafusion.git" } +datafusion-sql = { git = "https://github.com/apache/datafusion.git" } +sqlx = { version = "~0", features = [ "runtime-tokio", "sqlite" ] } +tempfile = "3" +thiserror = "1" +tokio = { version = "1", features = [ "rt-multi-thread", "macros" ] } +url = "2" +object_store = { version = "0" } +sea-query = { version = "0", features = ["backend-sqlite", "hashable-value", "derive", "thread-safe"] } +sea-query-binder = { version = "0", features = ["sqlx-sqlite" ] } diff --git a/sqlx-sqlite/index.db b/sqlx-sqlite/index.db index 4ebf78cf96088c7148cb47caf804cc036de75a34..c8a6b3b0258cc8e68f3ab13efabd9b7d8ac58b6d 100644 GIT binary patch literal 28672 zcmeHPdvIJ;8NZL+``X=m(KxkM-=2S1L7Y#I696vJ^%+TqcAFtf{uvez~~4lj0&S8)X{dpqT_eZJ-g{SoH_C* z%$%L3yZbx8-?#Vt&X=A2&UZFDw{Izxi`j#7^D|TB?0{CM8HTnoo7FTe20u3ZRBr}6 z2(ju-uRf66$F$!0`%}RiS}c{*{1d^W{y})4Zps1WfO0@Npd3&RCF#cQ53y|F$TXCkg@xjg#p3M#BKI`L*u@QYhA#Agva51jx5JP(4Um|` zzokXso*GQd$m}IDy8tlf;-TVvcH53kTgP_n$!;FslO5YNdC8^;2(op2ViKlF!V4B8 zyK7?8_Fdyu#zN`3VqxF4<>Ers)3evLwsh=r@DygPYJ=IXa}6uKy<68xvj>XDs7+Ig zLC55ZTk}4k!nd1IhvAfO0@Npd3&RCulmp5E<-oh!fn{a~Dg0}D z0%jGdZ3P&0W``SY4lL~fz?}5pZyNklH|2nGKslfsP!1>ulmp5E<$!WPIiMU+4k!oS z`wrCWW;fX&IP2RgW*p|F2WP_B*}<9M+Te;{EU5GU;6LR*?BDIr`(u7x>c!MIQrD#} zPNC$Vk`E_uOimDaS{Q3CT96U}>5uDQ)jz8r)vwSm(tCBJ{a5>o_H;M} 
zH09}O-O=H17-%N>Hp?muuXhAp$>Yt~@~!hxfyYl{a?mf^dTOP zWBteaKhY*0k74~z{YU6x9yc*$-yP_KJZ@mUN8j6c5#a9Gd5E=t>)nmx06Q%Fr1ob2 z2RuCkS*vLs9VYprPWauT5zHL$VV?9~C{hf|6c>mTLo&sfNHHi=Y!oR5WQy}e3M^Bc zCsO2OigQJZewpI^B1NA}@jj8FSEe{eq}U)+tqTnQmmCJaw5eVnWA5$SS?fZi4^TJMXyNFCR1z>DY7y}k4TY` zDY`|9R+(bGNYNrwbcqzJWQtCaqFJWs5Ghv56zfEaCYfTbNU=huSR+!TWs21z1(GS+ zMT$n5qD`b|kSVevMZHXs5h<3-6s;n~GMS=9q^OfAR*4jWOwlY-_%g*xks>8iG>H^R znPP=Vk&r3UB84YYAd$kADH=rzN2X{HDQuadUZk*Oisd3jT&7qiQp9A6I+4PZDFTtg zkSO|mP{1cw_Oqx=kOBdGrhUhP)}*I5l2>SMHo`v-fW!ZCL;d6~pKndTI0?v%_ebM{ z`9^Q)v$a8LJgnWT{j9-_K3nTh`ce3Rc2A$PG#zqaz7T%2Cex`k#luh5wI_Y{(sbUD zpxN@=XgW*EmFi7T-PS*y>MDojHo*R}3Iye{20`FgD0tWQZ%?Qi&8lGES zk6LL!uD{m858>LAULE*!IsOkH>Sb#x{QmzG6wkpk!TrJQs{U8?f3$C+>i@_GRsXYZ z?5h5!U$_e2Tx%z-`c?hU>|Lq)UzjDT{%5j8)&IgQQT0EQC93`xW{Il*nJiKDzc5Qw z{jchOy5CXTB4G9l!uo$_{4p(f6?Onlz#hP-g2Tb?U}Mk;rvTpY|L8yIKj`1*-{@cE zZ}Ug}tnZ{wrhc3HVd`tCJ5$F}yHlg7bn;a4H_3;RcO++$o0DBhJMrhl6NxV+Zb9{&a=)#&h1Xg8F$+3|Jc8?AF=PU zXY5V(T3feXupYJUv5r_JG` z%As)v!Z^@XW;k6D2h%vP@NE^SBN!)b{)G@o-M zpP@g?a8^+cjYIMoIIpWZK&XIhE9xTIINjWqQ$!F-gGMuJ}L-QGy6nut0E5lhq zIW!K*XXvaloOHyY`J5y9482r_gD8i_A^8m5Q-;$RacDm0NIpZql;Jc`4vjQC;?R7~k$i^6C&Q_u92y7mIa3LHo#g$wxtY8_ z2ZTe@8L~N33A>$VGyI4V1%X`7RG{CloM#5sl&F`^idznMymW7vd2(-*QgQ!zP) z6)~b1kh__R!7<{5K~onpH&f9$Ml51PF(7X<6^&zJ2w5ws=mrg2=U^q|xJ@hdiAI5OX`WAE@j}PHQ zdtxWLmd6LNcQVn2j`8>ahI87lqic8^V>rG27&^-1ISgmIKZ6!|ydQg=-X2uu@jmRH z_Bzl4kN09Y6@CiM^Y{h~XT_gDM|ivk!wK@c&|x0$#%`}$Kyy649y@Qj8_+C|cVRe# zeg@6(cqfLF=})0)9`C?#Uj1|EY93#Q;S_rjeU!)7V#ja>P>ILaVEY+IM_2LqY7FP# ze}xY5csqvE@n1j(dAtq7nfa?xk;k*xcI-Slz~dQgJ!ji!KaaO!ICcLl+Q;K97|!B< F`G3yIG(`Xa delta 33 ncmZp8z}TQLL7J73fq{W>Vxv7bBZFSi0$w1GL143>z%+gUg0u$b diff --git a/sqlx-sqlite/src/index.rs b/sqlx-sqlite/src/index.rs index d06a1f0..bbd837b 100644 --- a/sqlx-sqlite/src/index.rs +++ b/sqlx-sqlite/src/index.rs @@ -2,15 +2,11 @@ use std::{ collections::HashMap, fmt::Display, fs::File, path::Path }; -use arrow::{ - array::AsArray, - datatypes::{Int16Type, Int32Type, Int64Type, UInt16Type, 
UInt32Type, UInt64Type}, -}; +use datafusion::arrow::datatypes::{Int16Type, Int32Type, Int64Type, UInt16Type, UInt32Type, UInt64Type}; use datafusion::{ - datasource::physical_plan::parquet::{ParquetAccessPlan, RequestedStatistics, RowGroupAccess, StatisticsConverter}, - parquet::arrow::arrow_reader::ParquetRecordBatchReaderBuilder, - prelude::*, + datasource::physical_plan::parquet::{ParquetAccessPlan, RowGroupAccess, StatisticsConverter}, parquet::arrow::arrow_reader::ParquetRecordBatchReaderBuilder, prelude::* }; +use datafusion::arrow::array::AsArray; use datafusion_common::{internal_datafusion_err, DataFusionError, Result, ScalarValue}; use datafusion_expr::Operator; use sqlx::SqlitePool; @@ -144,22 +140,19 @@ impl SQLiteIndex { // extract the parquet statistics from the file's footer let metadata = reader.metadata(); - + let schema = reader.schema(); + let parquet_schema = reader.parquet_schema(); + let row_groups = metadata.row_groups(); + let row_counts = StatisticsConverter::row_group_row_counts(row_groups.iter())?; let mut column_statistics: Vec = Vec::with_capacity(reader.schema().fields().len() * metadata.num_row_groups()); for column in 0..reader.schema().fields().len() { - let column_name = reader.schema().field(column).name().clone(); - - let row_counts = StatisticsConverter::row_counts(reader.metadata())?; - let null_counts_array = StatisticsConverter::try_new(&column_name, RequestedStatistics::NullCount, reader.schema())?.extract(reader.metadata())?; - let null_counts = null_counts_array.as_primitive::(); - - let min_values = - StatisticsConverter::try_new(&column_name.clone(), RequestedStatistics::Min, reader.schema())? - .extract(reader.metadata())?; - let max_values = - StatisticsConverter::try_new(&column_name.clone(), RequestedStatistics::Max, reader.schema())? 
- .extract(reader.metadata())?; + let column_name = schema.field(column).name().clone(); + let converter = StatisticsConverter::try_new(&column_name, schema, parquet_schema)?; + let min_values = converter.row_group_mins(row_groups.iter())?; + let max_values = converter.row_group_maxes(row_groups.iter())?; + let null_counts = converter.row_group_null_counts(row_groups.iter())?; + let null_counts = null_counts.as_primitive::(); for row_group in 0..reader.metadata().num_row_groups() { let stats = ColumnStatisticsInsertBuilder::new( @@ -170,7 +163,7 @@ impl SQLiteIndex { ); // match on the data type of the column, downcast the array and extract the min/max values and build the statistics match reader.schema().field(column).data_type() { - arrow::datatypes::DataType::Int8 => { + datafusion::arrow::datatypes::DataType::Int8 => { let min_values = min_values.as_primitive::(); let max_values = max_values.as_primitive::(); let min = min_values.value(row_group) as i64; @@ -178,7 +171,7 @@ impl SQLiteIndex { let stats = stats.build(MinMaxStats::Int(min, max)); column_statistics.push(stats); } - arrow::datatypes::DataType::UInt8 => { + datafusion::arrow::datatypes::DataType::UInt8 => { let min_values = min_values.as_primitive::(); let max_values = max_values.as_primitive::(); let min = min_values.value(row_group) as i64; @@ -186,7 +179,7 @@ impl SQLiteIndex { let stats = stats.build(MinMaxStats::Int(min, max)); column_statistics.push(stats); } - arrow::datatypes::DataType::Int16 => { + datafusion::arrow::datatypes::DataType::Int16 => { let min_values = min_values.as_primitive::(); let max_values = max_values.as_primitive::(); let min = min_values.value(row_group) as i64; @@ -194,7 +187,7 @@ impl SQLiteIndex { let stats = stats.build(MinMaxStats::Int(min, max)); column_statistics.push(stats); } - arrow::datatypes::DataType::UInt16 => { + datafusion::arrow::datatypes::DataType::UInt16 => { let min_values = min_values.as_primitive::(); let max_values = max_values.as_primitive::(); 
let min = min_values.value(row_group) as i64; @@ -202,7 +195,7 @@ impl SQLiteIndex { let stats = stats.build(MinMaxStats::Int(min, max)); column_statistics.push(stats); } - arrow::datatypes::DataType::Int32 => { + datafusion::arrow::datatypes::DataType::Int32 => { let min_values = min_values.as_primitive::(); let max_values = max_values.as_primitive::(); let min = min_values.value(row_group) as i64; @@ -210,7 +203,7 @@ impl SQLiteIndex { let stats = stats.build(MinMaxStats::Int(min, max)); column_statistics.push(stats); } - arrow::datatypes::DataType::UInt32 => { + datafusion::arrow::datatypes::DataType::UInt32 => { let min_values = min_values.as_primitive::(); let max_values = max_values.as_primitive::(); let min = min_values.value(row_group) as i64; @@ -218,7 +211,7 @@ impl SQLiteIndex { let stats = stats.build(MinMaxStats::Int(min, max)); column_statistics.push(stats); } - arrow::datatypes::DataType::Int64 => { + datafusion::arrow::datatypes::DataType::Int64 => { let min_values = min_values.as_primitive::(); let max_values = max_values.as_primitive::(); let min = min_values.value(row_group); @@ -226,7 +219,7 @@ impl SQLiteIndex { let stats = stats.build(MinMaxStats::Int(min, max)); column_statistics.push(stats); } - arrow::datatypes::DataType::Utf8 => { + datafusion::arrow::datatypes::DataType::Utf8 => { let min_values = min_values.as_string::(); let max_values = max_values.as_string::(); let min = min_values.value(row_group).to_string(); @@ -234,7 +227,7 @@ impl SQLiteIndex { let stats = stats.build(MinMaxStats::String(min, max)); column_statistics.push(stats); } - arrow::datatypes::DataType::LargeUtf8 => { + datafusion::arrow::datatypes::DataType::LargeUtf8 => { let min_values = min_values.as_string::(); let max_values = max_values.as_string::(); let min = min_values.value(row_group).to_string(); @@ -383,7 +376,6 @@ impl SQLiteIndex { } } - #[derive(Debug, Clone)] pub struct FileScanPlan { pub file_size: u64, diff --git a/sqlx-sqlite/src/main.rs 
b/sqlx-sqlite/src/main.rs index b7443f9..1527333 100644 --- a/sqlx-sqlite/src/main.rs +++ b/sqlx-sqlite/src/main.rs @@ -2,8 +2,8 @@ use std::{ any::Any, cell::RefCell, fmt::Display, fs::{self, DirEntry, File}, ops::Range, path::{Path, PathBuf}, sync::{Arc, Mutex} }; -use arrow::array::{ArrayRef, Int32Array, RecordBatch, StringArray}; -use arrow_schema::SchemaRef; +use datafusion::arrow::array::{ArrayRef, Int32Array, RecordBatch, StringArray}; +use datafusion::arrow::datatypes::SchemaRef; use async_trait::async_trait; use datafusion::{ datasource::{ @@ -67,7 +67,7 @@ mod index; /// have matching data. │ │ rg 0 │ │ /// │ │ └────────┘ │ /// │ ┌────────┐ │ -/// │ │ │ scan │ │ +/// │ │ │ skip │ │ /// ─ ▶ │ │ rg 3 │ │ /// The index can choose to │ └────────┘ │ /// scan entire files, │ ... │ @@ -100,7 +100,7 @@ async fn main() -> anyhow::Result<()> { let data = DemoData::try_new()?; // Create a table provider with and our special index. - let index = SQLiteIndex::new(pool); + let index = SQLiteIndex::new(pool.clone()); let provider = Arc::new(IndexTableProvider::try_new(data.path(), index).await?); println!("** Table Provider:"); println!("{provider}\n"); @@ -154,6 +154,8 @@ async fn main() -> anyhow::Result<()> { .await?; println!("Files scanned: {:?}\n", provider.last_execution()); + pool.close().await; + Ok(()) } From 0466b357ce8ac6118cb9a7937b3353daf06d9a79 Mon Sep 17 00:00:00 2001 From: Adrian Garcia Badaracco <1755071+adriangb@users.noreply.github.com> Date: Mon, 10 Jun 2024 11:45:35 -0500 Subject: [PATCH 08/21] Remove LIKE example --- sqlx-sqlite/src/index.rs | 42 ++-------------------------------------- sqlx-sqlite/src/main.rs | 8 -------- 2 files changed, 2 insertions(+), 48 deletions(-) diff --git a/sqlx-sqlite/src/index.rs b/sqlx-sqlite/src/index.rs index bbd837b..d62ad83 100644 --- a/sqlx-sqlite/src/index.rs +++ b/sqlx-sqlite/src/index.rs @@ -502,46 +502,8 @@ pub fn push_down_filter(filter: &Expr) -> Option { let inner_pushdown = push_down_filter(inner); 
inner_pushdown.map(|inner_pushdown| inner_pushdown.not()) }, - Expr::Like(inner) => { - let negated = inner.negated; - let expr = *inner.expr.clone(); - let pattern = *inner.pattern.clone(); - let escape_char = inner.escape_char; - let case_insensitive = inner.case_insensitive; - - let column = match &expr { - Expr::Column(column) => column.clone(), - _ => return None - }; - let pattern = match &pattern { - Expr::Literal(ScalarValue::Utf8(Some(pattern))) => pattern.clone(), - _ => return None - }; - // We don't support escape characters in this example - if escape_char.is_some() { - return None; - } - // Find the prefix in pattern by looking for the first `%` and truncate - let prefix_len = pattern.chars().position(|c| c == '%').unwrap_or_else(|| pattern.len()); - let mut prefix = pattern.chars().take(prefix_len).collect::(); - let mut min_val_col = SeaQExpr::col(ColumnStatistics::StringMinValue); - // If this is a case insensitive match we need to convert the prefix to lowercase - if case_insensitive { - prefix = prefix.to_lowercase(); - min_val_col = SeaQExpr::expr(sea_query::Func::lower(min_val_col)); - }; - - let filter = SeaQExpr::col(ColumnStatistics::ColumnName).eq(column.name).and( - min_val_col.gte(SqlValue::String(Some(Box::new(prefix.clone())))) - ); - if negated { - Some(filter.not()) - } else { - Some(filter) - } - }, - // We could handle more cases here, at least simple ones involving nulls, negations, etc. - // But this example does not implement that + // We could handle more cases here, e.g. `LIKE`, `IN`, etc. 
+ // But this example does not implement those to keep complexity under control _ => None } } diff --git a/sqlx-sqlite/src/main.rs b/sqlx-sqlite/src/main.rs index 1527333..35c57b2 100644 --- a/sqlx-sqlite/src/main.rs +++ b/sqlx-sqlite/src/main.rs @@ -146,14 +146,6 @@ async fn main() -> anyhow::Result<()> { .await?; println!("Files scanned: {:?}\n", provider.last_execution()); - // it's even possible to get LIKE pushed down to the index - println!("** Select data, predicate `text LIKE 'text2%0'`"); - ctx.sql("SELECT file_name, count(text) FROM index_table WHERE text LIKE 'text2%0' GROUP BY file_name") - .await? - .show() - .await?; - println!("Files scanned: {:?}\n", provider.last_execution()); - pool.close().await; Ok(()) From 4d14fe731ba69434626695f3615a4dbfacb4813a Mon Sep 17 00:00:00 2001 From: Adrian Garcia Badaracco <1755071+adriangb@users.noreply.github.com> Date: Tue, 11 Jun 2024 20:52:48 -0500 Subject: [PATCH 09/21] Draft using PruningPredicate to generate a PhysiclaExpr --- sqlx-sqlite/index.db | Bin 28672 -> 28672 bytes sqlx-sqlite/src/index.rs | 666 +++++++++++++++++---------------------- sqlx-sqlite/src/main.rs | 30 +- 3 files changed, 313 insertions(+), 383 deletions(-) diff --git a/sqlx-sqlite/index.db b/sqlx-sqlite/index.db index c8a6b3b0258cc8e68f3ab13efabd9b7d8ac58b6d..af3a784d3797343c1b5cd5f4774e621fa775795a 100644 GIT binary patch literal 28672 zcmeHPd2C$88GrY+S7OKGI8MB4=dcqyws(BRgd+h=NfQ#BID`OUahy$RaO}ihlK=$- zfl9PH23jbsT8;uOEmB3*ib6#X0Tq;jTG|4oK!w_>0tr!&=mGTn-h1O?#>2p$p_)yc z-S>XK-?uxz_l@&?GrPN^f6H*GnA<-#esHjq>(CZxhM{f9DViSknh^ zug`h>K@U?}>-b%%%)6SE%4_L|Ge1c0hX?AW98eA@2b2TK0p);lKslfsP!1>uJ`N6C zi!HahtxdnKvNX7Nq&PG-GI?;cFi{#T4NsJYhbHEouut3Gw{f5^H?Z;4Eq%F$d3Zyv zDc7)nc%)bu-q(=ZJA7bxw3OSrZ6LRG=awxkGlJ2 zCr3sKLt~SprD+txg$n*0JY(VD@Mz)E!I4RLlK3?MH#$Ffbl&-i()jS`fw_|Kb%9_S znAJotc=g$)Q{CLG?~W{+I6ML^XJMjvc(OP;ROAjV#x4$Q3SDRcnY{oo=Hh|kc<$`&o6g#}eRpni-|pPToder8ZG|9b^=%!1VMgHv 
z7bLfH>!$vlebbDI;meDKy_c1W6Vsl~y{az%+p%tt<3M?PTiCP$^qqoazHtt98eA@2b2TK0p);lKslfsIKCZNXf{TT z|IC_zIelhkDZp4@Hu~Y>!0Z|Tj7iVDp}}8uQw}Hxlmp5E<$!WPIiMU+4k!nd1IhvA zfO6pD??9z)Hb)Btb3Uz7XT!Mk%=_W!oXq>??dCP+keN6BYrJkeYdl~~!KVVgniD$v6r+d;?>Q|{dQkSIGrR?OdlXoUZlD$bM@tee5iGzs~5^nHZ@U>txSRZ)) z^Zwobn17=0doOtRc!#``yukgfd#`)gJ=sk-FFId$#+_4~r2RYl8}@{~!A@B(S&v!w zTQ^%rt%7xiwbqz4E-+3r))))*clB5GC-iUYx9C^sd-Y9vyI!sRNBgt(j4?I+2a$ZB zSL>b*Utyq`(T7m}*hdg);{k;2MRp$|tvpbLtv}k8h^*y-McDd| z^#?>+c%TwnS6L4r(#!)D*xF)UiO3orSct79)@DRj^S}aZzHZeZ(!>K9Y<}N-4UtA3 zNMrLl^Fc&b@jwcj+s$K$tmJ_tHgje_A}e?xfsMbISwxoeK!A-O8gC-fzym%uK5IOJ zNIegD*f`g?0g)UJxY$@_?5ZnY6zEmH2VX5wTWf@$9m3&1`L6cpuD7?YTmXVMkI)_y zI(w_i7eZ)=5axgpcs6Dx;ya&#MP@g@&PrJ3Pd_kaRqbF=^jNVaG2tU_@nK#fd z2W{o^K@q)zkEp0EpBMP?plWJn#uN{!x3{Le8_+0Tb@dDy4X3xFd~V=}gVDi|cr18m zVR;vv1VLy*d@|o!J}2;}hY`Nq*-^}_^fAkRt>v8{priOG0{H!4>xg9e>*iqnuf|G5 z>UhA$`eXV#h%Dm)3+p%Ok0P>^2TZJAsNaZ4Ee{x2Z_)QuktG1oD;v*lti7){SCJYx z4?~%q^;r9v_Fg+#3>PYLsj<6S{6NYW*hREZ>=QpqBb zq(dsH6iKjDQX!J$rILjrNxM|CKqP6CN-`ozt5lK}N!CgwDUqZ_DoKhY%~DB1Bv~Vs z1R}|5sl*panxqm>Bx#gNT#;mzRN{yvE2R=!Bv~PqSR%=Csl*gX8l)0KB&nB5bde+{ zm1rVKRwgN5Et1qpC0B_g%cPPkMUtgb$)`nH1$)rfKNGd6bB$ZOhgh)~$m5hrd3#F37BFO@&6_)^KRNa9H)pAt!2spNo2;z%X?MG{*oDT*YP zRI*PbF{P3rk;ITl+RA%DLYtG>3RbEZ1O+YYKSTi)y-V`p=l^%v>*LS=KM1}4vCPGp z{>-{eHshq+PHD5&KMgg{@hySPxsbScjmYZZiL2K4E@${u%}hQQbZk4ygX0u6P`G{eOLV02W@N zZhF}=GhCla2I2|#khsNm%<80$Nm~v>Go-ocj80RdG(;jl_r=Vf_FiWPOLt;1r zOQxVDVmJazrl1F6I08$ipy^>a0!yZ#%V9VIOQxW$VK@Ryrl5~uIFThK+)b_R25YCF zabY-Zv9(>)+Agq`K&QfRS}BLd>4G=}S`&t|Hs*BFIGte}=tUS#3+2!_onahkJ{V4O z%;}(UI>I>6Z7`fQltbfmgmIv~U^uH|4yJKnF4Pm~Cm2o><!)c5;bh#7f z!#L0}Fq~DCL*wMbIM5<6oRu-BoxYy-Fb?zt3}*%9&^Ya39B2X<&hm(pr&AnW4sf9A zXE+U%6UWJiQ=CAV&v5Ev4xQrpaEcSC>lsdta%h}ziW4a08BR9l&?%k|r#OMio#E6` z4viB|aRQ|}!&w${=oHU~Q=C9;&Ty7e4viD$Gl629;nc<)n$P(tpP~9@I7=vp#)I6LV-j!zG31g8G`_ET$Y9C(36is2NUm%%S<5kMbERXoiC*hsKHW8A@h`Qx$V) zKIfx+h8mgSETSA5C(36iiWyF2%$Z)kkMbF+V1`pcIW$g`&rtR!SZjsDBZL>!vVkj+^V?k3D-q+>=L1adh`U^{_fq#_1QWys_# 
zfsF-*k&GE}49MdwfjtI>k%$;HjUkJ(1ojjdMi4XN7?8tRavsOquBeM6D5JM1<+zz_3w7!Wts#gcw2`0kRblC?MF) z!XBLXOyVI#plV=&Zk!lMT#pEp3M|lt6Ag(Sh(LY70-ZQ`J5i6w79Qxp!HlQfk<)l!4fg)!uSMik9$1aNpL_pAWFrqWVed=cQ;2Nf zfky1@_ijVv6dqWGy-u%)$jLmg5_^W%fyhZbumZc!da!?WA`dLb?j7zg5n0az4cHxV zZ%5<=9;nCe3GO9`^zuLsyPgYqwT=g}*m=Qq5$WN9I_%u*Jda2>4=ls5Gky;uT|BT9 zI~$zCh;;HmEq2n*DTs9Nz!Ge~;-nD4JWzw}`|X!gR+F|zdq(@9-ud|7|DSDa)&BpS zJy5m(ulE0$Eq~@f?f=v7O;P**bKm|@AzGqV`~P#_Bf=$&+W(*b78|wyFG(0!H=FO6 r+W-Gx$I5E|e}2NK{eMZqz_Q_d$JGA6#4)x1KR;pq-}nFH?+yGP=c8)( literal 28672 zcmeHPdvIJ;8NZL+``X=m(KxkM-=2S1L7Y#I696vJ^%+TqcAFtf{uvez~~4lj0&S8)X{dpqT_eZJ-g{SoH_C* z%$%L3yZbx8-?#Vt&X=A2&UZFDw{Izxi`j#7^D|TB?0{CM8HTnoo7FTe20u3ZRBr}6 z2(ju-uRf66$F$!0`%}RiS}c{*{1d^W{y})4Zps1WfO0@Npd3&RCF#cQ53y|F$TXCkg@xjg#p3M#BKI`L*u@QYhA#Agva51jx5JP(4Um|` zzokXso*GQd$m}IDy8tlf;-TVvcH53kTgP_n$!;FslO5YNdC8^;2(op2ViKlF!V4B8 zyK7?8_Fdyu#zN`3VqxF4<>Ers)3evLwsh=r@DygPYJ=IXa}6uKy<68xvj>XDs7+Ig zLC55ZTk}4k!nd1IhvAfO0@Npd3&RCulmp5E<-oh!fn{a~Dg0}D z0%jGdZ3P&0W``SY4lL~fz?}5pZyNklH|2nGKslfsP!1>ulmp5E<$!WPIiMU+4k!oS z`wrCWW;fX&IP2RgW*p|F2WP_B*}<9M+Te;{EU5GU;6LR*?BDIr`(u7x>c!MIQrD#} zPNC$Vk`E_uOimDaS{Q3CT96U}>5uDQ)jz8r)vwSm(tCBJ{a5>o_H;M} zH09}O-O=H17-%N>Hp?muuXhAp$>Yt~@~!hxfyYl{a?mf^dTOP zWBteaKhY*0k74~z{YU6x9yc*$-yP_KJZ@mUN8j6c5#a9Gd5E=t>)nmx06Q%Fr1ob2 z2RuCkS*vLs9VYprPWauT5zHL$VV?9~C{hf|6c>mTLo&sfNHHi=Y!oR5WQy}e3M^Bc zCsO2OigQJZewpI^B1NA}@jj8FSEe{eq}U)+tqTnQmmCJaw5eVnWA5$SS?fZi4^TJMXyNFCR1z>DY7y}k4TY` zDY`|9R+(bGNYNrwbcqzJWQtCaqFJWs5Ghv56zfEaCYfTbNU=huSR+!TWs21z1(GS+ zMT$n5qD`b|kSVevMZHXs5h<3-6s;n~GMS=9q^OfAR*4jWOwlY-_%g*xks>8iG>H^R znPP=Vk&r3UB84YYAd$kADH=rzN2X{HDQuadUZk*Oisd3jT&7qiQp9A6I+4PZDFTtg zkSO|mP{1cw_Oqx=kOBdGrhUhP)}*I5l2>SMHo`v-fW!ZCL;d6~pKndTI0?v%_ebM{ z`9^Q)v$a8LJgnWT{j9-_K3nTh`ce3Rc2A$PG#zqaz7T%2Cex`k#luh5wI_Y{(sbUD zpxN@=XgW*EmFi7T-PS*y>MDojHo*R}3Iye{20`FgD0tWQZ%?Qi&8lGES zk6LL!uD{m858>LAULE*!IsOkH>Sb#x{QmzG6wkpk!TrJQs{U8?f3$C+>i@_GRsXYZ z?5h5!U$_e2Tx%z-`c?hU>|Lq)UzjDT{%5j8)&IgQQT0EQC93`xW{Il*nJiKDzc5Qw 
z{jchOy5CXTB4G9l!uo$_{4p(f6?Onlz#hP-g2Tb?U}Mk;rvTpY|L8yIKj`1*-{@cE zZ}Ug}tnZ{wrhc3HVd`tCJ5$F}yHlg7bn;a4H_3;RcO++$o0DBhJMrhl6NxV+Zb9{&a=)#&h1Xg8F$+3|Jc8?AF=PU zXY5V(T3feXupYJUv5r_JG` z%As)v!Z^@XW;k6D2h%vP@NE^SBN!)b{)G@o-M zpP@g?a8^+cjYIMoIIpWZK&XIhE9xTIINjWqQ$!F-gGMuJ}L-QGy6nut0E5lhq zIW!K*XXvaloOHyY`J5y9482r_gD8i_A^8m5Q-;$RacDm0NIpZql;Jc`4vjQC;?R7~k$i^6C&Q_u92y7mIa3LHo#g$wxtY8_ z2ZTe@8L~N33A>$VGyI4V1%X`7RG{CloM#5sl&F`^idznMymW7vd2(-*QgQ!zP) z6)~b1kh__R!7<{5K~onpH&f9$Ml51PF(7X<6^&zJ2w5ws=mrg2=U^q|xJ@hdiAI5OX`WAE@j}PHQ zdtxWLmd6LNcQVn2j`8>ahI87lqic8^V>rG27&^-1ISgmIKZ6!|ydQg=-X2uu@jmRH z_Bzl4kN09Y6@CiM^Y{h~XT_gDM|ivk!wK@c&|x0$#%`}$Kyy649y@Qj8_+C|cVRe# zeg@6(cqfLF=})0)9`C?#Uj1|EY93#Q;S_rjeU!)7V#ja>P>ILaVEY+IM_2LqY7FP# ze}xY5csqvE@n1j(dAtq7nfa?xk;k*xcI-Slz~dQgJ!ji!KaaO!ICcLl+Q;K97|!B< F`G3yIG(`Xa diff --git a/sqlx-sqlite/src/index.rs b/sqlx-sqlite/src/index.rs index d62ad83..429960f 100644 --- a/sqlx-sqlite/src/index.rs +++ b/sqlx-sqlite/src/index.rs @@ -1,42 +1,50 @@ -use std::{ - collections::HashMap, fmt::Display, fs::File, path::Path -}; +use std::{collections::HashMap, fmt::Display, fs::File, path::Path, sync::Arc}; -use datafusion::arrow::datatypes::{Int16Type, Int32Type, Int64Type, UInt16Type, UInt32Type, UInt64Type}; +use datafusion::arrow::array::AsArray; +use datafusion::arrow::datatypes::{ + Int16Type, Int32Type, Int64Type, Int8Type, SchemaRef, UInt16Type, UInt32Type, UInt64Type, + UInt8Type, +}; +use datafusion::physical_optimizer::pruning::PruningPredicate; use datafusion::{ - datasource::physical_plan::parquet::{ParquetAccessPlan, RowGroupAccess, StatisticsConverter}, parquet::arrow::arrow_reader::ParquetRecordBatchReaderBuilder, prelude::* + datasource::physical_plan::parquet::{ParquetAccessPlan, RowGroupAccess, StatisticsConverter}, + parquet::arrow::arrow_reader::ParquetRecordBatchReaderBuilder, +}; +use datafusion_common::{internal_datafusion_err, DataFusionError, Result}; +use datafusion_expr::{col, lit}; +use datafusion_physical_expr::PhysicalExpr; 
+use datafusion_sql::unparser::expr_to_sql; +use sea_query::{ + Alias, Expr as SeaQExpr, OnConflict, Query, SimpleExpr, + SqliteQueryBuilder, }; -use datafusion::arrow::array::AsArray; -use datafusion_common::{internal_datafusion_err, DataFusionError, Result, ScalarValue}; -use datafusion_expr::Operator; -use sqlx::SqlitePool; -use sea_query::{Expr as SeaQExpr, Iden, OnConflict, Query, SimpleExpr, SqliteQueryBuilder, Value as SqlValue}; use sea_query_binder::SqlxBinder; +use sqlx::SqlitePool; /// SQLite secondary index for a set of parquet files /// /// It stores file-level data (filename and file size) as well as statistics for each column /// in each row group of each file. -/// +/// /// When we scan a table we push down filters to the index to get a list of row groups that match /// and hence the files that need to be read. -/// +/// /// It is possible for the index to store finer grained statistics or a complete row oriented index /// that filters down to individual rows within row groups. /// For example, if you have a table with an `id` column and you want to enable fast point lookups /// you could store the entire `id` column in the secondary index as a key/value map from `id` to /// (file_name, row_group, row_number) and use that to enable fast point lookups on parquet files. /// This is not implemented in this example. 
-/// +/// /// The index is implemented as a SQLite database with two tables: /// - `file_statistics` with columns `file_id`, `file_name`, `file_size_bytes`, `row_group_count`, `row_count` /// - `column_statistics` with columns `file_id`, `column_name`, `row_group`, `null_count`, `row_count`, /// and min/max values for each data type we support -/// +/// /// Here is roughly what `SELECT * FROM file_statistics` would look like: /// | file_id | file_name | file_size_bytes | row_group_count | row_count | /// | 1 | file1.parquet | 1234 | 3 | 1000 | -/// +/// /// And `SELECT * FROM column_statistics`: /// | file_id | column_name | row_group | null_count | row_count | int_min_value | int_max_value | string_min_value | string_max_value | /// |---------|-------------|-----------|------------|-----------|---------------|---------------|------------------|------------------| @@ -46,12 +54,36 @@ use sea_query_binder::SqlxBinder; /// | 1 | column2 | 0 | 0 | 1000 | | | a | c | /// | 1 | column2 | 1 | 0 | 1000 | | | c | x | /// | 1 | column2 | 2 | 0 | 1000 | | | x | z | -/// +/// +/// To do filtering on `column_statistics` we need to self-join the table on `file_id` and `row_group` for each column: +/// +/// ```sql +/// WITH column1_stats AS ( +/// SELECT file_id, row_group, int_min_value AS column1_min, int_max_value AS column1_max FROM column_statistics WHERE column_name = 'column1' +/// ), column2_stats AS ( +/// SELECT file_id, row_group, string_min_value AS column2_min, string_max_value AS column2_max FROM column_statistics WHERE column_name = 'column2' +/// ) +/// SELECT * +/// FROM column1_stats +/// JOIN column2_stats USING (file_id, row_group) +/// ``` +/// +/// Then to prune files we apply the filter to the joined table, let's call it `wide_column_statistics`: +/// +/// ```sql +/// SELECT file_name, file_size_bytes, row_group_count, row_group +/// FROM wide_column_statistics +/// JOIN file_statistics USING (file_id) +/// WHERE column1_min <= 10 AND column1_max >= 10 AND 
column2_min <= 'b' AND column2_max >= 'b' +/// ``` +/// /// While we use SQLite in this example, the index could be implemented with other databases or system. /// SQLite is just a convenient example that is also very similar to other RDBMS systems that you might use. #[derive(Debug)] pub struct SQLiteIndex { pool: SqlitePool, + /// The index for the schema. Not all columns in the table need to be indexed. + schema: SchemaRef, } impl Display for SQLiteIndex { @@ -62,65 +94,79 @@ impl Display for SQLiteIndex { } impl SQLiteIndex { - pub fn new(pool: SqlitePool) -> Self { - Self { pool } + pub fn new(pool: SqlitePool, schema: SchemaRef) -> Self { + Self { pool, schema } } /// Return the filenames / row groups that match the filter - /// + /// /// This function pushes down the filter to the index to get a list of row groups that match /// and hence the files that need to be read. - /// + /// /// The filter is pushed down to the index by converting it to a set of SQL expressions /// that can be evaluated by the index. - /// + /// /// The return value is a list of `(file_name, FileScanPlan)` tuples where `FileScanPlan` contains /// the file metadata and the row groups that need to be scanned. 
- pub async fn get_files(&self, filter: Option) -> Result> { - let (sql, values) = Query::select() - .columns(vec![ - FileStatistics::FileName, - FileStatistics::FileSizeBytes, - FileStatistics::RowGroupCount, - ]) - .column(ColumnStatistics::RowGroup) - .distinct() // could be distinct_on(vec![ColumnStatistics::FileId, ColumnStatistics::RowGroup]) if the backing store supports it - .from(FileStatistics::Table) - .inner_join( - ColumnStatistics::Table, - SeaQExpr::col((FileStatistics::Table, FileStatistics::FileId)).equals((ColumnStatistics::Table, ColumnStatistics::FileId)), - ) - .and_where_option(filter.and_then(|f| push_down_filter(&f))) - .build_sqlx(SqliteQueryBuilder); + pub async fn get_files( + &self, + filter: Arc, + schema: SchemaRef, + ) -> Result> { + let pruning = PruningPredicate::try_new(filter, schema.clone())?; + + let statistics_predicate = pruning.predicate_expr(); + + // TODO: we can either convert the PhysicalExpr to an Expr and use expr_to_sql + // Or we convert it manually into SeaQuery expressions + // The former is likely less code, the latter would generalize more (there is no guarantee + // the index database supports DataFusion flavored SQL) + let expr = col("value_min").gt(lit(1)); + let sql = format!( + "r# + WITH row_groups AS ( + SELECT row_group + FROM row_group_statistics + WHERE {} + ) + SELECT file_name, file_size_bytes, row_group_count, row_group + FROM row_groups + JOIN file_statistics USING (file_id) + #", + expr_to_sql(&expr)? 
+ ); // TODO: we could aggregate the row groups into an array in the query to transmit less data over the wire // (and maybe avoid the join), leaving that as a TODO since it introduces more complexity and coupling to the index's backing store // Result is in the form of (file_name, file_size, row_group_count, row_group_to_scan) - let row_groups: Vec<(String, i64, i64, i64)> = sqlx::query_as_with(&sql, values) + let row_groups: Vec<(String, i64, i64, i64)> = sqlx::query_as(&sql) .fetch_all(&self.pool) - .await.unwrap(); // TODO: handle error, possibly failing gracefully by scanning all files? + .await + .unwrap(); // TODO: handle error, possibly failing gracefully by scanning all files? - let mut file_scans: HashMap = HashMap::new(); // file_name -> (file_size, row_groups) for (file_name, file_size, file_row_group_counts, row_group_to_scan) in row_groups { - let (_, access_plan) = file_scans.entry(file_name).or_insert((file_size, ParquetAccessPlan::new_none(file_row_group_counts as usize))); + let (_, access_plan) = file_scans.entry(file_name).or_insert(( + file_size, + ParquetAccessPlan::new_none(file_row_group_counts as usize), + )); // Here we could do finer grained row-level filtering, but this example does not implement that access_plan.set(row_group_to_scan as usize, RowGroupAccess::Scan) } - Ok( - file_scans.into_iter().map(|(file_name, (file_size, access_plan))| { + Ok(file_scans + .into_iter() + .map(|(file_name, (file_size, access_plan))| { ( file_name, FileScanPlan { file_size: file_size as u64, access_plan, - } + }, ) - }).collect() - ) - + }) + .collect()) } /// Add a new file to the index @@ -144,96 +190,140 @@ impl SQLiteIndex { let parquet_schema = reader.parquet_schema(); let row_groups = metadata.row_groups(); let row_counts = StatisticsConverter::row_group_row_counts(row_groups.iter())?; - let mut column_statistics: Vec = Vec::with_capacity(reader.schema().fields().len() * metadata.num_row_groups()); - - for column in 
0..reader.schema().fields().len() { - let column_name = schema.field(column).name().clone(); + let mut row_group_statistics: Vec<_> = row_counts + .iter() + .enumerate() + .map(|(row_group, row_count)| { + RowGroupStatisticsInsert::new(row_group as i64, row_count.unwrap() as i64) + }) + .collect(); + + for field in self.schema.fields() { + let column_name = field.name().clone(); let converter = StatisticsConverter::try_new(&column_name, schema, parquet_schema)?; let min_values = converter.row_group_mins(row_groups.iter())?; let max_values = converter.row_group_maxes(row_groups.iter())?; let null_counts = converter.row_group_null_counts(row_groups.iter())?; let null_counts = null_counts.as_primitive::(); - for row_group in 0..reader.metadata().num_row_groups() { - let stats = ColumnStatisticsInsertBuilder::new( - column_name.clone(), - row_group as i64, - null_counts.value(row_group) as i64, - row_counts.value(row_group) as i64, - ); - // match on the data type of the column, downcast the array and extract the min/max values and build the statistics - match reader.schema().field(column).data_type() { + for row_group in 0..metadata.num_row_groups() { + match field.data_type() { datafusion::arrow::datatypes::DataType::Int8 => { - let min_values = min_values.as_primitive::(); - let max_values = max_values.as_primitive::(); + let min_values = min_values.as_primitive::(); + let max_values = max_values.as_primitive::(); let min = min_values.value(row_group) as i64; let max = max_values.value(row_group) as i64; - let stats = stats.build(MinMaxStats::Int(min, max)); - column_statistics.push(stats); + let column_statistics = ColumnStatistics { + null_count: null_counts.value(row_group) as i64, + stats: MinMaxStats::Int(min, max), + }; + row_group_statistics[row_group] + .column_statistics + .push(column_statistics); } datafusion::arrow::datatypes::DataType::UInt8 => { - let min_values = min_values.as_primitive::(); - let max_values = max_values.as_primitive::(); + let 
min_values = min_values.as_primitive::(); + let max_values = max_values.as_primitive::(); let min = min_values.value(row_group) as i64; let max = max_values.value(row_group) as i64; - let stats = stats.build(MinMaxStats::Int(min, max)); - column_statistics.push(stats); + let column_statistics = ColumnStatistics { + null_count: null_counts.value(row_group) as i64, + stats: MinMaxStats::Int(min, max), + }; + row_group_statistics[row_group] + .column_statistics + .push(column_statistics); } datafusion::arrow::datatypes::DataType::Int16 => { let min_values = min_values.as_primitive::(); let max_values = max_values.as_primitive::(); let min = min_values.value(row_group) as i64; let max = max_values.value(row_group) as i64; - let stats = stats.build(MinMaxStats::Int(min, max)); - column_statistics.push(stats); + let column_statistics = ColumnStatistics { + null_count: null_counts.value(row_group) as i64, + stats: MinMaxStats::Int(min, max), + }; + row_group_statistics[row_group] + .column_statistics + .push(column_statistics); } datafusion::arrow::datatypes::DataType::UInt16 => { let min_values = min_values.as_primitive::(); let max_values = max_values.as_primitive::(); let min = min_values.value(row_group) as i64; let max = max_values.value(row_group) as i64; - let stats = stats.build(MinMaxStats::Int(min, max)); - column_statistics.push(stats); + let column_statistics = ColumnStatistics { + null_count: null_counts.value(row_group) as i64, + stats: MinMaxStats::Int(min, max), + }; + row_group_statistics[row_group] + .column_statistics + .push(column_statistics); } datafusion::arrow::datatypes::DataType::Int32 => { let min_values = min_values.as_primitive::(); let max_values = max_values.as_primitive::(); let min = min_values.value(row_group) as i64; let max = max_values.value(row_group) as i64; - let stats = stats.build(MinMaxStats::Int(min, max)); - column_statistics.push(stats); + let column_statistics = ColumnStatistics { + null_count: null_counts.value(row_group) as 
i64, + stats: MinMaxStats::Int(min, max), + }; + row_group_statistics[row_group] + .column_statistics + .push(column_statistics); } datafusion::arrow::datatypes::DataType::UInt32 => { let min_values = min_values.as_primitive::(); let max_values = max_values.as_primitive::(); let min = min_values.value(row_group) as i64; let max = max_values.value(row_group) as i64; - let stats = stats.build(MinMaxStats::Int(min, max)); - column_statistics.push(stats); + let column_statistics = ColumnStatistics { + null_count: null_counts.value(row_group) as i64, + stats: MinMaxStats::Int(min, max), + }; + row_group_statistics[row_group] + .column_statistics + .push(column_statistics); } datafusion::arrow::datatypes::DataType::Int64 => { let min_values = min_values.as_primitive::(); let max_values = max_values.as_primitive::(); let min = min_values.value(row_group); let max = max_values.value(row_group); - let stats = stats.build(MinMaxStats::Int(min, max)); - column_statistics.push(stats); + let column_statistics = ColumnStatistics { + null_count: null_counts.value(row_group) as i64, + stats: MinMaxStats::Int(min, max), + }; + row_group_statistics[row_group] + .column_statistics + .push(column_statistics); } datafusion::arrow::datatypes::DataType::Utf8 => { let min_values = min_values.as_string::(); let max_values = max_values.as_string::(); let min = min_values.value(row_group).to_string(); let max = max_values.value(row_group).to_string(); - let stats = stats.build(MinMaxStats::String(min, max)); - column_statistics.push(stats); + let column_statistics = ColumnStatistics { + null_count: null_counts.value(row_group) as i64, + stats: MinMaxStats::String(min, max), + }; + row_group_statistics[row_group] + .column_statistics + .push(column_statistics); } datafusion::arrow::datatypes::DataType::LargeUtf8 => { let min_values = min_values.as_string::(); let max_values = max_values.as_string::(); let min = min_values.value(row_group).to_string(); let max = 
max_values.value(row_group).to_string(); - let stats = stats.build(MinMaxStats::String(min, max)); - column_statistics.push(stats); + let column_statistics = ColumnStatistics { + null_count: null_counts.value(row_group) as i64, + stats: MinMaxStats::String(min, max), + }; + row_group_statistics[row_group] + .column_statistics + .push(column_statistics); } _ => {} // ignore other types, we just don't put them in the index and filters will not be pushed down } @@ -247,96 +337,104 @@ impl SQLiteIndex { row_count: metadata.file_metadata().num_rows(), }; - self.add_row(file_statistics, column_statistics).await?; + self.add_row(file_statistics, row_group_statistics).await?; Ok(()) } async fn add_row( &self, file_statistics: FileStatisticsInsert, - column_statistics: Vec, + row_group_statistics: Vec, ) -> anyhow::Result<()> { self.initialize().await?; let mut transaction = self.pool.begin().await?; let (sql, values) = Query::insert() - .into_table(FileStatistics::Table) - .columns(vec![ - FileStatistics::FileName, - FileStatistics::FileSizeBytes, - FileStatistics::RowGroupCount, - FileStatistics::RowCount, - ]) - .values_panic(vec![ - file_statistics.file_name.into(), - file_statistics.file_size_bytes.into(), - file_statistics.row_group_count.into(), - file_statistics.row_count.into(), - ]) - .on_conflict( - OnConflict::columns(vec![FileStatistics::FileName]).update_columns( - vec![ - FileStatistics::FileSizeBytes, - FileStatistics::RowGroupCount, - FileStatistics::RowCount, - ] - ).to_owned() - ) - .returning(Query::returning().column(FileStatistics::FileId)) - .build_sqlx(SqliteQueryBuilder); - let (file_id, ): (i64, ) = sqlx::query_as_with(&sql, values).fetch_one(&mut *transaction).await?; + .into_table(Alias::new("file_statistics")) + .columns(vec![ + Alias::new("file_name"), + Alias::new("file_size_bytes"), + Alias::new("row_group_count"), + Alias::new("row_count"), + ]) + .values_panic(vec![ + file_statistics.file_name.into(), + 
file_statistics.file_size_bytes.into(), + file_statistics.row_group_count.into(), + file_statistics.row_count.into(), + ]) + .on_conflict( + OnConflict::columns(vec![Alias::new("file_name")]) + .update_columns(vec![ + Alias::new("file_size_bytes"), + Alias::new("row_group_count"), + Alias::new("row_count"), + ]) + .to_owned(), + ) + .returning(Query::returning().column(Alias::new("file_id"))) + .build_sqlx(SqliteQueryBuilder); + let (file_id,): (i64,) = sqlx::query_as_with(&sql, values) + .fetch_one(&mut *transaction) + .await?; // Delete any existing column statistics for this file let (sql, values) = Query::delete() - .from_table(ColumnStatistics::Table) - .and_where(SeaQExpr::col(ColumnStatistics::FileId).eq(file_id)) + .from_table(Alias::new("row_group_statistics")) + .and_where(SeaQExpr::col(Alias::new("file_id")).eq(file_id)) .build_sqlx(SqliteQueryBuilder); - sqlx::query_with(&sql, values).execute(&mut *transaction).await?; - - for row_group_statistics in column_statistics { - let (sql, values) = Query::insert() - .into_table(ColumnStatistics::Table) - .columns(vec![ - ColumnStatistics::FileId, - ColumnStatistics::ColumnName, - ColumnStatistics::RowGroup, - ColumnStatistics::NullCount, - ColumnStatistics::RowCount, - ColumnStatistics::IntMinValue, - ColumnStatistics::IntMaxValue, - ColumnStatistics::StringMinValue, - ColumnStatistics::StringMaxValue, - ]) - .values_panic({ - match row_group_statistics.stats { - MinMaxStats::Int(min, max) => vec![ - file_id.into(), - row_group_statistics.column_name.into(), - row_group_statistics.row_group.into(), - row_group_statistics.null_count.into(), - row_group_statistics.row_count.into(), - min.into(), - max.into(), - SqlValue::String(None).into(), - SqlValue::String(None).into(), - ], - MinMaxStats::String(min, max) => vec![ - file_id.into(), - row_group_statistics.column_name.into(), - row_group_statistics.row_group.into(), - row_group_statistics.null_count.into(), - row_group_statistics.row_count.into(), - 
SqlValue::Int(None).into(), - SqlValue::Int(None).into(), - min.into(), - max.into(), - ], - }}) - .build_sqlx(SqliteQueryBuilder); - sqlx::query_with(&sql, values).execute(&mut *transaction).await?; + sqlx::query_with(&sql, values) + .execute(&mut *transaction) + .await?; + + let mut columns = vec![ + Alias::new("file_id"), + Alias::new("row_group"), + Alias::new("row_count"), + ]; + + for field in self.schema.fields() { + columns.push(Alias::new(format!("{}_null_count", field.name()))); + columns.push(Alias::new(format!("{}_min", field.name()))); + columns.push(Alias::new(format!("{}_max", field.name()))); + } + + let mut query = Query::insert() + .into_table(Alias::new("row_group_statistics")) + .columns(columns) + .to_owned(); + + for statistics in row_group_statistics { + let mut values: Vec = vec![ + file_id.into(), + statistics.row_group.into(), + statistics.row_count.into(), + ]; + for stats in statistics.column_statistics { + match stats.stats { + MinMaxStats::Int(min, max) => { + values.push(stats.null_count.into()); + values.push(min.into()); + values.push(max.into()); + } + MinMaxStats::String(min, max) => { + values.push(stats.null_count.into()); + values.push(min.into()); + values.push(max.into()); + } + } + } + + query = query.values_panic(values).to_owned(); } + let (sql, values) = query.build_sqlx(SqliteQueryBuilder); + + sqlx::query_with(&sql, values) + .execute(&mut *transaction) + .await?; + transaction.commit().await?; Ok(()) @@ -344,32 +442,38 @@ impl SQLiteIndex { /// Simple migration function that idempotently creates the table for the index pub async fn initialize(&self) -> anyhow::Result<()> { - let query = sea_query::Table::create() - .table(FileStatistics::Table) - .if_not_exists() - .col(sea_query::ColumnDef::new(FileStatistics::FileId).big_integer().auto_increment().primary_key()) - .col(sea_query::ColumnDef::new(FileStatistics::FileName).string().not_null().unique_key()) - 
.col(sea_query::ColumnDef::new(FileStatistics::FileSizeBytes).big_integer().not_null()) - .col(sea_query::ColumnDef::new(FileStatistics::RowGroupCount).big_integer().not_null()) - .col(sea_query::ColumnDef::new(FileStatistics::RowCount).big_integer().not_null()) - .build(SqliteQueryBuilder); - + let query = r#" + CREATE TABLE IF NOT EXISTS file_statistics ( + file_id INTEGER PRIMARY KEY AUTOINCREMENT, + file_name TEXT NOT NULL UNIQUE, + file_size_bytes INTEGER NOT NULL, + row_group_count INTEGER NOT NULL, + row_count INTEGER NOT NULL + ) + "#; sqlx::query(&query).execute(&self.pool).await?; - let query = sea_query::Table::create() - .table(ColumnStatistics::Table) - .if_not_exists() - .col(sea_query::ColumnDef::new(ColumnStatistics::FileId).big_integer().not_null()) - .col(sea_query::ColumnDef::new(ColumnStatistics::ColumnName).string().not_null()) - .col(sea_query::ColumnDef::new(ColumnStatistics::RowGroup).big_integer().not_null()) - .col(sea_query::ColumnDef::new(ColumnStatistics::NullCount).big_integer()) - .col(sea_query::ColumnDef::new(ColumnStatistics::RowCount).big_integer()) - .col(sea_query::ColumnDef::new(ColumnStatistics::IntMinValue).big_integer()) - .col(sea_query::ColumnDef::new(ColumnStatistics::IntMaxValue).big_integer()) - .col(sea_query::ColumnDef::new(ColumnStatistics::StringMinValue).string()) - .col(sea_query::ColumnDef::new(ColumnStatistics::StringMaxValue).string()) - .build(SqliteQueryBuilder); - + // The statistics columns are hardcoded in this example + // It would be up to you to decide if this is appropriate for your use case + // You could also store the statistics in a more flexible way, e.g. 
as a JSON blob or as an entity-attribute-value table + let query = r#" + CREATE TABLE IF NOT EXISTS column_statistics ( + file_id INTEGER NOT NULL, + row_group INTEGER NOT NULL, + row_count INTEGER NOT NULL, + file_name_null_count INTEGER NOT NULL, + file_name_min_value TEXT, + file_name_max_value TEXT, + value_null_count INTEGER NOT NULL, + value_min_value INTEGER, + value_max_value INTEGER, + text_null_count INTEGER NOT NULL, + text_min_value TEXT, + text_max_value TEXT, + PRIMARY KEY (file_id, column_name, row_group), + FOREIGN KEY (file_id) REFERENCES file_statistics(file_id) + ) + "#; sqlx::query(&query).execute(&self.pool).await?; Ok(()) @@ -382,31 +486,6 @@ pub struct FileScanPlan { pub access_plan: ParquetAccessPlan, } -#[derive(Debug, Clone, Iden)] -enum FileStatistics { - Table, - FileId, - FileName, - FileSizeBytes, - RowGroupCount, - RowCount, -} - -#[derive(Debug, Clone, Iden)] -enum ColumnStatistics { - Table, - FileId, - ColumnName, - RowGroup, - NullCount, - RowCount, - IntMinValue, - IntMaxValue, - StringMinValue, - StringMaxValue, - // Extend with other types as needed -} - #[derive(Debug, Clone)] pub enum MinMaxStats { Int(i64, i64), @@ -414,42 +493,27 @@ pub enum MinMaxStats { } #[derive(Debug, Clone)] -pub struct ColumnStatisticsInsert { - pub column_name: String, +pub struct RowGroupStatisticsInsert { pub row_group: i64, - pub null_count: i64, pub row_count: i64, - stats: MinMaxStats, + /// Per-column statistics + pub column_statistics: Vec, } - -#[derive(Debug, Clone)] -pub struct ColumnStatisticsInsertBuilder { - column_name: String, - row_group: i64, - null_count: i64, - row_count: i64, -} - -impl ColumnStatisticsInsertBuilder { - pub fn new(column_name: String, row_group: i64, null_count: i64, row_count: i64) -> Self { +impl RowGroupStatisticsInsert { + pub fn new(row_group: i64, row_count: i64) -> Self { Self { - column_name, row_group, - null_count, row_count, + column_statistics: vec![], } } +} - pub fn build(self, stats: MinMaxStats) 
-> ColumnStatisticsInsert { - ColumnStatisticsInsert { - column_name: self.column_name, - row_group: self.row_group, - null_count: self.null_count, - row_count: self.row_count, - stats, - } - } +#[derive(Debug, Clone)] +pub struct ColumnStatistics { + null_count: i64, + stats: MinMaxStats, } #[derive(Debug, Clone)] @@ -458,148 +522,4 @@ struct FileStatisticsInsert { file_size_bytes: i64, row_group_count: i64, row_count: i64, -} - - -pub fn push_down_filter(filter: &Expr) -> Option { - match filter { - Expr::BinaryExpr(binary_expr) => { - match (*binary_expr.left.clone(), *binary_expr.right.clone()) { - (Expr::Column(column), Expr::Literal(value)) => { - // This is something we can push down! - let column_name = column.name; - let filter = push_down_binary_filter(&value, &binary_expr.op); - filter.map(|filter| SeaQExpr::col(ColumnStatistics::ColumnName).eq(column_name).and(filter)) - } - (left, right) => { - let left_pushdown = push_down_filter(&left); - let right_pushdown = push_down_filter(&right); - match (left_pushdown, right_pushdown, binary_expr.op) { - (Some(left_pushdown), Some(right_pushdown), op) => { - match op { - Operator::And => { - Some(left_pushdown.and(right_pushdown)) - }, - Operator::Or => { - Some(left_pushdown.or(right_pushdown)) - }, - _ => { - None - } - } - } - // If we have A AND B but we can't push down B we can still push down A - // because A must be true for the whole expression to be true - (Some(left_pushdown), None, Operator::And) => Some(left_pushdown), - // Same for the other side - (None, Some(right_pushdown), Operator::And) => Some(right_pushdown), - _ => None - } - } - } - }, - Expr::Not(inner) => { - let inner_pushdown = push_down_filter(inner); - inner_pushdown.map(|inner_pushdown| inner_pushdown.not()) - }, - // We could handle more cases here, e.g. `LIKE`, `IN`, etc. 
- // But this example does not implement those to keep complexity under control - _ => None - } -} - -/// Push down a simple binary expression to the index -/// Only a subset of expressions are supported since `a = 1` has to be rewritten as `a_int_max_value >= 1 AND a_int_min_value <= 1` -fn push_down_binary_filter(value: &ScalarValue, op: &Operator) -> Option { - let (min_col, max_col, sql_value) = match value { - ScalarValue::Int8(v) => { - match v { - Some(v) => (ColumnStatistics::IntMinValue, ColumnStatistics::IntMaxValue, SqlValue::Int(Some(*v as i32))), - None => (ColumnStatistics::IntMinValue, ColumnStatistics::IntMaxValue, SqlValue::Int(None)), - } - }, - ScalarValue::UInt8(v) => { - match v { - Some(v) => (ColumnStatistics::IntMinValue, ColumnStatistics::IntMaxValue, SqlValue::Int(Some(*v as i32))), - None => (ColumnStatistics::IntMinValue, ColumnStatistics::IntMaxValue, SqlValue::Int(None)), - } - }, - ScalarValue::Int16(v) => { - match v { - Some(v) => (ColumnStatistics::IntMinValue, ColumnStatistics::IntMaxValue, SqlValue::Int(Some(*v as i32))), - None => (ColumnStatistics::IntMinValue, ColumnStatistics::IntMaxValue, SqlValue::Int(None)), - } - }, - ScalarValue::UInt16(v) => { - match v { - Some(v) => (ColumnStatistics::IntMinValue, ColumnStatistics::IntMaxValue, SqlValue::Int(Some(*v as i32))), - None => (ColumnStatistics::IntMinValue, ColumnStatistics::IntMaxValue, SqlValue::Int(None)), - } - }, - ScalarValue::Int32(v) => { - match v { - Some(v) => (ColumnStatistics::IntMinValue, ColumnStatistics::IntMaxValue, SqlValue::Int(Some(*v))), - None => (ColumnStatistics::IntMinValue, ColumnStatistics::IntMaxValue, SqlValue::Int(None)), - } - }, - ScalarValue::UInt32(v) => { - match v { - Some(v) => (ColumnStatistics::IntMinValue, ColumnStatistics::IntMaxValue, SqlValue::BigInt(Some(*v as i64))), - None => (ColumnStatistics::IntMinValue, ColumnStatistics::IntMaxValue, SqlValue::BigInt(None)), - } - }, - ScalarValue::Int64(v) => { - match v { - Some(v) => 
(ColumnStatistics::IntMinValue, ColumnStatistics::IntMaxValue, SqlValue::BigInt(Some(*v))), - None => (ColumnStatistics::IntMinValue, ColumnStatistics::IntMaxValue, SqlValue::BigInt(None)), - } - }, - ScalarValue::Utf8(v) => { - match v { - Some(v) => (ColumnStatistics::StringMinValue, ColumnStatistics::StringMaxValue, SqlValue::String(Some(Box::new(v.clone())))), - None => (ColumnStatistics::StringMinValue, ColumnStatistics::StringMaxValue, SqlValue::String(None)), - } - }, - ScalarValue::LargeUtf8(v) => { - match v { - Some(v) => (ColumnStatistics::StringMinValue, ColumnStatistics::StringMaxValue, SqlValue::String(Some(Box::new(v.clone())))), - None => (ColumnStatistics::StringMinValue, ColumnStatistics::StringMaxValue, SqlValue::String(None)), - } - }, - _ => return None, - }; - let min_col = SeaQExpr::col(min_col); - let max_col = SeaQExpr::col(max_col); - let expr = match op { - Operator::Eq => { - min_col.lte(sql_value.clone()).and(max_col.gte(sql_value)) - }, - Operator::Gt => { - max_col.gt(sql_value) - }, - Operator::Lt => { - min_col.lt(sql_value) - }, - Operator::GtEq => { - max_col.gte(sql_value) - }, - Operator::LtEq => { - min_col.lte(sql_value) - }, - Operator::LikeMatch => { - // Find a prefix in the LIKE pattern and use it to filter - match sql_value { - SqlValue::String(Some(pattern)) => { - let mut prefix = pattern.clone(); - let prefix_len = prefix.chars().position(|c| c == '%').unwrap_or_else(|| prefix.len()); - prefix.truncate(prefix_len); - min_col.lte(SqlValue::String(Some(prefix.clone()))).and(max_col.gte(SqlValue::String(Some(prefix)))) - }, - _ => return None - } - }, - // In theory we could handle other operators, but this example does not implement that - _ => return None - }; - Some(expr) } \ No newline at end of file diff --git a/sqlx-sqlite/src/main.rs b/sqlx-sqlite/src/main.rs index 35c57b2..10fc7af 100644 --- a/sqlx-sqlite/src/main.rs +++ b/sqlx-sqlite/src/main.rs @@ -2,7 +2,7 @@ use std::{ any::Any, cell::RefCell, fmt::Display, 
fs::{self, DirEntry, File}, ops::Range, path::{Path, PathBuf}, sync::{Arc, Mutex} }; -use datafusion::arrow::array::{ArrayRef, Int32Array, RecordBatch, StringArray}; +use datafusion::arrow::{array::{ArrayRef, Int32Array, RecordBatch, StringArray}, datatypes::{DataType, Field, Schema}}; use datafusion::arrow::datatypes::SchemaRef; use async_trait::async_trait; use datafusion::{ @@ -100,7 +100,18 @@ async fn main() -> anyhow::Result<()> { let data = DemoData::try_new()?; // Create a table provider with and our special index. - let index = SQLiteIndex::new(pool.clone()); + let index = SQLiteIndex::new( + pool.clone(), + Arc::new( + Schema::new( + vec![ + Field::new("file_name", DataType::Utf8, false), + Field::new("value", DataType::Int32, false), + Field::new("text", DataType::Utf8, false), + ] + ) + ) + ); let provider = Arc::new(IndexTableProvider::try_new(data.path(), index).await?); println!("** Table Provider:"); println!("{provider}\n"); @@ -225,11 +236,17 @@ impl TableProvider for IndexTableProvider { // convert filters like [`a = 1`, `b = 2`] to a single filter like `a = 1 AND b = 2` let predicate = conjunction(filters.to_vec()); + let predicate = predicate + .map(|predicate| state.create_physical_expr(predicate, &df_schema)) + .transpose()? + // if there are no filters, use a literal true to have a predicate + // that always evaluates to true we can pass to the index + .unwrap_or_else(|| datafusion_physical_expr::expressions::lit(true)); // Use the index to find the files that might have data that matches the // predicate. Any file that can not have data that matches the predicate // will not be returned. 
- let files = self.index.get_files(predicate.clone()).await?; + let files = self.index.get_files(predicate.clone(), self.schema()).await?; // Record the last execution for debugging self.last_execution.lock().unwrap().get_mut().record(files.iter().map(|(filename, plan)| (filename.clone(), plan.access_plan.clone())).collect()); @@ -252,13 +269,6 @@ impl TableProvider for IndexTableProvider { ); } - let predicate = predicate - .map(|predicate| state.create_physical_expr(predicate, &df_schema)) - .transpose()? - // if there are no filters, use a literal true to have a predicate - // that always evaluates to true we can pass to the index - .unwrap_or_else(|| datafusion_physical_expr::expressions::lit(true)); - let exec = ParquetExec::builder(file_scan_config) .with_predicate(predicate) .build_arc(); From d3310d5d709e0bd9792bea61062eaf7ab7e8f715 Mon Sep 17 00:00:00 2001 From: Adrian Garcia Badaracco <1755071+adriangb@users.noreply.github.com> Date: Wed, 12 Jun 2024 10:56:54 -0500 Subject: [PATCH 10/21] Working example --- sqlx-sqlite/index.db | Bin 28672 -> 24576 bytes sqlx-sqlite/src/index.rs | 182 +++++++++++++++++++++++++++---------- sqlx-sqlite/src/main.rs | 44 ++++++++- sqlx-sqlite/src/rewrite.rs | 82 +++++++++++++++++ 4 files changed, 256 insertions(+), 52 deletions(-) create mode 100644 sqlx-sqlite/src/rewrite.rs diff --git a/sqlx-sqlite/index.db b/sqlx-sqlite/index.db index af3a784d3797343c1b5cd5f4774e621fa775795a..7068190602d5dc83a25e83c51c3e7e49ed22763e 100644 GIT binary patch literal 24576 zcmeI4e{3690mtv%+2`|}?_S)*ag#R9CH;|~&6<}VZKnZCTennA(>6`T3fjWm`J6a$ zY{x$m$E*W;r5XciuxVpLF!5VrnqbnP15prT{92I;nlum$q!m?KB|286zLDg)p3Ik3c!l#SXF1J=KhSEGv+C&^#o2jAfDvE>7y(9r5nu!u0Y-ok z_`e9;AM^Oa0|VehDrcApJCm9ppU9;0>G5pN$i=g{xRqV$oOkDFWN0j+j}7e@j_9o` zMOyWCy)_n3*yHhNs~%70>!RYALcOMvy z?ApJuUBl@n$a0Cy~&Y{CQhjvBujy+y6{E-2` z#gkFHkeyB7$GUMNpG!H%<0}o|c<)MQ#e;$v?&<(5JWe{DIkWW6mL2lfbuKgYc3EGxRcD~ScVsNGCo-zjBQSJi?7-d; zT)00nGSWhVsunIcs_S5*8G;4>V26`sL6wlM;X03*N%FanGK 
zBftnS0*nA7zz8q`jKIx7pg|D1HUaRJNBX{=w2{F}Gda)^-6mfQ^Vs46s90#xQqJ(v zFjz8rUE`a4RL1W~fWPM8>+sKb$2LZQ5nu!u0Y-okU<4QeMt~7u1Q-EEfDyR42?T{U z=fU4y6A%{9xJvr@KJaS+HgM{fKAY=y{5gSozlLl zJ)kAEG3|Cu*F^P#`l9-M^$GR3I;9>|2i3JIA2=U46Zmf63xQ7rVu9fR3anP%QO+r+ zm8X@*lzSCZ`JmFH)X7)nSL9#FPstC<1^FI%m)v>lV`cAv5nu!u0Y-okxOEA%6l4H^ z#Pd>9ldu&jCzNL^KiP`54{Ra(?b|ud_kbH2M9P@*Nn&lOM*1re+l>q$rBylXM7C91 zwmB_3+({Y(a9ne3V40ErU+WBW~n2C%Yf68b|XE=|1W}{(qC=qcUn5#NH_9-)c-jWskZbxEj~BWh5SAKaVOGOZRxAD zc+a_!P9$CPZzhpyOJAkM`N1QVy9AQ9B)-m(#U zja<~44VT*}hm5+lFc&ql;qoTRA%ktr7pw(**BdE^jIot3TJ!Va@&?KwLu=(sE58sf zucsU`qWIRVoDnX!QVtnND;={kR=C_kIb{5-ws@Wu`FERg}|&#Ekegb2?$pV4^|FS&PIw#aT0+G^a7q8p>INL{0pFIgvKw zm?)&2M&$jIsG4@hoWMji%4tB}C%rG5c=Fhos7g7jk+{dsw-%Qf^O&fga_W%hY0p{wo5YyIL?z0pMV|XS z-!U>VBZrClC}$P&4110l=}9AliHejHM4m>^J|j70q%l!1<Y*Hn zgpXlke~~PjA+h?VWJ%6 zD2V@?&}^7C>BGd`xcF0Q1B!ELH>t@VsSoHI!2wB5Pui{ zsd=-o5GvhEo%bPr6aS%nblwP+K1Q7v5qN{&n2qJFP^n0r_ag99aA|fjYllh;)OimA zp9RmQre-HXCHK24z8?XBd%#08iBvpPny2zS0^Q*5>Ez6GU1?4R__G*)H^zCyT?Jk7 z^z=+!DKFy>X!rE5xVI)U@l;(Yx0L1)_Zas}J2x>~Tgp0VP$}V2lzY^gv$JYxR`%l# z{G~$FTde&1|2xzRoO(&U0I$G{@H~74{vQ8d06&MP;4|<^_!am#dAON3;jEl2*`CT1>ke zuO95vc4*tREn1tlP77%PO;E4ix_@F~cg_ef0*nA7zz8q`i~u9R2rvSSz|BWM^!WWe zcakIff8d?ObAOjS0Pwvn@bdz98Sf>$f73bUxi_3+p1bsKH-mPg literal 28672 zcmeHPd2C$88GrY+S7OKGI8MB4=dcqyws(BRgd+h=NfQ#BID`OUahy$RaO}ihlK=$- zfl9PH23jbsT8;uOEmB3*ib6#X0Tq;jTG|4oK!w_>0tr!&=mGTn-h1O?#>2p$p_)yc z-S>XK-?uxz_l@&?GrPN^f6H*GnA<-#esHjq>(CZxhM{f9DViSknh^ zug`h>K@U?}>-b%%%)6SE%4_L|Ge1c0hX?AW98eA@2b2TK0p);lKslfsP!1>uJ`N6C zi!HahtxdnKvNX7Nq&PG-GI?;cFi{#T4NsJYhbHEouut3Gw{f5^H?Z;4Eq%F$d3Zyv zDc7)nc%)bu-q(=ZJA7bxw3OSrZ6LRG=awxkGlJ2 zCr3sKLt~SprD+txg$n*0JY(VD@Mz)E!I4RLlK3?MH#$Ffbl&-i()jS`fw_|Kb%9_S znAJotc=g$)Q{CLG?~W{+I6ML^XJMjvc(OP;ROAjV#x4$Q3SDRcnY{oo=Hh|kc<$`&o6g#}eRpni-|pPToder8ZG|9b^=%!1VMgHv z7bLfH>!$vlebbDI;meDKy_c1W6Vsl~y{az%+p%tt<3M?PTiCP$^qqoazHtt98eA@2b2TK0p);lKslfsIKCZNXf{TT z|IC_zIelhkDZp4@Hu~Y>!0Z|Tj7iVDp}}8uQw}Hxlmp5E<$!WPIiMU+4k!nd1IhvA 
zfO6pD??9z)Hb)Btb3Uz7XT!Mk%=_W!oXq>??dCP+keN6BYrJkeYdl~~!KVVgniD$v6r+d;?>Q|{dQkSIGrR?OdlXoUZlD$bM@tee5iGzs~5^nHZ@U>txSRZ)) z^Zwobn17=0doOtRc!#``yukgfd#`)gJ=sk-FFId$#+_4~r2RYl8}@{~!A@B(S&v!w zTQ^%rt%7xiwbqz4E-+3r))))*clB5GC-iUYx9C^sd-Y9vyI!sRNBgt(j4?I+2a$ZB zSL>b*Utyq`(T7m}*hdg);{k;2MRp$|tvpbLtv}k8h^*y-McDd| z^#?>+c%TwnS6L4r(#!)D*xF)UiO3orSct79)@DRj^S}aZzHZeZ(!>K9Y<}N-4UtA3 zNMrLl^Fc&b@jwcj+s$K$tmJ_tHgje_A}e?xfsMbISwxoeK!A-O8gC-fzym%uK5IOJ zNIegD*f`g?0g)UJxY$@_?5ZnY6zEmH2VX5wTWf@$9m3&1`L6cpuD7?YTmXVMkI)_y zI(w_i7eZ)=5axgpcs6Dx;ya&#MP@g@&PrJ3Pd_kaRqbF=^jNVaG2tU_@nK#fd z2W{o^K@q)zkEp0EpBMP?plWJn#uN{!x3{Le8_+0Tb@dDy4X3xFd~V=}gVDi|cr18m zVR;vv1VLy*d@|o!J}2;}hY`Nq*-^}_^fAkRt>v8{priOG0{H!4>xg9e>*iqnuf|G5 z>UhA$`eXV#h%Dm)3+p%Ok0P>^2TZJAsNaZ4Ee{x2Z_)QuktG1oD;v*lti7){SCJYx z4?~%q^;r9v_Fg+#3>PYLsj<6S{6NYW*hREZ>=QpqBb zq(dsH6iKjDQX!J$rILjrNxM|CKqP6CN-`ozt5lK}N!CgwDUqZ_DoKhY%~DB1Bv~Vs z1R}|5sl*panxqm>Bx#gNT#;mzRN{yvE2R=!Bv~PqSR%=Csl*gX8l)0KB&nB5bde+{ zm1rVKRwgN5Et1qpC0B_g%cPPkMUtgb$)`nH1$)rfKNGd6bB$ZOhgh)~$m5hrd3#F37BFO@&6_)^KRNa9H)pAt!2spNo2;z%X?MG{*oDT*YP zRI*PbF{P3rk;ITl+RA%DLYtG>3RbEZ1O+YYKSTi)y-V`p=l^%v>*LS=KM1}4vCPGp z{>-{eHshq+PHD5&KMgg{@hySPxsbScjmYZZiL2K4E@${u%}hQQbZk4ygX0u6P`G{eOLV02W@N zZhF}=GhCla2I2|#khsNm%<80$Nm~v>Go-ocj80RdG(;jl_r=Vf_FiWPOLt;1r zOQxVDVmJazrl1F6I08$ipy^>a0!yZ#%V9VIOQxW$VK@Ryrl5~uIFThK+)b_R25YCF zabY-Zv9(>)+Agq`K&QfRS}BLd>4G=}S`&t|Hs*BFIGte}=tUS#3+2!_onahkJ{V4O z%;}(UI>I>6Z7`fQltbfmgmIv~U^uH|4yJKnF4Pm~Cm2o><!)c5;bh#7f z!#L0}Fq~DCL*wMbIM5<6oRu-BoxYy-Fb?zt3}*%9&^Ya39B2X<&hm(pr&AnW4sf9A zXE+U%6UWJiQ=CAV&v5Ev4xQrpaEcSC>lsdta%h}ziW4a08BR9l&?%k|r#OMio#E6` z4viB|aRQ|}!&w${=oHU~Q=C9;&Ty7e4viD$Gl629;nc<)n$P(tpP~9@I7=vp#)I6LV-j!zG31g8G`_ET$Y9C(36is2NUm%%S<5kMbERXoiC*hsKHW8A@h`Qx$V) zKIfx+h8mgSETSA5C(36iiWyF2%$Z)kkMbF+V1`pcIW$g`&rtR!SZjsDBZL>!vVkj+^V?k3D-q+>=L1adh`U^{_fq#_1QWys_# zfsF-*k&GE}49MdwfjtI>k%$;HjUkJ(1ojjdMi4XN7?8tRavsOquBeM6D5JM1<+zz_3w7!Wts#gcw2`0kRblC?MF) 
z!XBLXOyVI#plV=&Zk!lMT#pEp3M|lt6Ag(Sh(LY70-ZQ`J5i6w79Qxp!HlQfk<)l!4fg)!uSMik9$1aNpL_pAWFrqWVed=cQ;2Nf zfky1@_ijVv6dqWGy-u%)$jLmg5_^W%fyhZbumZc!da!?WA`dLb?j7zg5n0az4cHxV zZ%5<=9;nCe3GO9`^zuLsyPgYqwT=g}*m=Qq5$WN9I_%u*Jda2>4=ls5Gky;uT|BT9 zI~$zCh;;HmEq2n*DTs9Nz!Ge~;-nD4JWzw}`|X!gR+F|zdq(@9-ud|7|DSDa)&BpS zJy5m(ulE0$Eq~@f?f=v7O;P**bKm|@AzGqV`~P#_Bf=$&+W(*b78|wyFG(0!H=FO6 r+W-Gx$I5E|e}2NK{eMZqz_Q_d$JGA6#4)x1KR;pq-}nFH?+yGP=c8)( diff --git a/sqlx-sqlite/src/index.rs b/sqlx-sqlite/src/index.rs index 429960f..b4f15b9 100644 --- a/sqlx-sqlite/src/index.rs +++ b/sqlx-sqlite/src/index.rs @@ -2,24 +2,24 @@ use std::{collections::HashMap, fmt::Display, fs::File, path::Path, sync::Arc}; use datafusion::arrow::array::AsArray; use datafusion::arrow::datatypes::{ - Int16Type, Int32Type, Int64Type, Int8Type, SchemaRef, UInt16Type, UInt32Type, UInt64Type, - UInt8Type, + DataType, Int16Type, Int32Type, Int64Type, Int8Type, SchemaRef, UInt16Type, UInt32Type, UInt64Type, UInt8Type }; use datafusion::physical_optimizer::pruning::PruningPredicate; use datafusion::{ datasource::physical_plan::parquet::{ParquetAccessPlan, RowGroupAccess, StatisticsConverter}, parquet::arrow::arrow_reader::ParquetRecordBatchReaderBuilder, }; -use datafusion_common::{internal_datafusion_err, DataFusionError, Result}; -use datafusion_expr::{col, lit}; +use datafusion_common::tree_node::TreeNode; +use datafusion_common::{internal_datafusion_err, DataFusionError, Result, tree_node::{Transformed, TransformedResult}}; use datafusion_physical_expr::PhysicalExpr; -use datafusion_sql::unparser::expr_to_sql; use sea_query::{ - Alias, Expr as SeaQExpr, OnConflict, Query, SimpleExpr, - SqliteQueryBuilder, + Alias, ColumnDef, CommonTableExpression, Expr as SeaQExpr, ForeignKey, ForeignKeyAction, Index, OnConflict, Query, SimpleExpr, SqliteQueryBuilder, Table, WithClause }; use sea_query_binder::SqlxBinder; use sqlx::SqlitePool; +use datafusion_physical_expr::expressions as phys_expr; + +use 
crate::rewrite::physical_expr_to_sea_query; /// SQLite secondary index for a set of parquet files /// @@ -113,33 +113,55 @@ impl SQLiteIndex { filter: Arc, schema: SchemaRef, ) -> Result> { + // Convert the predicate to a pruning predicate + // This transforms e.g. `a = 5` to `a_min <= 5 AND a_max >= 5` let pruning = PruningPredicate::try_new(filter, schema.clone())?; + let predicate = pruning.predicate_expr().clone(); + // Replace any `{col}_row_count` with `row_count` as we don't store per-column row counts + let predicate = predicate.transform(|expr| { + if let Some(column) = expr.as_any().downcast_ref::() { + if column.name().ends_with("_row_count") { + let column = phys_expr::Column::new(column.name().trim_end_matches("_row_count"), column.index()); + return Ok(Transformed::yes(Arc::new(column))); + } + } + Ok(Transformed::no(expr)) + }).data()?; + // Convert a DataFusion PhysicalExpr to a SeaQuery SimpleExpr + let predicate = physical_expr_to_sea_query(&predicate); - let statistics_predicate = pruning.predicate_expr(); - - // TODO: we can either convert the PhysicalExpr to an Expr and use expr_to_sql - // Or we convert it manually into SeaQuery expressions - // The former is likely less code, the latter would generalize more (there is no guarantee - // the index database supports DataFusion flavored SQL) - let expr = col("value_min").gt(lit(1)); - let sql = format!( - "r# - WITH row_groups AS ( - SELECT row_group - FROM row_group_statistics - WHERE {} - ) - SELECT file_name, file_size_bytes, row_group_count, row_group - FROM row_groups - JOIN file_statistics USING (file_id) - #", - expr_to_sql(&expr)? 
- ); - - // TODO: we could aggregate the row groups into an array in the query to transmit less data over the wire - // (and maybe avoid the join), leaving that as a TODO since it introduces more complexity and coupling to the index's backing store - // Result is in the form of (file_name, file_size, row_group_count, row_group_to_scan) - let row_groups: Vec<(String, i64, i64, i64)> = sqlx::query_as(&sql) + let stats_query = Query::select() + .from(Alias::new("row_group_statistics")) + .columns(vec![ + Alias::new("file_id"), + Alias::new("row_group"), + ]) + .and_where(predicate).to_owned(); + + let cte = CommonTableExpression::new() + .query(stats_query) + .table_name(Alias::new("row_groups")).to_owned(); + + let files_query = Query::select() + .from(Alias::new("file_statistics")) + .columns(vec![ + Alias::new("file_name"), + Alias::new("file_size_bytes"), + Alias::new("row_group_count"), + ]) + .inner_join( + Alias::new("row_groups"), + SeaQExpr::col((Alias::new("file_statistics"), Alias::new("file_id"))).equals((Alias::new("row_groups"), Alias::new("file_id"))), + ) + .column(Alias::new("row_group")) + .distinct() + .to_owned(); + + let query = files_query.with(WithClause::new().cte(cte).to_owned()); + + let (sql, values) = query.build_sqlx(SqliteQueryBuilder); + + let row_groups: Vec<(String, i64, i64, i64)> = sqlx::query_as_with(&sql, values) .fetch_all(&self.pool) .await .unwrap(); // TODO: handle error, possibly failing gracefully by scanning all files? @@ -456,30 +478,90 @@ impl SQLiteIndex { // The statistics columns are hardcoded in this example // It would be up to you to decide if this is appropriate for your use case // You could also store the statistics in a more flexible way, e.g. 
as a JSON blob or as an entity-attribute-value table - let query = r#" - CREATE TABLE IF NOT EXISTS column_statistics ( - file_id INTEGER NOT NULL, - row_group INTEGER NOT NULL, - row_count INTEGER NOT NULL, - file_name_null_count INTEGER NOT NULL, - file_name_min_value TEXT, - file_name_max_value TEXT, - value_null_count INTEGER NOT NULL, - value_min_value INTEGER, - value_max_value INTEGER, - text_null_count INTEGER NOT NULL, - text_min_value TEXT, - text_max_value TEXT, - PRIMARY KEY (file_id, column_name, row_group), - FOREIGN KEY (file_id) REFERENCES file_statistics(file_id) + // let query = r#" + // CREATE TABLE IF NOT EXISTS row_group_statistics ( + // file_id INTEGER NOT NULL, + // row_group INTEGER NOT NULL, + // row_count INTEGER NOT NULL, + // PRIMARY KEY (file_id, row_group), + // FOREIGN KEY (file_id) REFERENCES file_statistics(file_id) + // ) + // "#; + // sqlx::query(&query).execute(&self.pool).await?; + + let sql = Table::create() + .table(Alias::new("file_statistics")) + .if_not_exists() + .col(ColumnDef::new(Alias::new("file_id")).integer().primary_key().auto_increment()) + .col(ColumnDef::new(Alias::new("file_name")).string().not_null().unique_key()) + .col(ColumnDef::new(Alias::new("file_size_bytes")).integer().not_null()) + .col(ColumnDef::new(Alias::new("row_group_count")).integer().not_null()) + .col(ColumnDef::new(Alias::new("row_count")).integer().not_null()) + .to_owned() + .build(SqliteQueryBuilder); + + sqlx::query(&sql).execute(&self.pool).await?; + + let mut table = Table::create() + .table(Alias::new("row_group_statistics")) + .if_not_exists() + .col(ColumnDef::new(Alias::new("file_id")).integer().not_null()) + .col(ColumnDef::new(Alias::new("row_group")).integer().not_null()) + .col(ColumnDef::new(Alias::new("row_count")).integer().not_null()) + .primary_key(Index::create().col(Alias::new("file_id")).col(Alias::new("row_group"))) + .foreign_key( + ForeignKey::create() + .from(Alias::new("row_group_statistics"), Alias::new("file_id")) 
+ .to(Alias::new("file_statistics"), Alias::new("file_id")) + .on_delete(ForeignKeyAction::Cascade) ) - "#; - sqlx::query(&query).execute(&self.pool).await?; + .to_owned(); + + for field in self.schema.fields().iter() { + table.col( + ColumnDef::new(Alias::new(format!("{}_null_count", field.name()))) + .integer() + .not_null() + ); + for suffix in ["min", "max"] { + let mut stats_col = ColumnDef::new(Alias::new(format!("{}_{}", field.name(), suffix))); + set_column_type(&mut stats_col, field.data_type().clone()); + if !field.is_nullable() { + stats_col.not_null(); + } + table.col(&mut stats_col); + } + } + + let sql = table.build(SqliteQueryBuilder); + + sqlx::query(&sql).execute(&self.pool).await?; Ok(()) } } +fn set_column_type(column: &mut ColumnDef, field_type: DataType) -> &mut ColumnDef { + match field_type { + DataType::Int8 => column.tiny_integer(), + DataType::UInt8 => column.tiny_unsigned(), + DataType::Int16 => column.small_integer(), + DataType::UInt16 => column.small_unsigned(), + DataType::Int32 => column.integer(), + DataType::UInt32 => column.unsigned(), + DataType::Int64 => column.big_integer(), + DataType::UInt64 => column.big_unsigned(), + DataType::Float32 => column.float(), + DataType::Float64 => column.double(), + DataType::Utf8 => column.string(), + DataType::LargeUtf8 => column.string(), + DataType::Binary => column.binary(), + DataType::FixedSizeBinary(_) => column.binary(), + DataType::LargeBinary => column.binary(), + _ => todo!("Add support for more types"), + } +} + #[derive(Debug, Clone)] pub struct FileScanPlan { pub file_size: u64, diff --git a/sqlx-sqlite/src/main.rs b/sqlx-sqlite/src/main.rs index 10fc7af..3ceb315 100644 --- a/sqlx-sqlite/src/main.rs +++ b/sqlx-sqlite/src/main.rs @@ -25,6 +25,7 @@ use url::Url; use crate::index::SQLiteIndex; mod index; +mod rewrite; /// This example demonstrates building a secondary index over multiple Parquet /// files and using that index during query to skip ("prune") files and row groups @@ 
-102,10 +103,13 @@ async fn main() -> anyhow::Result<()> { // Create a table provider with and our special index. let index = SQLiteIndex::new( pool.clone(), + // You probably don't want to index _every_ column in your data + // For example, indexing a column of random strings like a UUID would be pointless + // using a min/max based index like this. + // In this example we choose to index only the "value" and "text" columns, ignoring "file_name" Arc::new( Schema::new( vec![ - Field::new("file_name", DataType::Utf8, false), Field::new("value", DataType::Int32, false), Field::new("text", DataType::Utf8, false), ] @@ -157,6 +161,30 @@ async fn main() -> anyhow::Result<()> { .await?; println!("Files scanned: {:?}\n", provider.last_execution()); + + // it's also possible to combine predicates on multiple columns + // for example `value < 20 AND text = 'a'` would only read file 1 + // while `value > 500 AND text = 'a'` would read no files + println!("** Select data, predicate `value < 20 AND text = 'a'`"); + ctx.sql( + "SELECT file_name, count(value) FROM index_table \ + WHERE value < 20 AND text = 'a' GROUP BY file_name", + ) + .await? + .show() + .await?; + println!("Files scanned: {:?}\n", provider.last_execution()); + + println!("** Select data, predicate `value > 500 AND text = 'a'`"); + ctx.sql( + "SELECT file_name, count(value) FROM index_table \ + WHERE value > 500 AND text = 'a' GROUP BY file_name", + ) + .await? 
+ .show() + .await?; + println!("Files scanned: {:?}\n", provider.last_execution()); + pool.close().await; Ok(()) @@ -345,8 +373,20 @@ fn make_demo_file(path: impl AsRef, value_range: Range) -> Result<()> let num_values = value_range.len(); let file_names = StringArray::from_iter_values(std::iter::repeat(&filename).take(num_values)); let values = Int32Array::from_iter_values(value_range.clone()); + + fn int_to_chars(mut n: i32) -> String { + let mut result = String::new(); + while n > 0 { + n -= 1; + let c = (n % 26) as u8 + b'a'; + result.push(c as char); + n /= 26; + } + result.chars().rev().collect() + } + let texts: StringArray = value_range - .map(|i| format!("text{}", i)) + .map(int_to_chars) .collect::>() .into(); let batch = RecordBatch::try_from_iter(vec![ diff --git a/sqlx-sqlite/src/rewrite.rs b/sqlx-sqlite/src/rewrite.rs new file mode 100644 index 0000000..52e9e63 --- /dev/null +++ b/sqlx-sqlite/src/rewrite.rs @@ -0,0 +1,82 @@ +use datafusion_common::ScalarValue; +use datafusion_expr::Operator; +use datafusion_physical_expr::{expressions as phys_expr, PhysicalExprRef}; +use sea_query::{Alias, BinOper, CaseStatement, ColumnRef, IntoIden, SimpleExpr, Value}; + + +pub fn physical_expr_to_sea_query(expr: &PhysicalExprRef) -> SimpleExpr { + if let Some(expr) = expr.as_any().downcast_ref::(){ + let left = physical_expr_to_sea_query(expr.left()); + let right = physical_expr_to_sea_query(expr.right()); + match expr.op() { + Operator::Eq => left.binary(BinOper::Equal, right), + Operator::NotEq => left.binary(BinOper::NotEqual, right), + Operator::Lt => left.binary(BinOper::SmallerThan, right), + Operator::LtEq => left.binary(BinOper::SmallerThanOrEqual, right), + Operator::Gt => left.binary(BinOper::GreaterThan, right), + Operator::GtEq => left.binary(BinOper::GreaterThanOrEqual, right), + Operator::Plus => left.binary(BinOper::Add, right), + Operator::Minus => left.binary(BinOper::Sub, right), + Operator::Multiply => left.binary(BinOper::Mul, right), + 
Operator::Divide => left.binary(BinOper::Div, right), + Operator::Modulo => left.binary(BinOper::Mod, right), + Operator::And => left.binary(BinOper::And, right), + Operator::Or => left.binary(BinOper::Or, right), + Operator::LikeMatch => left.binary(BinOper::Like, right), + Operator::NotLikeMatch => left.binary(BinOper::NotLike, right), + Operator::BitwiseShiftLeft => left.binary(BinOper::LShift, right), + Operator::BitwiseShiftRight => left.binary(BinOper::RShift, right), + _ => SimpleExpr::Constant(Value::Bool(Some(true))) + } + } else if let Some(expr) = expr.as_any().downcast_ref::() { + SimpleExpr::Column(ColumnRef::Column(Alias::new(expr.name().to_string()).into_iden())) + } else if let Some(expr) = expr.as_any().downcast_ref::() { + match expr.value() { + ScalarValue::Null => SimpleExpr::Keyword(sea_query::Keyword::Null), + ScalarValue::Boolean(v) => SimpleExpr::Constant(Value::Bool(*v)), + ScalarValue::Float32(v) => SimpleExpr::Constant(Value::Float(*v)), + ScalarValue::Float64(v) => SimpleExpr::Constant(Value::Double(*v)), + ScalarValue::Int8(v) => SimpleExpr::Constant(Value::TinyInt(*v)), + ScalarValue::Int16(v) => SimpleExpr::Constant(Value::SmallInt(*v)), + ScalarValue::Int32(v) => SimpleExpr::Constant(Value::Int(*v)), + ScalarValue::Int64(v) => SimpleExpr::Constant(Value::BigInt(*v)), + ScalarValue::UInt8(v) => SimpleExpr::Constant(Value::TinyUnsigned(*v)), + ScalarValue::UInt16(v) => SimpleExpr::Constant(Value::SmallUnsigned(*v)), + ScalarValue::UInt32(v) => SimpleExpr::Constant(Value::Unsigned(*v)), + ScalarValue::UInt64(v) => SimpleExpr::Constant(Value::BigUnsigned(*v)), + ScalarValue::Utf8(v) => match v { + Some(v) => SimpleExpr::Constant(Value::String(Some(Box::new(v.to_string())))), + None => SimpleExpr::Constant(Value::String(None)), + }, + ScalarValue::LargeUtf8(v) => match v { + Some(v) => SimpleExpr::Constant(Value::String(Some(Box::new(v.to_string())))), + None => SimpleExpr::Constant(Value::String(None)), + }, + ScalarValue::Binary(v) => 
match v { + Some(v) => SimpleExpr::Constant(Value::Bytes(Some(Box::new(v.to_vec())))), + None => SimpleExpr::Constant(Value::Bytes(None)), + }, + ScalarValue::FixedSizeBinary(_, v) => match v { + Some(v) => SimpleExpr::Constant(Value::Bytes(Some(Box::new(v.to_vec())))), + None => SimpleExpr::Constant(Value::Bytes(None)), + }, + ScalarValue::LargeBinary(v) => match v { + Some(v) => SimpleExpr::Constant(Value::Bytes(Some(Box::new(v.to_vec())))), + None => SimpleExpr::Constant(Value::Bytes(None)), + }, + // Extend with other types, e.g. to support arrays, dates, etc. + _ => SimpleExpr::Constant(Value::Bool(Some(true))) + } + } else if let Some(expr) = expr.as_any().downcast_ref::() { + let mut case = CaseStatement::new(); + for (when, then) in expr.when_then_expr() { + case = case.case(physical_expr_to_sea_query(when), physical_expr_to_sea_query(then)); + } + if let Some(else_exp) = expr.else_expr() { + case = case.finally(physical_expr_to_sea_query(else_exp)); + }; + SimpleExpr::Case(Box::new(case)) + } else { + SimpleExpr::Constant(Value::Bool(Some(true))) + } +} \ No newline at end of file From 4fde60a0ef20e581b9e5f50f8df38361e6747017 Mon Sep 17 00:00:00 2001 From: Adrian Garcia Badaracco <1755071+adriangb@users.noreply.github.com> Date: Wed, 12 Jun 2024 10:58:18 -0500 Subject: [PATCH 11/21] Add comments --- sqlx-sqlite/src/index.rs | 1 + sqlx-sqlite/src/rewrite.rs | 1 + 2 files changed, 2 insertions(+) diff --git a/sqlx-sqlite/src/index.rs b/sqlx-sqlite/src/index.rs index b4f15b9..ea2ea9b 100644 --- a/sqlx-sqlite/src/index.rs +++ b/sqlx-sqlite/src/index.rs @@ -118,6 +118,7 @@ impl SQLiteIndex { let pruning = PruningPredicate::try_new(filter, schema.clone())?; let predicate = pruning.predicate_expr().clone(); // Replace any `{col}_row_count` with `row_count` as we don't store per-column row counts + // (they're the same for all columns in a row group) let predicate = predicate.transform(|expr| { if let Some(column) = expr.as_any().downcast_ref::() { if 
column.name().ends_with("_row_count") { diff --git a/sqlx-sqlite/src/rewrite.rs b/sqlx-sqlite/src/rewrite.rs index 52e9e63..3a75240 100644 --- a/sqlx-sqlite/src/rewrite.rs +++ b/sqlx-sqlite/src/rewrite.rs @@ -4,6 +4,7 @@ use datafusion_physical_expr::{expressions as phys_expr, PhysicalExprRef}; use sea_query::{Alias, BinOper, CaseStatement, ColumnRef, IntoIden, SimpleExpr, Value}; +/// Convert a DataFusion PhysicalExpr to a SeaQuery SimpleExpr pub fn physical_expr_to_sea_query(expr: &PhysicalExprRef) -> SimpleExpr { if let Some(expr) = expr.as_any().downcast_ref::(){ let left = physical_expr_to_sea_query(expr.left()); From 69855f76748a7e4a1e9efd09bcfdd27c36f075df Mon Sep 17 00:00:00 2001 From: Adrian Garcia Badaracco <1755071+adriangb@users.noreply.github.com> Date: Wed, 12 Jun 2024 10:58:45 -0500 Subject: [PATCH 12/21] Update README.md --- sqlx-sqlite/README.md | 37 ++++++++++++++++++++++++++----------- 1 file changed, 26 insertions(+), 11 deletions(-) diff --git a/sqlx-sqlite/README.md b/sqlx-sqlite/README.md index 6135e60..ebdc72d 100644 --- a/sqlx-sqlite/README.md +++ b/sqlx-sqlite/README.md @@ -24,18 +24,18 @@ SQLiteIndex() +---------------+-------+ | file_name | value | +---------------+-------+ -| file2.parquet | 100 | -| file2.parquet | 101 | -| file2.parquet | 102 | -| file2.parquet | 103 | -| file2.parquet | 104 | -| file2.parquet | 105 | -| file2.parquet | 106 | -| file2.parquet | 107 | -| file2.parquet | 108 | -| file2.parquet | 109 | +| file1.parquet | 0 | +| file1.parquet | 1 | +| file1.parquet | 2 | +| file1.parquet | 3 | +| file1.parquet | 4 | +| file1.parquet | 5 | +| file1.parquet | 6 | +| file1.parquet | 7 | +| file1.parquet | 8 | +| file1.parquet | 9 | +---------------+-------+ -Files scanned: [("file2.parquet", ParquetAccessPlan { row_groups: [Scan, Scan] }), ("file1.parquet", ParquetAccessPlan { row_groups: [Scan, Scan] }), ("file3.parquet", ParquetAccessPlan { row_groups: [Scan, Scan, Scan, Scan, Scan, Scan, Scan, Scan, Scan, Scan, Scan, 
Scan, Scan, Scan, Scan, Scan, Scan, Scan, Scan, Scan, Scan, Scan, Scan, Scan, Scan, Scan, Scan, Scan, Scan, Scan, Scan, Scan, Scan, Scan, Scan, Scan, Scan, Scan, Scan, Scan, Scan, Scan, Scan, Scan, Scan, Scan, Scan, Scan, Scan, Scan, Scan, Scan, Scan, Scan, Scan, Scan] })] +Files scanned: [("file1.parquet", ParquetAccessPlan { row_groups: [Scan, Scan] }), ("file2.parquet", ParquetAccessPlan { row_groups: [Scan, Scan] }), ("file3.parquet", ParquetAccessPlan { row_groups: [Scan, Scan, Scan, Scan, Scan, Scan, Scan, Scan, Scan, Scan, Scan, Scan, Scan, Scan, Scan, Scan, Scan, Scan, Scan, Scan, Scan, Scan, Scan, Scan, Scan, Scan, Scan, Scan, Scan, Scan, Scan, Scan, Scan, Scan, Scan, Scan, Scan, Scan, Scan, Scan, Scan, Scan, Scan, Scan, Scan, Scan, Scan, Scan, Scan, Scan, Scan, Scan, Scan, Scan, Scan, Scan] })] ** Select data, predicate `value = 150` +---------------+-------+ @@ -53,6 +53,21 @@ Files scanned: [("file2.parquet", ParquetAccessPlan { row_groups: [Skip, Scan] } | file3.parquet | 2499 | +---------------+--------------------------+ Files scanned: [("file1.parquet", ParquetAccessPlan { row_groups: [Scan, Skip] }), ("file3.parquet", ParquetAccessPlan { row_groups: [Skip, Skip, Skip, Skip, Skip, Skip, Scan, Scan, Scan, Scan, Scan, Scan, Scan, Scan, Scan, Scan, Scan, Scan, Scan, Scan, Scan, Scan, Scan, Scan, Scan, Scan, Scan, Scan, Scan, Scan, Scan, Scan, Scan, Scan, Scan, Scan, Scan, Scan, Scan, Scan, Scan, Scan, Scan, Scan, Scan, Scan, Scan, Scan, Scan, Scan, Scan, Scan, Scan, Scan, Scan, Scan] })] + +** Select data, predicate `value < 20 AND text = 'a'` ++---------------+--------------------------+ +| file_name | COUNT(index_table.value) | ++---------------+--------------------------+ +| file1.parquet | 1 | ++---------------+--------------------------+ +Files scanned: [("file1.parquet", ParquetAccessPlan { row_groups: [Scan, Skip] })] + +** Select data, predicate `value > 500 AND text = 'a'` ++-----------+--------------------------+ +| file_name | 
COUNT(index_table.value) | ++-----------+--------------------------+ ++-----------+--------------------------+ +Files scanned: [] ``` As you can see the index is being used to select which row groups to read from the Parquet files. From a49c8173447f6be49074700231204b586ae0ff2f Mon Sep 17 00:00:00 2001 From: Adrian Garcia Badaracco <1755071+adriangb@users.noreply.github.com> Date: Wed, 12 Jun 2024 10:59:03 -0500 Subject: [PATCH 13/21] replace db --- sqlx-sqlite/index.db | Bin 24576 -> 4096 bytes 1 file changed, 0 insertions(+), 0 deletions(-) diff --git a/sqlx-sqlite/index.db b/sqlx-sqlite/index.db index 7068190602d5dc83a25e83c51c3e7e49ed22763e..4ebf78cf96088c7148cb47caf804cc036de75a34 100644 GIT binary patch delta 33 ncmZoTz}TQLL7J73fq{W>Vxv7bBZFSi0$w1GL143>z%+gUezXS1 literal 24576 zcmeI4e{3690mtv%+2`|}?_S)*ag#R9CH;|~&6<}VZKnZCTennA(>6`T3fjWm`J6a$ zY{x$m$E*W;r5XciuxVpLF!5VrnqbnP15prT{92I;nlum$q!m?KB|286zLDg)p3Ik3c!l#SXF1J=KhSEGv+C&^#o2jAfDvE>7y(9r5nu!u0Y-ok z_`e9;AM^Oa0|VehDrcApJCm9ppU9;0>G5pN$i=g{xRqV$oOkDFWN0j+j}7e@j_9o` zMOyWCy)_n3*yHhNs~%70>!RYALcOMvy z?ApJuUBl@n$a0Cy~&Y{CQhjvBujy+y6{E-2` z#gkFHkeyB7$GUMNpG!H%<0}o|c<)MQ#e;$v?&<(5JWe{DIkWW6mL2lfbuKgYc3EGxRcD~ScVsNGCo-zjBQSJi?7-d; zT)00nGSWhVsunIcs_S5*8G;4>V26`sL6wlM;X03*N%FanGK zBftnS0*nA7zz8q`jKIx7pg|D1HUaRJNBX{=w2{F}Gda)^-6mfQ^Vs46s90#xQqJ(v zFjz8rUE`a4RL1W~fWPM8>+sKb$2LZQ5nu!u0Y-okU<4QeMt~7u1Q-EEfDyR42?T{U z=fU4y6A%{9xJvr@KJaS+HgM{fKAY=y{5gSozlLl zJ)kAEG3|Cu*F^P#`l9-M^$GR3I;9>|2i3JIA2=U46Zmf63xQ7rVu9fR3anP%QO+r+ zm8X@*lzSCZ`JmFH)X7)nSL9#FPstC<1^FI%m)v>lV`cAv5nu!u0Y-okxOEA%6l4H^ z#Pd>9ldu&jCzNL^KiP`54{Ra(?b|ud_kbH2M9P@*Nn&lOM*1re+l>q$rBylXM7C91 zwmB_3+({Y(a9ne3V40ErU+WBW~n2C%Yf68b|XE=|1W}{(qC=qcUn5#NH_9-)c-jWskZbxEj~BWh5SAKaVOGOZRxAD zc+a_!P9$CPZzhpyOJAkM`N1QVy9AQ9B)-m(#U zja<~44VT*}hm5+lFc&ql;qoTRA%ktr7pw(**BdE^jIot3TJ!Va@&?KwLu=(sE58sf zucsU`qWIRVoDnX!QVtnND;={kR=C_kIb{5-ws@Wu`FERg}|&#Ekegb2?$pV4^|FS&PIw#aT0+G^a7q8p>INL{0pFIgvKw 
zm?)&2M&$jIsG4@hoWMji%4tB}C%rG5c=Fhos7g7jk+{dsw-%Qf^O&fga_W%hY0p{wo5YyIL?z0pMV|XS z-!U>VBZrClC}$P&4110l=}9AliHejHM4m>^J|j70q%l!1<Y*Hn zgpXlke~~PjA+h?VWJ%6 zD2V@?&}^7C>BGd`xcF0Q1B!ELH>t@VsSoHI!2wB5Pui{ zsd=-o5GvhEo%bPr6aS%nblwP+K1Q7v5qN{&n2qJFP^n0r_ag99aA|fjYllh;)OimA zp9RmQre-HXCHK24z8?XBd%#08iBvpPny2zS0^Q*5>Ez6GU1?4R__G*)H^zCyT?Jk7 z^z=+!DKFy>X!rE5xVI)U@l;(Yx0L1)_Zas}J2x>~Tgp0VP$}V2lzY^gv$JYxR`%l# z{G~$FTde&1|2xzRoO(&U0I$G{@H~74{vQ8d06&MP;4|<^_!am#dAON3;jEl2*`CT1>ke zuO95vc4*tREn1tlP77%PO;E4ix_@F~cg_ef0*nA7zz8q`i~u9R2rvSSz|BWM^!WWe zcakIff8d?ObAOjS0Pwvn@bdz98Sf>$f73bUxi_3+p1bsKH-mPg From 5ed957e0f262eebdf734c8f2f6bb60480ca63b91 Mon Sep 17 00:00:00 2001 From: Adrian Garcia Badaracco <1755071+adriangb@users.noreply.github.com> Date: Wed, 12 Jun 2024 11:28:07 -0500 Subject: [PATCH 14/21] update comment --- sqlx-sqlite/src/index.rs | 45 ++++++++++++++++------------------------ 1 file changed, 18 insertions(+), 27 deletions(-) diff --git a/sqlx-sqlite/src/index.rs b/sqlx-sqlite/src/index.rs index ea2ea9b..02d6b00 100644 --- a/sqlx-sqlite/src/index.rs +++ b/sqlx-sqlite/src/index.rs @@ -45,36 +45,27 @@ use crate::rewrite::physical_expr_to_sea_query; /// | file_id | file_name | file_size_bytes | row_group_count | row_count | /// | 1 | file1.parquet | 1234 | 3 | 1000 | /// -/// And `SELECT * FROM column_statistics`: -/// | file_id | column_name | row_group | null_count | row_count | int_min_value | int_max_value | string_min_value | string_max_value | -/// |---------|-------------|-----------|------------|-----------|---------------|---------------|------------------|------------------| -/// | 1 | column1 | 0 | 0 | 1000 | 1 | 100 | | | -/// | 1 | column1 | 1 | 0 | 1000 | 101 | 200 | | | -/// | 1 | column1 | 2 | 0 | 1000 | 201 | 300 | | | -/// | 1 | column2 | 0 | 0 | 1000 | | | a | c | -/// | 1 | column2 | 1 | 0 | 1000 | | | c | x | -/// | 1 | column2 | 2 | 0 | 1000 | | | x | z | -/// -/// To do filtering on `column_statistics` we need to self-join the table on `file_id` 
and `row_group` for each column: +/// And `SELECT * FROM row_group_statistics`: +/// | file_id | row_group | row_count | column1_null_count | column1_min | column1_max | column2_null_count | column2_min | column2_max | +/// | 1 | 0 | 100 | 0 | 1 | 100 | 0 | "a" | "z" | +/// | 1 | 1 | 100 | 0 | 101 | 200 | 0 | "a" | "z" | +/// | 1 | 2 | 100 | 0 | 201 | 300 | 0 | "a" | "z" | +/// | 2 | 0 | 50 | 0 | 1 | 100 | 0 | "x" | "x" | +/// | 2 | 1 | 100 | 0 | 101 | 200 | 0 | "y" | "z" | +/// | 2 | 2 | 150 | 0 | 201 | 300 | 0 | "123" | "456" | /// +/// To do filtering we rewrite the filter expression to a set of SQL expressions that can be evaluated against the index. +/// For example, if we have a filter `a = 5` we would rewrite that to `a_min <= 5 AND a_max >= 5`: +/// /// ```sql -/// WITH column1_stats AS ( -/// SELECT file_id, row_group, int_min_value AS column1_min, int_max_value AS column1_max FROM column_statistics WHERE column_name = 'column1' -/// ), column2_stats AS ( -/// SELECT file_id, row_group, string_min_value AS column2_min, string_max_value AS column2_max FROM column_statistics WHERE column_name = 'column2' +/// WITH row_groups AS ( +/// SELECT file_id, row_group +/// FROM row_group_statistics +/// WHERE a_min <= 5 AND a_max >= 5 /// ) -/// SELECT * -/// FROM column1_stats -/// JOIN column2_stats USING (file_id, row_group) -/// ``` -/// -/// Then to prune files we apply the filter to the joined table, let's call it `wide_column_statistics`: -/// -/// ```sql -/// SELECT file_name, file_size_bytes, row_group_count, row_group -/// FROM wide_column_statistics -/// JOIN file_statistics USING (file_id) -/// WHERE column1_min <= 10 AND column1_max >= 10 AND column2_min <= 'b' AND column2_max >= 'b' +/// SELECT file_name, file_size_bytes, row_group +/// FROM file_statistics +/// JOIN row_groups USING (file_id) /// ``` /// /// While we use SQLite in this example, the index could be implemented with other databases or system. 
From 18469824ef3faabd866af6cee2ec853578b949a3 Mon Sep 17 00:00:00 2001 From: Adrian Garcia Badaracco <1755071+adriangb@users.noreply.github.com> Date: Wed, 12 Jun 2024 12:21:39 -0500 Subject: [PATCH 15/21] update comment --- sqlx-sqlite/src/index.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sqlx-sqlite/src/index.rs b/sqlx-sqlite/src/index.rs index 02d6b00..fc59470 100644 --- a/sqlx-sqlite/src/index.rs +++ b/sqlx-sqlite/src/index.rs @@ -38,8 +38,8 @@ use crate::rewrite::physical_expr_to_sea_query; /// /// The index is implemented as a SQLite database with two tables: /// - `file_statistics` with columns `file_id`, `file_name`, `file_size_bytes`, `row_group_count`, `row_count` -/// - `column_statistics` with columns `file_id`, `column_name`, `row_group`, `null_count`, `row_count`, -/// and min/max values for each data type we support +/// - `row_group_statistics` with columns `file_id`, `row_group`, `row_count`, +/// and min/max values for each column that is indexed. /// /// Here is roughly what `SELECT * FROM file_statistics` would look like: /// | file_id | file_name | file_size_bytes | row_group_count | row_count | From 1073e8155181a2210768f39652471d1ca57ff170 Mon Sep 17 00:00:00 2001 From: Adrian Garcia Badaracco <1755071+adriangb@users.noreply.github.com> Date: Wed, 12 Jun 2024 12:23:44 -0500 Subject: [PATCH 16/21] improve comment about {col}_row_count --- sqlx-sqlite/src/index.rs | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/sqlx-sqlite/src/index.rs b/sqlx-sqlite/src/index.rs index fc59470..017dad9 100644 --- a/sqlx-sqlite/src/index.rs +++ b/sqlx-sqlite/src/index.rs @@ -108,8 +108,10 @@ impl SQLiteIndex { // This transforms e.g. 
`a = 5` to `a_min <= 5 AND a_max >= 5` let pruning = PruningPredicate::try_new(filter, schema.clone())?; let predicate = pruning.predicate_expr().clone(); - // Replace any `{col}_row_count` with `row_count` as we don't store per-column row counts - // (they're the same for all columns in a row group) + // PruningPredicate references the row count of each column as `{col}_row_count` + // But we don't store per-column row counts in the index, we store them a single time for the row group + // since the row count is the same for all columns in a row group. + // Thus we replace any references to `{col}_row_count` with `row_count` in the predicate. let predicate = predicate.transform(|expr| { if let Some(column) = expr.as_any().downcast_ref::() { if column.name().ends_with("_row_count") { From d47bbceccce8b048e933045e2a1f65984bfa1b9c Mon Sep 17 00:00:00 2001 From: Adrian Garcia Badaracco <1755071+adriangb@users.noreply.github.com> Date: Wed, 12 Jun 2024 12:30:24 -0500 Subject: [PATCH 17/21] Move initialize into new --- sqlx-sqlite/index.db | Bin 4096 -> 24576 bytes sqlx-sqlite/src/index.rs | 38 ++++++++++++++------------------------ sqlx-sqlite/src/main.rs | 4 ++-- 3 files changed, 16 insertions(+), 26 deletions(-) diff --git a/sqlx-sqlite/index.db b/sqlx-sqlite/index.db index 4ebf78cf96088c7148cb47caf804cc036de75a34..b3caeb72cb5a59ac348468e9f2e7cb1f519e21b5 100644 GIT binary patch literal 24576 zcmeI4du$ZP9mn@^kG5S#^ZU+@cRxE`Ysa!DC&p5attaxOEOGQfVU-|B!fss`1VM7&KKH$_ zHQSzrhD(w8e7ld5&|msa2>w;@YS#r|!yke+?Rx0V+jZx4Zh#x$2DkxkfE(ZjxB+f} z8~C3LJelzL(Y9^knY=@cj9tpt;>l9JQizuw;-tz>$}F!`^A5)%LsJobYG}_`MDJXw zOQ+tgcP3I9JD##S^;FKWlXgjuj!)^)>9MgsW~bYxX1_ff1;!JD4+#INE7b|wow3jtKx6G1dVRw#X z?LK$Uo-a>`{^pRU>6ooKvvg+5n$j&}E(`T;RbRHX^CZh#x$ z2DkxkfE(ZjxB+h9^I>3(>=U|VQEEQY5B3*G$z7Up#2)Jl>IX=2BS{pS5nhP2E0Tr8 zrC?w&xFA`odrmU^r2uch-@4!%H^2>W1Ka>NzzuK%+yFPg4R8b805`x5e7+5Y<*wF) z|Kgf}yii$O3XoRGU4iD}z~UM}^Y{NE{Da`W_{I%z1Ka>NzzuK%+yFPg4R8b805`x5 
za07R~fi_X@Z7mQmfBNHVKL09qI~!;77r?g0BZ(3_cgE2jjt!;Fe&!_Nn%c zc3FE_dtN)G+1i*ksIApR^*!~fdQm;6o>tTD7v>u`zzuK%+yFPg4R8b8z#VP?)m2dx z6-iRs+T@3^c1C;EN@c8c3e}ikJJzPOr>tboN~KYi3ASOaQ=7Exf|bmo3KML_>Id2e z%Pd)T9yv@fh}BE#4U0IISwv+fz*zm7`kFakwMZG2m|y^_N7ZM{THTzlpdu4Igw<|! z#;i2VS`8JL;6WVtSnV<$!>r7sJQMWez|R67xd&xB1m&1u3l4lU@VZ$@m?aZsnV=5` zP6nPcbF*f_Mj0mP#ex1n+|10Gxg<(6!2?+Ndti$>moYObG{*!zSb02ur$YO$x*ni%C)u=X%dJUOO(24yS|2d;#7}a@XFu?}wf5e|Q%9c?f zh%kYU{fhsKMloTOO>~k8)??o_zu(Bu8buq$nP46Ez2N(`k)1R0Npykgt$V*`kg8#p(Gey<*gN8_k@>noD(Em1tj6AUZ-mquWWI(DF+m6R zyyIaIy8B%~ssv3jK^yj*@_e6^62vjlBoo|)J!76Pk-{t~*(k;Y zVeDD!*-vtFq>w}tOt1>e?|IgcOorrA=pYk7EMJu0C388FNuzNl09ZaP|A5RE$Xpgh znIMGanEWJ3l*nuz9bkeWmUVf8SPn@P(HIkGSbASxPmC(D%B%M;24Pvn(vPJ-5keY` zj>i}3;jkRQ(pRM))f^;IX;V2- zF*`<>vKNa#6+bM_mhFxRQ}$r-8{$v$bH!xGUZyN#@r3yGY$l)T7-q^67W>5G>0CD5 zzDE@Ut(&HM+*tTT>`fKY+4dn-RF_EjIl}TW+k*k4Y*^vuMoro4HfhVkAjT?`~+--AHqMv-?{4m z*WCXF@CLjDFT$7L1$Z8wh0nq>@D!}V98ACy@DPl`5x5g>hy8FfTnAS}0Da(};A8N< zyI$}%xCX9(H^3!u5xfL0xN8Sz!L#5DI0dR82NK`}I0T|#1ndOcLH`~8!;T+5H^2>W z1Ka>NzzuK%+yFPg4R8bhPX@dm_Zf1hdq!T-?{Sy*E>jMa zC?6_PK3Jga&r@#6QTAmidoz>|q$zvmD7#aXU9*&%la!khl=s_|8!gIClX8PWsT0ce zCn?v(DepT$dGB$`d!C?NdyI0;QOdh#DA5th)rToN4pFvGQ?^Y}-Ze=Xj!~|fpo9l0 z!8m0oN*O#rsf|&p`zZsDQ!1mB{>LbN`zXC5l%5Esyq8iM_Icb@1$mDPMU;nHEopbF MCGB!At0M0B7p-&k@c;k- delta 33 ncmZoTz}TQLL7J73fq{W>Vxv7bBZFSi0$w1GL143>z%+gUezXS1 diff --git a/sqlx-sqlite/src/index.rs b/sqlx-sqlite/src/index.rs index 017dad9..657bcf7 100644 --- a/sqlx-sqlite/src/index.rs +++ b/sqlx-sqlite/src/index.rs @@ -85,8 +85,10 @@ impl Display for SQLiteIndex { } impl SQLiteIndex { - pub fn new(pool: SqlitePool, schema: SchemaRef) -> Self { - Self { pool, schema } + pub async fn try_new(pool: SqlitePool, schema: SchemaRef) -> anyhow::Result { + let r = Self { pool, schema }; + r.initialize().await?; + Ok(r) } /// Return the filenames / row groups that match the filter @@ -160,29 +162,19 @@ impl SQLiteIndex { .await .unwrap(); // TODO: handle error, possibly failing gracefully by 
scanning all files? - let mut file_scans: HashMap = HashMap::new(); // file_name -> (file_size, row_groups) + let mut file_scans: HashMap = HashMap::new(); // file_name -> (file_size, row_groups) for (file_name, file_size, file_row_group_counts, row_group_to_scan) in row_groups { - let (_, access_plan) = file_scans.entry(file_name).or_insert(( - file_size, - ParquetAccessPlan::new_none(file_row_group_counts as usize), - )); - // Here we could do finer grained row-level filtering, but this example does not implement that - access_plan.set(row_group_to_scan as usize, RowGroupAccess::Scan) + let file_scan_plan = file_scans.entry(file_name.clone()).or_insert( + FileScanPlan { + file_size: file_size as u64, + access_plan: ParquetAccessPlan::new_none(file_row_group_counts as usize), + } + ); + file_scan_plan.access_plan.set(row_group_to_scan as usize, RowGroupAccess::Scan); } - Ok(file_scans - .into_iter() - .map(|(file_name, (file_size, access_plan))| { - ( - file_name, - FileScanPlan { - file_size: file_size as u64, - access_plan, - }, - ) - }) - .collect()) + Ok(file_scans.into_iter().map(|(file_name, file_scan_plan)| (file_name, file_scan_plan)).collect()) } /// Add a new file to the index @@ -362,8 +354,6 @@ impl SQLiteIndex { file_statistics: FileStatisticsInsert, row_group_statistics: Vec, ) -> anyhow::Result<()> { - self.initialize().await?; - let mut transaction = self.pool.begin().await?; let (sql, values) = Query::insert() @@ -457,7 +447,7 @@ impl SQLiteIndex { } /// Simple migration function that idempotently creates the table for the index - pub async fn initialize(&self) -> anyhow::Result<()> { + async fn initialize(&self) -> anyhow::Result<()> { let query = r#" CREATE TABLE IF NOT EXISTS file_statistics ( file_id INTEGER PRIMARY KEY AUTOINCREMENT, diff --git a/sqlx-sqlite/src/main.rs b/sqlx-sqlite/src/main.rs index 3ceb315..9fa2d70 100644 --- a/sqlx-sqlite/src/main.rs +++ b/sqlx-sqlite/src/main.rs @@ -101,7 +101,7 @@ async fn main() -> anyhow::Result<()> { 
let data = DemoData::try_new()?; // Create a table provider with and our special index. - let index = SQLiteIndex::new( + let index = SQLiteIndex::try_new( pool.clone(), // You probably don't want to index _every_ column in your data // For example, indexing a column of random strings like a UUID would be pointless @@ -115,7 +115,7 @@ async fn main() -> anyhow::Result<()> { ] ) ) - ); + ).await?; let provider = Arc::new(IndexTableProvider::try_new(data.path(), index).await?); println!("** Table Provider:"); println!("{provider}\n"); From 27c1be430c1e84a9cdbdcc1d6242ab0b8f2d36f1 Mon Sep 17 00:00:00 2001 From: Adrian Garcia Badaracco <1755071+adriangb@users.noreply.github.com> Date: Wed, 12 Jun 2024 12:31:24 -0500 Subject: [PATCH 18/21] remove old query --- sqlx-sqlite/src/index.rs | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/sqlx-sqlite/src/index.rs b/sqlx-sqlite/src/index.rs index 657bcf7..7a1bc85 100644 --- a/sqlx-sqlite/src/index.rs +++ b/sqlx-sqlite/src/index.rs @@ -459,20 +459,6 @@ impl SQLiteIndex { "#; sqlx::query(&query).execute(&self.pool).await?; - // The statistics columns are hardcoded in this example - // It would be up to you to decide if this is appropriate for your use case - // You could also store the statistics in a more flexible way, e.g. 
as a JSON blob or as an entity-attribute-value table - // let query = r#" - // CREATE TABLE IF NOT EXISTS row_group_statistics ( - // file_id INTEGER NOT NULL, - // row_group INTEGER NOT NULL, - // row_count INTEGER NOT NULL, - // PRIMARY KEY (file_id, row_group), - // FOREIGN KEY (file_id) REFERENCES file_statistics(file_id) - // ) - // "#; - // sqlx::query(&query).execute(&self.pool).await?; - let sql = Table::create() .table(Alias::new("file_statistics")) .if_not_exists() From 42eb7889aecf1ce30b2d08a93f43aaa35968e1a6 Mon Sep 17 00:00:00 2001 From: Adrian Garcia Badaracco <1755071+adriangb@users.noreply.github.com> Date: Wed, 12 Jun 2024 12:38:25 -0500 Subject: [PATCH 19/21] reset db --- sqlx-sqlite/index.db | Bin 24576 -> 4096 bytes 1 file changed, 0 insertions(+), 0 deletions(-) diff --git a/sqlx-sqlite/index.db b/sqlx-sqlite/index.db index b3caeb72cb5a59ac348468e9f2e7cb1f519e21b5..4ebf78cf96088c7148cb47caf804cc036de75a34 100644 GIT binary patch delta 33 ncmZoTz}TQLL7J73fq{W>Vxv7bBZFSi0$w1GL143>z%+gUezXS1 literal 24576 zcmeI4du$ZP9mn@^kG5S#^ZU+@cRxE`Ysa!DC&p5attaxOEOGQfVU-|B!fss`1VM7&KKH$_ zHQSzrhD(w8e7ld5&|msa2>w;@YS#r|!yke+?Rx0V+jZx4Zh#x$2DkxkfE(ZjxB+f} z8~C3LJelzL(Y9^knY=@cj9tpt;>l9JQizuw;-tz>$}F!`^A5)%LsJobYG}_`MDJXw zOQ+tgcP3I9JD##S^;FKWlXgjuj!)^)>9MgsW~bYxX1_ff1;!JD4+#INE7b|wow3jtKx6G1dVRw#X z?LK$Uo-a>`{^pRU>6ooKvvg+5n$j&}E(`T;RbRHX^CZh#x$ z2DkxkfE(ZjxB+h9^I>3(>=U|VQEEQY5B3*G$z7Up#2)Jl>IX=2BS{pS5nhP2E0Tr8 zrC?w&xFA`odrmU^r2uch-@4!%H^2>W1Ka>NzzuK%+yFPg4R8b805`x5e7+5Y<*wF) z|Kgf}yii$O3XoRGU4iD}z~UM}^Y{NE{Da`W_{I%z1Ka>NzzuK%+yFPg4R8b805`x5 za07R~fi_X@Z7mQmfBNHVKL09qI~!;77r?g0BZ(3_cgE2jjt!;Fe&!_Nn%c zc3FE_dtN)G+1i*ksIApR^*!~fdQm;6o>tTD7v>u`zzuK%+yFPg4R8b8z#VP?)m2dx z6-iRs+T@3^c1C;EN@c8c3e}ikJJzPOr>tboN~KYi3ASOaQ=7Exf|bmo3KML_>Id2e z%Pd)T9yv@fh}BE#4U0IISwv+fz*zm7`kFakwMZG2m|y^_N7ZM{THTzlpdu4Igw<|! 
z#;i2VS`8JL;6WVtSnV<$!>r7sJQMWez|R67xd&xB1m&1u3l4lU@VZ$@m?aZsnV=5` zP6nPcbF*f_Mj0mP#ex1n+|10Gxg<(6!2?+Ndti$>moYObG{*!zSb02ur$YO$x*ni%C)u=X%dJUOO(24yS|2d;#7}a@XFu?}wf5e|Q%9c?f zh%kYU{fhsKMloTOO>~k8)??o_zu(Bu8buq$nP46Ez2N(`k)1R0Npykgt$V*`kg8#p(Gey<*gN8_k@>noD(Em1tj6AUZ-mquWWI(DF+m6R zyyIaIy8B%~ssv3jK^yj*@_e6^62vjlBoo|)J!76Pk-{t~*(k;Y zVeDD!*-vtFq>w}tOt1>e?|IgcOorrA=pYk7EMJu0C388FNuzNl09ZaP|A5RE$Xpgh znIMGanEWJ3l*nuz9bkeWmUVf8SPn@P(HIkGSbASxPmC(D%B%M;24Pvn(vPJ-5keY` zj>i}3;jkRQ(pRM))f^;IX;V2- zF*`<>vKNa#6+bM_mhFxRQ}$r-8{$v$bH!xGUZyN#@r3yGY$l)T7-q^67W>5G>0CD5 zzDE@Ut(&HM+*tTT>`fKY+4dn-RF_EjIl}TW+k*k4Y*^vuMoro4HfhVkAjT?`~+--AHqMv-?{4m z*WCXF@CLjDFT$7L1$Z8wh0nq>@D!}V98ACy@DPl`5x5g>hy8FfTnAS}0Da(};A8N< zyI$}%xCX9(H^3!u5xfL0xN8Sz!L#5DI0dR82NK`}I0T|#1ndOcLH`~8!;T+5H^2>W z1Ka>NzzuK%+yFPg4R8bhPX@dm_Zf1hdq!T-?{Sy*E>jMa zC?6_PK3Jga&r@#6QTAmidoz>|q$zvmD7#aXU9*&%la!khl=s_|8!gIClX8PWsT0ce zCn?v(DepT$dGB$`d!C?NdyI0;QOdh#DA5th)rToN4pFvGQ?^Y}-Ze=Xj!~|fpo9l0 z!8m0oN*O#rsf|&p`zZsDQ!1mB{>LbN`zXC5l%5Esyq8iM_Icb@1$mDPMU;nHEopbF MCGB!At0M0B7p-&k@c;k- From a6c1d693a6be9cc985cec16acaafa7a89400d516 Mon Sep 17 00:00:00 2001 From: Adrian Garcia Badaracco <1755071+adriangb@users.noreply.github.com> Date: Wed, 12 Jun 2024 12:41:52 -0500 Subject: [PATCH 20/21] use sqlx::FromRow for results --- sqlx-sqlite/index.db | Bin 4096 -> 24576 bytes sqlx-sqlite/src/index.rs | 22 +++++++++++++++------- 2 files changed, 15 insertions(+), 7 deletions(-) diff --git a/sqlx-sqlite/index.db b/sqlx-sqlite/index.db index 4ebf78cf96088c7148cb47caf804cc036de75a34..583eb3ce3e5da662d631494986dfb748661f8896 100644 GIT binary patch literal 24576 zcmeI4du$v>9mj7Ucejtb8DH$n#LjE&JnV}fiN|qX4I*xOiLAO#?Kq+a!7=-|^ZMQ| z-$SBOX<8JD1VMt*KT=UqA*KJ&s-jf{MG8fN6se(qP^qFI(m!YsqG<&|lz_zEO>9q$ zLy`C=m1cFP&&}^UJGb}Qxs`VOI6X7vRvaUlFJ(!^81r@bMA3K5Fnm6r=sg48b9rmE z<>iJ)fq8wWm!fZ^^xZK0yU(v*^8p850-gG`@N0MK&gW1Ka>NzzuK%+yFQ5 ze;If*DF@N`xbS$sLd=X)%GVO9QodS9lq;m-mMgARUa#gqJ{zByiyLzj_fEx)zV*8F 
z83RUN(#<#t*X}diT*XN_C1d*3oH2c7YHEnt>5i$DujVT2rb{GKbrQL1CX@KMPFXki zUxvt%`nqYwsaHO3{C1!A@U2(uN25t==rzfMvy%@@%+4G4$LEcK6+5@dhz%JhPR+(A zPfoAt6*FezC*rg5>ErR!x6H1dB+vHBa53Z5v~gd2D(;=2;}fTkPuv$bVki9p^to}t z=jLpuUM^<5+qwj)R`Tt3V!acX7+tR#cu)$U!9iiZJzs7ORCbD0Cuccpn(}S4WKB4b zBUxw2+cWm%86nsj@+=**J!e)w*_x(!+n8%YV?Z<3Y`12hF*!XKKN+7j*fTJ3X71GF zv^U@b@#(pt^>=L@r7;(O$eXsermk^jdUEDW{0{xgZqrGajfzuVdCTP;|8tL3!R=Z6 zKkl)9-`fs9_9-!dW(Mzcy4Pq+jIJq2-Ze#OU01^1Q-Ux0;Jfg34}9YWxB+f}8{h`G z0d9aB;0Cw>Zh#x$2DpK{!$7wb@bycA*t(@38!3>Iw=`1`V)h~J10;HpCNzzuK%+yFPg4R8b805`x5a0A@H-EANu z^|x>QSJnii<;u!ZfY>4RtF6U>l{J9Y&;JGZC!hD=8#llWa0A=`H^2>W1Ka>NzzuK% z+yFPg4SezqbPCd7dx3!Y(;umI|9@1Q^}!F}@8A{qBD?^<0t;{+-U|m|7x)+W1Goxa z22X>>Kn9!!M?f!7!q>xZhF=Lk8-5~O3eSg+h5N%GbR+b3=+)43q4S}7C=ogl8V+^o zH}!Y)EBf>LllsHDqfhB$dXFw>?`c=Hm$VDoSuO3oG2gfWZh#x$2DkxkfE(ZjKH&z? zQB4p8MHH3JPU#@lAJ<>D-He@f(LGFX0PA!5V|FTMyJ>WU3C6MBr=PZ+f}P5u!%VOr zYai&nwpFs7JUYY#V_5r%cHJfw+bW`iOn|ZWHSHCvv1F4nI=}>@So^H@gjK6sjVc;v zf_+#U(B`db)2h|bekRz9)xT-|R>icc4K&6CBUt^Jdc!*?t3nWFf?=$FQ+?GcB(0K# zMwws;t6x;lTe$_R;Glg>Fo@L=HDP5Ity~K2Wr96e`KLN;EoQ8Yi$<6rhLzWpo7O_k zT1=y1CK$lVca$quvS2M_(GU~#V!MyJ*n$Jc0!7o!ndvk#m|!ywfWW8CRKaw!Xgd@1VE@~Ju<4Y{R32?( zf^O`8&i@XZDoQ@*kAWwG|7@_mC+U^K-ho6Un7mWNvdcw6Kur(E`OZVnxs)f zJxmbA@;m-cZzNJ{pl&AU!t(R-HSb$MmI&I!1f5uZSpESiB}v6XhzT}ec}o5gDJ+nZ zgElfj1j{}0{Uo~ zkY+Oqmg|v-q+;=_;*aXhW-S_K$_f@A63^AldLtTQ%0Vpd7C%$9Yb2^O7zn*!K@%&@H61U4BhaR;$18eUvJ>`7U|2uZVr$_Wj(cq>7zVyRRTBk%)+G z-?ywKryLPA)f?8k97iG;TcmDP!-A%IV|tG;#G}pD@BbeKF(14MZ@~Ax{{rxP_!fM_ zTM4)fFToe!MfePS3Z94O;3KdBi_nF{TN{{xlW+nahNCbBdto<>Kott$Bk&>kBe(|M z0&jpT;4-)bUH})tGvFz39-IS@fCeZ67Z5NHX22wv0EfY-_r`qV2DkxkfE(ZjxB+f} z8{h`G0d9aBxVsDlWp7naI(U@wz&(`XM=19nrW`v&i4Rha9-!PePPunK<;WQ2Fs2+D zr5xNxxo0nBY=m-Pn6iI}a`zzRu051HW0X4vDEs;;dv{YByC}Esq};ZHa%&&umR`!u z24&B7%IJ-EtDHKQ$~9zySgbmH&Jdtl#z{;9Z^cyMF~17!y70=5lX#-QiGH# xpj5(?!4PFYr}S%-vPvl_lwvR-dkYFuz=I-4{&q{0+bvP@%igMhD9fs%{u^;b*-rof delta 33 ncmZoTz}TQLL7J73fq{W>Vxv7bBZFSi0$w1GL143>z%+gUezXS1 diff --git a/sqlx-sqlite/src/index.rs 
b/sqlx-sqlite/src/index.rs index 7a1bc85..fcc326d 100644 --- a/sqlx-sqlite/src/index.rs +++ b/sqlx-sqlite/src/index.rs @@ -157,21 +157,21 @@ impl SQLiteIndex { let (sql, values) = query.build_sqlx(SqliteQueryBuilder); - let row_groups: Vec<(String, i64, i64, i64)> = sqlx::query_as_with(&sql, values) + let row_groups: Vec = sqlx::query_as_with(&sql, values) .fetch_all(&self.pool) .await .unwrap(); // TODO: handle error, possibly failing gracefully by scanning all files? let mut file_scans: HashMap = HashMap::new(); // file_name -> (file_size, row_groups) - for (file_name, file_size, file_row_group_counts, row_group_to_scan) in row_groups { - let file_scan_plan = file_scans.entry(file_name.clone()).or_insert( + for row_group in row_groups { + let file_scan_plan = file_scans.entry(row_group.file_name.clone()).or_insert( FileScanPlan { - file_size: file_size as u64, - access_plan: ParquetAccessPlan::new_none(file_row_group_counts as usize), + file_size: row_group.file_size_bytes as u64, + access_plan: ParquetAccessPlan::new_none(row_group.row_group_count as usize), } ); - file_scan_plan.access_plan.set(row_group_to_scan as usize, RowGroupAccess::Scan); + file_scan_plan.access_plan.set(row_group.row_group as usize, RowGroupAccess::Scan); } Ok(file_scans.into_iter().map(|(file_name, file_scan_plan)| (file_name, file_scan_plan)).collect()) @@ -574,4 +574,12 @@ struct FileStatisticsInsert { file_size_bytes: i64, row_group_count: i64, row_count: i64, -} \ No newline at end of file +} + +#[derive(Debug, Clone, sqlx::FromRow)] +pub struct RowGroupToScan { + pub file_name: String, + pub file_size_bytes: i64, + pub row_group_count: i64, + pub row_group: i64, +} From e88641bd1498bb433718f299bd60360f474e9123 Mon Sep 17 00:00:00 2001 From: Adrian Garcia Badaracco <1755071+adriangb@users.noreply.github.com> Date: Wed, 12 Jun 2024 16:27:11 -0500 Subject: [PATCH 21/21] Cleaner conversion --- sqlx-sqlite/index.db | Bin 24576 -> 4096 bytes sqlx-sqlite/src/conversions.rs | 229 
+++++++++++++++++++++++++++++++++ sqlx-sqlite/src/index.rs | 167 ++++-------------------- sqlx-sqlite/src/main.rs | 2 +- sqlx-sqlite/src/rewrite.rs | 83 ------------ 5 files changed, 252 insertions(+), 229 deletions(-) create mode 100644 sqlx-sqlite/src/conversions.rs delete mode 100644 sqlx-sqlite/src/rewrite.rs diff --git a/sqlx-sqlite/index.db b/sqlx-sqlite/index.db index 583eb3ce3e5da662d631494986dfb748661f8896..4ebf78cf96088c7148cb47caf804cc036de75a34 100644 GIT binary patch delta 33 ncmZoTz}TQLL7J73fq{W>Vxv7bBZFSi0$w1GL143>z%+gUezXS1 literal 24576 zcmeI4du$v>9mj7Ucejtb8DH$n#LjE&JnV}fiN|qX4I*xOiLAO#?Kq+a!7=-|^ZMQ| z-$SBOX<8JD1VMt*KT=UqA*KJ&s-jf{MG8fN6se(qP^qFI(m!YsqG<&|lz_zEO>9q$ zLy`C=m1cFP&&}^UJGb}Qxs`VOI6X7vRvaUlFJ(!^81r@bMA3K5Fnm6r=sg48b9rmE z<>iJ)fq8wWm!fZ^^xZK0yU(v*^8p850-gG`@N0MK&gW1Ka>NzzuK%+yFQ5 ze;If*DF@N`xbS$sLd=X)%GVO9QodS9lq;m-mMgARUa#gqJ{zByiyLzj_fEx)zV*8F z83RUN(#<#t*X}diT*XN_C1d*3oH2c7YHEnt>5i$DujVT2rb{GKbrQL1CX@KMPFXki zUxvt%`nqYwsaHO3{C1!A@U2(uN25t==rzfMvy%@@%+4G4$LEcK6+5@dhz%JhPR+(A zPfoAt6*FezC*rg5>ErR!x6H1dB+vHBa53Z5v~gd2D(;=2;}fTkPuv$bVki9p^to}t z=jLpuUM^<5+qwj)R`Tt3V!acX7+tR#cu)$U!9iiZJzs7ORCbD0Cuccpn(}S4WKB4b zBUxw2+cWm%86nsj@+=**J!e)w*_x(!+n8%YV?Z<3Y`12hF*!XKKN+7j*fTJ3X71GF zv^U@b@#(pt^>=L@r7;(O$eXsermk^jdUEDW{0{xgZqrGajfzuVdCTP;|8tL3!R=Z6 zKkl)9-`fs9_9-!dW(Mzcy4Pq+jIJq2-Ze#OU01^1Q-Ux0;Jfg34}9YWxB+f}8{h`G z0d9aB;0Cw>Zh#x$2DpK{!$7wb@bycA*t(@38!3>Iw=`1`V)h~J10;HpCNzzuK%+yFPg4R8b805`x5a0A@H-EANu z^|x>QSJnii<;u!ZfY>4RtF6U>l{J9Y&;JGZC!hD=8#llWa0A=`H^2>W1Ka>NzzuK% z+yFPg4SezqbPCd7dx3!Y(;umI|9@1Q^}!F}@8A{qBD?^<0t;{+-U|m|7x)+W1Goxa z22X>>Kn9!!M?f!7!q>xZhF=Lk8-5~O3eSg+h5N%GbR+b3=+)43q4S}7C=ogl8V+^o zH}!Y)EBf>LllsHDqfhB$dXFw>?`c=Hm$VDoSuO3oG2gfWZh#x$2DkxkfE(ZjKH&z? 
zQB4p8MHH3JPU#@lAJ<>D-He@f(LGFX0PA!5V|FTMyJ>WU3C6MBr=PZ+f}P5u!%VOr zYai&nwpFs7JUYY#V_5r%cHJfw+bW`iOn|ZWHSHCvv1F4nI=}>@So^H@gjK6sjVc;v zf_+#U(B`db)2h|bekRz9)xT-|R>icc4K&6CBUt^Jdc!*?t3nWFf?=$FQ+?GcB(0K# zMwws;t6x;lTe$_R;Glg>Fo@L=HDP5Ity~K2Wr96e`KLN;EoQ8Yi$<6rhLzWpo7O_k zT1=y1CK$lVca$quvS2M_(GU~#V!MyJ*n$Jc0!7o!ndvk#m|!ywfWW8CRKaw!Xgd@1VE@~Ju<4Y{R32?( zf^O`8&i@XZDoQ@*kAWwG|7@_mC+U^K-ho6Un7mWNvdcw6Kur(E`OZVnxs)f zJxmbA@;m-cZzNJ{pl&AU!t(R-HSb$MmI&I!1f5uZSpESiB}v6XhzT}ec}o5gDJ+nZ zgElfj1j{}0{Uo~ zkY+Oqmg|v-q+;=_;*aXhW-S_K$_f@A63^AldLtTQ%0Vpd7C%$9Yb2^O7zn*!K@%&@H61U4BhaR;$18eUvJ>`7U|2uZVr$_Wj(cq>7zVyRRTBk%)+G z-?ywKryLPA)f?8k97iG;TcmDP!-A%IV|tG;#G}pD@BbeKF(14MZ@~Ax{{rxP_!fM_ zTM4)fFToe!MfePS3Z94O;3KdBi_nF{TN{{xlW+nahNCbBdto<>Kott$Bk&>kBe(|M z0&jpT;4-)bUH})tGvFz39-IS@fCeZ67Z5NHX22wv0EfY-_r`qV2DkxkfE(ZjxB+f} z8{h`G0d9aBxVsDlWp7naI(U@wz&(`XM=19nrW`v&i4Rha9-!PePPunK<;WQ2Fs2+D zr5xNxxo0nBY=m-Pn6iI}a`zzRu051HW0X4vDEs;;dv{YByC}Esq};ZHa%&&umR`!u z24&B7%IJ-EtDHKQ$~9zySgbmH&Jdtl#z{;9Z^cyMF~17!y70=5lX#-QiGH# xpj5(?!4PFYr}S%-vPvl_lwvR-dkYFuz=I-4{&q{0+bvP@%igMhD9fs%{u^;b*-rof diff --git a/sqlx-sqlite/src/conversions.rs b/sqlx-sqlite/src/conversions.rs new file mode 100644 index 0000000..50351d3 --- /dev/null +++ b/sqlx-sqlite/src/conversions.rs @@ -0,0 +1,229 @@ +use datafusion::arrow::{array::{Array, ArrayRef, AsArray}, datatypes::DataType}; +use datafusion_common::ScalarValue; +use datafusion_expr::Operator; +use datafusion::arrow::datatypes; +use datafusion_physical_expr::{expressions as phys_expr, PhysicalExprRef}; +use sea_query::{Alias, BinOper, CaseStatement, ColumnRef, IntoIden, SimpleExpr, Value}; + + +/// Convert a DataFusion PhysicalExpr to a SeaQuery SimpleExpr +pub fn physical_expr_to_sea_query(expr: &PhysicalExprRef) -> SimpleExpr { + if let Some(expr) = expr.as_any().downcast_ref::(){ + let left = physical_expr_to_sea_query(expr.left()); + let right = physical_expr_to_sea_query(expr.right()); + match expr.op() { + Operator::Eq => left.binary(BinOper::Equal, right), + 
Operator::NotEq => left.binary(BinOper::NotEqual, right), + Operator::Lt => left.binary(BinOper::SmallerThan, right), + Operator::LtEq => left.binary(BinOper::SmallerThanOrEqual, right), + Operator::Gt => left.binary(BinOper::GreaterThan, right), + Operator::GtEq => left.binary(BinOper::GreaterThanOrEqual, right), + Operator::Plus => left.binary(BinOper::Add, right), + Operator::Minus => left.binary(BinOper::Sub, right), + Operator::Multiply => left.binary(BinOper::Mul, right), + Operator::Divide => left.binary(BinOper::Div, right), + Operator::Modulo => left.binary(BinOper::Mod, right), + Operator::And => left.binary(BinOper::And, right), + Operator::Or => left.binary(BinOper::Or, right), + Operator::LikeMatch => left.binary(BinOper::Like, right), + Operator::NotLikeMatch => left.binary(BinOper::NotLike, right), + Operator::BitwiseShiftLeft => left.binary(BinOper::LShift, right), + Operator::BitwiseShiftRight => left.binary(BinOper::RShift, right), + _ => SimpleExpr::Constant(Value::Bool(Some(true))) + } + } else if let Some(expr) = expr.as_any().downcast_ref::() { + SimpleExpr::Column(ColumnRef::Column(Alias::new(expr.name().to_string()).into_iden())) + } else if let Some(expr) = expr.as_any().downcast_ref::() { + match expr.value() { + ScalarValue::Null => SimpleExpr::Keyword(sea_query::Keyword::Null), + ScalarValue::Boolean(v) => SimpleExpr::Constant(Value::Bool(*v)), + ScalarValue::Float32(v) => SimpleExpr::Constant(Value::Float(*v)), + ScalarValue::Float64(v) => SimpleExpr::Constant(Value::Double(*v)), + ScalarValue::Int8(v) => SimpleExpr::Constant(Value::TinyInt(*v)), + ScalarValue::Int16(v) => SimpleExpr::Constant(Value::SmallInt(*v)), + ScalarValue::Int32(v) => SimpleExpr::Constant(Value::Int(*v)), + ScalarValue::Int64(v) => SimpleExpr::Constant(Value::BigInt(*v)), + ScalarValue::UInt8(v) => SimpleExpr::Constant(Value::TinyUnsigned(*v)), + ScalarValue::UInt16(v) => SimpleExpr::Constant(Value::SmallUnsigned(*v)), + ScalarValue::UInt32(v) => 
SimpleExpr::Constant(Value::Unsigned(*v)), + ScalarValue::UInt64(v) => SimpleExpr::Constant(Value::BigUnsigned(*v)), + ScalarValue::Utf8(v) => match v { + Some(v) => SimpleExpr::Constant(Value::String(Some(Box::new(v.to_string())))), + None => SimpleExpr::Constant(Value::String(None)), + }, + ScalarValue::LargeUtf8(v) => match v { + Some(v) => SimpleExpr::Constant(Value::String(Some(Box::new(v.to_string())))), + None => SimpleExpr::Constant(Value::String(None)), + }, + ScalarValue::Binary(v) => match v { + Some(v) => SimpleExpr::Constant(Value::Bytes(Some(Box::new(v.to_vec())))), + None => SimpleExpr::Constant(Value::Bytes(None)), + }, + ScalarValue::FixedSizeBinary(_, v) => match v { + Some(v) => SimpleExpr::Constant(Value::Bytes(Some(Box::new(v.to_vec())))), + None => SimpleExpr::Constant(Value::Bytes(None)), + }, + ScalarValue::LargeBinary(v) => match v { + Some(v) => SimpleExpr::Constant(Value::Bytes(Some(Box::new(v.to_vec())))), + None => SimpleExpr::Constant(Value::Bytes(None)), + }, + // Extend with other types, e.g. to support arrays, dates, etc. 
+ _ => SimpleExpr::Constant(Value::Bool(Some(true))) + } + } else if let Some(expr) = expr.as_any().downcast_ref::() { + let mut case = CaseStatement::new(); + for (when, then) in expr.when_then_expr() { + case = case.case(physical_expr_to_sea_query(when), physical_expr_to_sea_query(then)); + } + if let Some(else_exp) = expr.else_expr() { + case = case.finally(physical_expr_to_sea_query(else_exp)); + }; + SimpleExpr::Case(Box::new(case)) + } else { + SimpleExpr::Constant(Value::Bool(Some(true))) + } +} + +/// Convert a DataFusion Array to a Vec of SeaQuery Values +pub fn array_to_values(array: ArrayRef) -> Option> { + let values = match array.data_type() { + DataType::Int8 => { + let array = array.as_primitive::(); + array.iter().map(|v| { + match v { + Some(v) => Value::TinyInt(Some(v)), + None => Value::TinyInt(None) + } + }).collect() + } + DataType::Int16 => { + let array = array.as_primitive::(); + array.iter().map(|v| { + match v { + Some(v) => Value::SmallInt(Some(v)), + None => Value::SmallInt(None) + } + }).collect() + } + DataType::Int32 => { + let array = array.as_primitive::(); + array.iter().map(|v| { + match v { + Some(v) => Value::Int(Some(v)), + None => Value::Int(None) + } + }).collect() + } + DataType::Int64 => { + let array = array.as_primitive::(); + array.iter().map(|v| { + match v { + Some(v) => Value::BigInt(Some(v)), + None => Value::BigInt(None) + } + }).collect() + } + DataType::UInt8 => { + let array = array.as_primitive::(); + array.iter().map(|v| { + match v { + Some(v) => Value::TinyUnsigned(Some(v)), + None => Value::TinyUnsigned(None) + } + }).collect() + } + DataType::UInt16 => { + let array = array.as_primitive::(); + array.iter().map(|v| { + match v { + Some(v) => Value::SmallUnsigned(Some(v)), + None => Value::SmallUnsigned(None) + } + }).collect() + } + DataType::UInt32 => { + let array = array.as_primitive::(); + array.iter().map(|v| { + match v { + Some(v) => Value::Unsigned(Some(v)), + None => Value::Unsigned(None) + } + 
}).collect() + } + DataType::UInt64 => { + let array = array.as_primitive::(); + array.iter().map(|v| { + match v { + Some(v) => Value::BigUnsigned(Some(v)), + None => Value::BigUnsigned(None) + } + }).collect() + } + DataType::Float32 => { + let array = array.as_primitive::(); + array.iter().map(|v| { + match v { + Some(v) => Value::Float(Some(v)), + None => Value::Float(None) + } + }).collect() + } + DataType::Float64 => { + let array = array.as_primitive::(); + array.iter().map(|v| { + match v { + Some(v) => Value::Double(Some(v)), + None => Value::Double(None) + } + }).collect() + } + DataType::Utf8 => { + let array = array.as_string::(); + array.iter().map(|v| { + match v { + Some(v) => Value::String(Some(Box::new(v.to_string()))), + None => Value::String(None) + } + }).collect() + } + DataType::LargeUtf8 => { + let array = array.as_string::(); + array.iter().map(|v| { + match v { + Some(v) => Value::String(Some(Box::new(v.to_string()))), + None => Value::String(None) + } + }).collect() + } + DataType::Binary => { + let array = array.as_binary::(); + array.iter().map(|v| { + match v { + Some(v) => Value::Bytes(Some(Box::new(v.to_vec()))), + None => Value::Bytes(None) + } + }).collect() + } + DataType::FixedSizeBinary(_) => { + let array = array.as_fixed_size_binary(); + array.iter().map(|v| { + match v { + Some(v) => Value::Bytes(Some(Box::new(v.to_vec()))), + None => Value::Bytes(None) + } + }).collect() + } + DataType::LargeBinary => { + let array = array.as_binary::(); + array.iter().map(|v| { + match v { + Some(v) => Value::Bytes(Some(Box::new(v.to_vec()))), + None => Value::Bytes(None) + } + }).collect() + } + // Extend with other types, e.g. to support arrays, dates, etc. 
+ _ => return None, + }; + Some(values) +} \ No newline at end of file diff --git a/sqlx-sqlite/src/index.rs b/sqlx-sqlite/src/index.rs index fcc326d..aa62456 100644 --- a/sqlx-sqlite/src/index.rs +++ b/sqlx-sqlite/src/index.rs @@ -1,9 +1,7 @@ use std::{collections::HashMap, fmt::Display, fs::File, path::Path, sync::Arc}; use datafusion::arrow::array::AsArray; -use datafusion::arrow::datatypes::{ - DataType, Int16Type, Int32Type, Int64Type, Int8Type, SchemaRef, UInt16Type, UInt32Type, UInt64Type, UInt8Type -}; +use datafusion::arrow::datatypes::{DataType, SchemaRef, UInt64Type}; use datafusion::physical_optimizer::pruning::PruningPredicate; use datafusion::{ datasource::physical_plan::parquet::{ParquetAccessPlan, RowGroupAccess, StatisticsConverter}, @@ -13,13 +11,13 @@ use datafusion_common::tree_node::TreeNode; use datafusion_common::{internal_datafusion_err, DataFusionError, Result, tree_node::{Transformed, TransformedResult}}; use datafusion_physical_expr::PhysicalExpr; use sea_query::{ - Alias, ColumnDef, CommonTableExpression, Expr as SeaQExpr, ForeignKey, ForeignKeyAction, Index, OnConflict, Query, SimpleExpr, SqliteQueryBuilder, Table, WithClause + Alias, ColumnDef, CommonTableExpression, Expr as SeaQExpr, ForeignKey, ForeignKeyAction, Index, OnConflict, Query, SimpleExpr, SqliteQueryBuilder, Table, Value, WithClause }; use sea_query_binder::SqlxBinder; use sqlx::SqlitePool; use datafusion_physical_expr::expressions as phys_expr; -use crate::rewrite::physical_expr_to_sea_query; +use crate::conversions::{array_to_values, physical_expr_to_sea_query}; /// SQLite secondary index for a set of parquet files /// @@ -209,132 +207,24 @@ impl SQLiteIndex { for field in self.schema.fields() { let column_name = field.name().clone(); let converter = StatisticsConverter::try_new(&column_name, schema, parquet_schema)?; - let min_values = converter.row_group_mins(row_groups.iter())?; - let max_values = converter.row_group_maxes(row_groups.iter())?; let null_counts = 
converter.row_group_null_counts(row_groups.iter())?; let null_counts = null_counts.as_primitive::(); + let (min_values, max_values) = match (array_to_values(converter.row_group_mins(row_groups.iter())?), array_to_values(converter.row_group_maxes(row_groups.iter())?)) { + (Some(min_values), Some(max_values)) => (min_values, max_values), + // If we don't support the type skip collecting statistics for this column + _ => continue, + }; + for row_group in 0..metadata.num_row_groups() { - match field.data_type() { - datafusion::arrow::datatypes::DataType::Int8 => { - let min_values = min_values.as_primitive::(); - let max_values = max_values.as_primitive::(); - let min = min_values.value(row_group) as i64; - let max = max_values.value(row_group) as i64; - let column_statistics = ColumnStatistics { - null_count: null_counts.value(row_group) as i64, - stats: MinMaxStats::Int(min, max), - }; - row_group_statistics[row_group] - .column_statistics - .push(column_statistics); - } - datafusion::arrow::datatypes::DataType::UInt8 => { - let min_values = min_values.as_primitive::(); - let max_values = max_values.as_primitive::(); - let min = min_values.value(row_group) as i64; - let max = max_values.value(row_group) as i64; - let column_statistics = ColumnStatistics { - null_count: null_counts.value(row_group) as i64, - stats: MinMaxStats::Int(min, max), - }; - row_group_statistics[row_group] - .column_statistics - .push(column_statistics); - } - datafusion::arrow::datatypes::DataType::Int16 => { - let min_values = min_values.as_primitive::(); - let max_values = max_values.as_primitive::(); - let min = min_values.value(row_group) as i64; - let max = max_values.value(row_group) as i64; - let column_statistics = ColumnStatistics { - null_count: null_counts.value(row_group) as i64, - stats: MinMaxStats::Int(min, max), - }; - row_group_statistics[row_group] - .column_statistics - .push(column_statistics); - } - datafusion::arrow::datatypes::DataType::UInt16 => { - let min_values = 
min_values.as_primitive::(); - let max_values = max_values.as_primitive::(); - let min = min_values.value(row_group) as i64; - let max = max_values.value(row_group) as i64; - let column_statistics = ColumnStatistics { - null_count: null_counts.value(row_group) as i64, - stats: MinMaxStats::Int(min, max), - }; - row_group_statistics[row_group] - .column_statistics - .push(column_statistics); - } - datafusion::arrow::datatypes::DataType::Int32 => { - let min_values = min_values.as_primitive::(); - let max_values = max_values.as_primitive::(); - let min = min_values.value(row_group) as i64; - let max = max_values.value(row_group) as i64; - let column_statistics = ColumnStatistics { - null_count: null_counts.value(row_group) as i64, - stats: MinMaxStats::Int(min, max), - }; - row_group_statistics[row_group] - .column_statistics - .push(column_statistics); - } - datafusion::arrow::datatypes::DataType::UInt32 => { - let min_values = min_values.as_primitive::(); - let max_values = max_values.as_primitive::(); - let min = min_values.value(row_group) as i64; - let max = max_values.value(row_group) as i64; - let column_statistics = ColumnStatistics { - null_count: null_counts.value(row_group) as i64, - stats: MinMaxStats::Int(min, max), - }; - row_group_statistics[row_group] - .column_statistics - .push(column_statistics); - } - datafusion::arrow::datatypes::DataType::Int64 => { - let min_values = min_values.as_primitive::(); - let max_values = max_values.as_primitive::(); - let min = min_values.value(row_group); - let max = max_values.value(row_group); - let column_statistics = ColumnStatistics { - null_count: null_counts.value(row_group) as i64, - stats: MinMaxStats::Int(min, max), - }; - row_group_statistics[row_group] - .column_statistics - .push(column_statistics); - } - datafusion::arrow::datatypes::DataType::Utf8 => { - let min_values = min_values.as_string::(); - let max_values = max_values.as_string::(); - let min = min_values.value(row_group).to_string(); - let max 
= max_values.value(row_group).to_string(); - let column_statistics = ColumnStatistics { - null_count: null_counts.value(row_group) as i64, - stats: MinMaxStats::String(min, max), - }; - row_group_statistics[row_group] - .column_statistics - .push(column_statistics); - } - datafusion::arrow::datatypes::DataType::LargeUtf8 => { - let min_values = min_values.as_string::(); - let max_values = max_values.as_string::(); - let min = min_values.value(row_group).to_string(); - let max = max_values.value(row_group).to_string(); - let column_statistics = ColumnStatistics { - null_count: null_counts.value(row_group) as i64, - stats: MinMaxStats::String(min, max), - }; - row_group_statistics[row_group] - .column_statistics - .push(column_statistics); - } - _ => {} // ignore other types, we just don't put them in the index and filters will not be pushed down - } + let statistics = ColumnStatistics { + null_count: null_counts.value(row_group) as i64, + min_max: ( + min_values[row_group].clone(), + max_values[row_group].clone(), + ), + }; + row_group_statistics[row_group].column_statistics.push(statistics); } } @@ -418,18 +308,10 @@ impl SQLiteIndex { statistics.row_count.into(), ]; for stats in statistics.column_statistics { - match stats.stats { - MinMaxStats::Int(min, max) => { - values.push(stats.null_count.into()); - values.push(min.into()); - values.push(max.into()); - } - MinMaxStats::String(min, max) => { - values.push(stats.null_count.into()); - values.push(min.into()); - values.push(max.into()); - } - } + let (min, max) = stats.min_max; + values.push(stats.null_count.into()); + values.push(min.into()); + values.push(max.into()); } query = query.values_panic(values).to_owned(); @@ -538,11 +420,6 @@ pub struct FileScanPlan { pub access_plan: ParquetAccessPlan, } -#[derive(Debug, Clone)] -pub enum MinMaxStats { - Int(i64, i64), - String(String, String), -} #[derive(Debug, Clone)] pub struct RowGroupStatisticsInsert { @@ -565,7 +442,7 @@ impl RowGroupStatisticsInsert { 
#[derive(Debug, Clone)] pub struct ColumnStatistics { null_count: i64, - stats: MinMaxStats, + min_max: (Value, Value), } #[derive(Debug, Clone)] diff --git a/sqlx-sqlite/src/main.rs b/sqlx-sqlite/src/main.rs index 9fa2d70..14be089 100644 --- a/sqlx-sqlite/src/main.rs +++ b/sqlx-sqlite/src/main.rs @@ -25,7 +25,7 @@ use url::Url; use crate::index::SQLiteIndex; mod index; -mod rewrite; +mod conversions; /// This example demonstrates building a secondary index over multiple Parquet /// files and using that index during query to skip ("prune") files and row groups diff --git a/sqlx-sqlite/src/rewrite.rs b/sqlx-sqlite/src/rewrite.rs deleted file mode 100644 index 3a75240..0000000 --- a/sqlx-sqlite/src/rewrite.rs +++ /dev/null @@ -1,83 +0,0 @@ -use datafusion_common::ScalarValue; -use datafusion_expr::Operator; -use datafusion_physical_expr::{expressions as phys_expr, PhysicalExprRef}; -use sea_query::{Alias, BinOper, CaseStatement, ColumnRef, IntoIden, SimpleExpr, Value}; - - -/// Convert a DataFusion PhysicalExpr to a SeaQuery SimpleExpr -pub fn physical_expr_to_sea_query(expr: &PhysicalExprRef) -> SimpleExpr { - if let Some(expr) = expr.as_any().downcast_ref::(){ - let left = physical_expr_to_sea_query(expr.left()); - let right = physical_expr_to_sea_query(expr.right()); - match expr.op() { - Operator::Eq => left.binary(BinOper::Equal, right), - Operator::NotEq => left.binary(BinOper::NotEqual, right), - Operator::Lt => left.binary(BinOper::SmallerThan, right), - Operator::LtEq => left.binary(BinOper::SmallerThanOrEqual, right), - Operator::Gt => left.binary(BinOper::GreaterThan, right), - Operator::GtEq => left.binary(BinOper::GreaterThanOrEqual, right), - Operator::Plus => left.binary(BinOper::Add, right), - Operator::Minus => left.binary(BinOper::Sub, right), - Operator::Multiply => left.binary(BinOper::Mul, right), - Operator::Divide => left.binary(BinOper::Div, right), - Operator::Modulo => left.binary(BinOper::Mod, right), - Operator::And => 
left.binary(BinOper::And, right), - Operator::Or => left.binary(BinOper::Or, right), - Operator::LikeMatch => left.binary(BinOper::Like, right), - Operator::NotLikeMatch => left.binary(BinOper::NotLike, right), - Operator::BitwiseShiftLeft => left.binary(BinOper::LShift, right), - Operator::BitwiseShiftRight => left.binary(BinOper::RShift, right), - _ => SimpleExpr::Constant(Value::Bool(Some(true))) - } - } else if let Some(expr) = expr.as_any().downcast_ref::() { - SimpleExpr::Column(ColumnRef::Column(Alias::new(expr.name().to_string()).into_iden())) - } else if let Some(expr) = expr.as_any().downcast_ref::() { - match expr.value() { - ScalarValue::Null => SimpleExpr::Keyword(sea_query::Keyword::Null), - ScalarValue::Boolean(v) => SimpleExpr::Constant(Value::Bool(*v)), - ScalarValue::Float32(v) => SimpleExpr::Constant(Value::Float(*v)), - ScalarValue::Float64(v) => SimpleExpr::Constant(Value::Double(*v)), - ScalarValue::Int8(v) => SimpleExpr::Constant(Value::TinyInt(*v)), - ScalarValue::Int16(v) => SimpleExpr::Constant(Value::SmallInt(*v)), - ScalarValue::Int32(v) => SimpleExpr::Constant(Value::Int(*v)), - ScalarValue::Int64(v) => SimpleExpr::Constant(Value::BigInt(*v)), - ScalarValue::UInt8(v) => SimpleExpr::Constant(Value::TinyUnsigned(*v)), - ScalarValue::UInt16(v) => SimpleExpr::Constant(Value::SmallUnsigned(*v)), - ScalarValue::UInt32(v) => SimpleExpr::Constant(Value::Unsigned(*v)), - ScalarValue::UInt64(v) => SimpleExpr::Constant(Value::BigUnsigned(*v)), - ScalarValue::Utf8(v) => match v { - Some(v) => SimpleExpr::Constant(Value::String(Some(Box::new(v.to_string())))), - None => SimpleExpr::Constant(Value::String(None)), - }, - ScalarValue::LargeUtf8(v) => match v { - Some(v) => SimpleExpr::Constant(Value::String(Some(Box::new(v.to_string())))), - None => SimpleExpr::Constant(Value::String(None)), - }, - ScalarValue::Binary(v) => match v { - Some(v) => SimpleExpr::Constant(Value::Bytes(Some(Box::new(v.to_vec())))), - None => 
SimpleExpr::Constant(Value::Bytes(None)), - }, - ScalarValue::FixedSizeBinary(_, v) => match v { - Some(v) => SimpleExpr::Constant(Value::Bytes(Some(Box::new(v.to_vec())))), - None => SimpleExpr::Constant(Value::Bytes(None)), - }, - ScalarValue::LargeBinary(v) => match v { - Some(v) => SimpleExpr::Constant(Value::Bytes(Some(Box::new(v.to_vec())))), - None => SimpleExpr::Constant(Value::Bytes(None)), - }, - // Extend with other types, e.g. to support arrays, dates, etc. - _ => SimpleExpr::Constant(Value::Bool(Some(true))) - } - } else if let Some(expr) = expr.as_any().downcast_ref::() { - let mut case = CaseStatement::new(); - for (when, then) in expr.when_then_expr() { - case = case.case(physical_expr_to_sea_query(when), physical_expr_to_sea_query(then)); - } - if let Some(else_exp) = expr.else_expr() { - case = case.finally(physical_expr_to_sea_query(else_exp)); - }; - SimpleExpr::Case(Box::new(case)) - } else { - SimpleExpr::Constant(Value::Bool(Some(true))) - } -} \ No newline at end of file