From 8e27cbc77355d20e6b35127980287d9dd6ff1bd2 Mon Sep 17 00:00:00 2001 From: Steve Russo <64294847+sjrusso8@users.noreply.github.com> Date: Sat, 27 Apr 2024 13:32:19 -0400 Subject: [PATCH] chore: reorganize code into a workspace --- .gitmodules | 4 +- Cargo.lock | 432 +++++++++------------- Cargo.toml | 80 ++-- README.md | 10 + core/Cargo.toml | 63 ++++ build.rs => core/build.rs | 6 + spark => core/spark | 0 {src => core/src}/catalog.rs | 0 {src => core/src}/client/mod.rs | 2 + {src => core/src}/column.rs | 27 +- {src => core/src}/dataframe.rs | 0 {src => core/src}/errors.rs | 0 {src => core/src}/expressions.rs | 14 +- {src => core/src}/functions/mod.rs | 57 ++- {src => core/src}/group.rs | 3 +- {src => core/src}/lib.rs | 12 +- {src => core/src}/plan.rs | 18 +- {src => core/src}/readwriter.rs | 0 {src => core/src}/session.rs | 55 ++- {src => core/src}/storage.rs | 0 {src => core/src}/streaming/mod.rs | 0 {src => core/src}/types.rs | 16 +- {src => core/src}/window.rs | 2 +- rust/Cargo.toml | 46 +++ {examples => rust/examples}/databricks.rs | 0 {examples => rust/examples}/delta.rs | 0 {examples => rust/examples}/reader.rs | 0 {examples => rust/examples}/readstream.rs | 0 {examples => rust/examples}/sql.rs | 0 {examples => rust/examples}/writer.rs | 0 rust/src/lib.rs | 4 + src/utils.rs | 104 ------ 32 files changed, 501 insertions(+), 454 deletions(-) create mode 100644 core/Cargo.toml rename build.rs => core/build.rs (80%) rename spark => core/spark (100%) rename {src => core/src}/catalog.rs (100%) rename {src => core/src}/client/mod.rs (99%) rename {src => core/src}/column.rs (95%) rename {src => core/src}/dataframe.rs (100%) rename {src => core/src}/errors.rs (100%) rename {src => core/src}/expressions.rs (95%) rename {src => core/src}/functions/mod.rs (93%) rename {src => core/src}/group.rs (99%) rename {src => core/src}/lib.rs (99%) rename {src => core/src}/plan.rs (97%) rename {src => core/src}/readwriter.rs (100%) rename {src => core/src}/session.rs (81%) rename {src => core/src}/storage.rs (100%) rename {src => core/src}/streaming/mod.rs (100%) rename {src => core/src}/types.rs (69%) rename {src => core/src}/window.rs (99%) create mode 100644 rust/Cargo.toml rename {examples => rust/examples}/databricks.rs (100%) rename {examples => rust/examples}/delta.rs (100%) rename {examples => rust/examples}/reader.rs (100%) rename {examples => rust/examples}/readstream.rs (100%) rename {examples => rust/examples}/sql.rs (100%) rename {examples => rust/examples}/writer.rs (100%) create mode 100644 rust/src/lib.rs delete mode 100644 src/utils.rs diff --git a/.gitmodules b/.gitmodules index b62da96..14744ba 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,3 +1,3 @@ -[submodule "spark"] - path = spark +[submodule "core/spark"] + path = core/spark url = https://github.com/apache/spark diff --git a/Cargo.lock b/Cargo.lock index 4cc56f7..f90968f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -295,7 +295,7 @@ checksum = "16e62a023e7c117e27523144c5d2459f4397fcc3cab0085af8e2224f643a0193" dependencies = [ "proc-macro2", "quote", - "syn 2.0.57", + "syn", ] [[package]] @@ -306,7 +306,7 @@ checksum = "bc00ceb34980c03614e35a3a4e218276a0a824e911d07651cd0d858a51e8c0f0" dependencies = [ "proc-macro2", "quote", - "syn 2.0.57", + "syn", ] [[package]] @@ -324,32 +324,6 @@ version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" -[[package]] -name = "aws-lc-rs" -version = "1.6.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "df33e4a55b03f8780ba55041bc7be91a2a8ec8c03517b0379d2d6c96d2c30d95" -dependencies = [ - "aws-lc-sys", - "mirai-annotations", - "paste", - "zeroize", -] - -[[package]] -name = "aws-lc-sys" -version = "0.13.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "37ede3d6e360a48436fee127cb81710834407b1ec0c48a001cc29dec9005f73e" -dependencies = [ - "bindgen", - "cmake", - "dunce", - "fs_extra", - "libc", - "paste", -] - [[package]] name = "axum" version = "0.6.20" @@ -412,9 +386,9 @@ dependencies = [ [[package]] name = "base64" -version = "0.21.3" +version = "0.21.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "414dcefbc63d77c526a76b3afcf6fbb9b5e2791c19c3aa2297733208750c6e53" +checksum = "9d297deb1925b89f2ccc13d7635fa0714f12c87adce1c75356b39ca9b7178567" [[package]] name = "base64" @@ -422,29 +396,6 @@ version = "0.22.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9475866fec1451be56a3c2400fd081ff546538961565ccb5b7142cbd22bc7a51" -[[package]] -name = "bindgen" -version = "0.69.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a00dc851838a2120612785d195287475a3ac45514741da670b735818822129a0" -dependencies = [ - "bitflags 2.4.0", - "cexpr", - "clang-sys", - "itertools", - "lazy_static", - "lazycell", - "log", - "prettyplease", - "proc-macro2", - "quote", - "regex", - "rustc-hash", - "shlex", - "syn 2.0.57", - "which", -] - [[package]] name = "bitflags" version = "1.3.2" @@ -463,11 +414,17 @@ version = "3.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a3e2c3daef883ecc1b5d58c15adae93470a91d425f3532ba1695849656af3fc1" +[[package]] +name = "byteorder" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" + [[package]] name = "bytes" -version = "1.4.0" +version = "1.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "89b2fd2a0dcf38d7971e2194b6b6eebab45ae01067456a7fd93d5547a61b70be" +checksum = "514de17de45fdb8dc022b1a7975556c53c86f9f0aa5f534b98977b171857c2c9" [[package]] name = "cc" @@ -478,15 +435,6 @@ dependencies = [ "libc", ] -[[package]] -name = "cexpr" -version = "0.6.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6fac387a98bb7c37292057cffc56d62ecb629900026402633ae9160df93a8766" -dependencies = [ - "nom", -] - [[package]] name = "cfg-if" version = "1.0.0" @@ -507,31 +455,11 @@ dependencies = [ "windows-targets 0.52.4", ] -[[package]] -name = "clang-sys" -version = "1.7.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "67523a3b4be3ce1989d607a828d036249522dd9c1c8de7f4dd2dae43a37369d1" -dependencies = [ - "glob", - "libc", - "libloading", -] - -[[package]] -name = "cmake" -version = "0.1.50" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a31c789563b815f77f4250caee12365734369f942439b7defd71e18a48197130" -dependencies = [ - "cc", -] - [[package]] name = "comfy-table" -version = "7.0.1" +version = "7.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9ab77dbd8adecaf3f0db40581631b995f312a8a5ae3aa9993188bb8f23d83a5b" +checksum = "b34115915337defe99b2aff5c2ce6771e5fbc4079f4b506301f5cf394c8452f7" dependencies = [ "strum", "strum_macros", @@ -603,12 +531,6 @@ dependencies = [ "memchr", ] -[[package]] -name = "dunce" -version = "1.0.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "56ce8c6da7551ec6c462cbaf3bfbc75131ebbfa1c944aeaa9dab51ca1c5f0c3b" - [[package]] name = "either" version = "1.9.0" @@ -679,12 +601,6 @@ dependencies = [ "percent-encoding", ] -[[package]] -name = "fs_extra" -version = "1.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "42703706b716c37f96a77aea830392ad231f44c9e9a67872fa5548707e11b11c" - [[package]] name = "futures-channel" version = "0.3.28" @@ -696,32 +612,54 @@ dependencies = [ [[package]] name = "futures-core" -version = "0.3.28" +version = "0.3.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dfc6580bb841c5a68e9ef15c77ccc837b40a7504914d52e47b8b0e9bbda25a1d" + +[[package]] +name = "futures-io" +version = "0.3.30" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4bca583b7e26f571124fe5b7561d49cb2868d79116cfa0eefce955557c6fee8c" +checksum = "a44623e20b9681a318efdd71c299b6b222ed6f231972bfe2f224ebad6311f0c1" + +[[package]] +name = "futures-macro" +version = "0.3.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "87750cf4b7a4c0625b1529e4c543c2182106e4dedc60a2a6455e00d212c489ac" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] [[package]] name = "futures-sink" -version = "0.3.28" +version = "0.3.30" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f43be4fe21a13b9781a69afa4985b0f6ee0e1afab2c6f454a8cf30e2b2237b6e" +checksum = "9fb8e00e87438d937621c1c6269e53f536c14d3fbd6a042bb24879e57d474fb5" [[package]] name = "futures-task" -version = "0.3.28" +version = "0.3.30" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "76d3d132be6c0e6aa1534069c705a74a5997a356c0dc2f86a47765e5617c5b65" +checksum = "38d84fa142264698cdce1a9f9172cf383a0c82de1bddcf3092901442c4097004" [[package]] name = "futures-util" -version = "0.3.28" +version = "0.3.30" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "26b01e40b772d54cf6c6d721c1d1abd0647a0106a12ecaa1c186273392a69533" +checksum = "3d6401deb83407ab3da39eba7e33987a73c3df0c82b4bb5813ee871c19c41d48" dependencies = [ "futures-core", + "futures-io", + "futures-macro", + "futures-sink", "futures-task", + "memchr", "pin-project-lite", "pin-utils", + "slab", ] [[package]] @@ -731,8 +669,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "be4136b2a15dd319360be1c07d9933517ccf0be8f16bf62a3bee4f0d618df427" dependencies = [ "cfg-if", + "js-sys", "libc", "wasi", + "wasm-bindgen", ] [[package]] @@ -741,12 +681,6 @@ version = "0.28.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6fb8d784f27acf97159b40fc4db5ecd8aa23b9ad5ef69cdd136d3bc80665f0c0" -[[package]] -name = "glob" -version = "0.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b" - [[package]] name = "h2" version = "0.3.24" @@ -797,9 +731,9 @@ checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8" [[package]] name = "hermit-abi" -version = "0.3.2" +version = "0.3.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "443144c8cdadd93ebf52ddb4056d257f5b52c04d3c804e657d19eb73fc33668b" +checksum = "d231dfb89cfffdbc30e7fc41579ed6066ad03abda9e567ccafae602b97ec5024" [[package]] name = "home" @@ -812,9 +746,9 @@ dependencies = [ [[package]] name = "http" -version = "0.2.9" +version = "0.2.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bd6effc99afb63425aff9b05836f029929e345a6148a14b7ecd5ab67af944482" +checksum = "601cbb57e577e2f5ef5be8e7b83f0f63994f25aa94d673e54a92d5c516d101f1" dependencies = [ "bytes", "fnv", @@ -823,9 +757,9 @@ dependencies = [ [[package]] name = "http-body" -version = "0.4.5" +version = "0.4.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d5f38f16d184e36f2408a55281cd658ecbd3ca05cce6d6510a176eca393e26d1" +checksum = "7ceab25649e9960c0311ea418d17bee82c0dcec1bd053b5f9a66e265a693bed2" dependencies = [ "bytes", "http", @@ -950,9 +884,9 @@ checksum = "af150ab688ff2122fcef229be89cb50dd66af9e01a4ff320cc137eecc9bacc38" [[package]] name = "js-sys" -version = "0.3.64" +version = "0.3.69" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c5f195fe497f702db0f318b07fdd68edb16955aed830df8363d837542f8f935a" +checksum = "29c15563dc2726973df627357ce0c9ddddbea194836909d655df6a75d2cf296d" dependencies = [ "wasm-bindgen", ] @@ -963,12 +897,6 @@ version = "1.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" -[[package]] -name = "lazycell" -version = "1.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55" - [[package]] name = "lexical-core" version = "0.8.5" @@ -1039,16 +967,6 @@ version = "0.2.153" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9c198f91728a82281a64e1f4f9eeb25d82cb32a5de251c6bd1b5154d63a8e7bd" -[[package]] -name = "libloading" -version = "0.8.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0c2a198fb6b0eada2a8df47933734e6d35d350665a33a3593d7164fa52c75c19" -dependencies = [ - "cfg-if", - "windows-targets 0.52.4", -] - [[package]] name = "libm" version = "0.2.7" @@ -1095,12 +1013,6 @@ version = "0.3.17" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6877bb514081ee2a7ff5ef9de3281f14a4dd4bceac4c09388074a6b5df8a139a" -[[package]] -name = "minimal-lexical" -version = "0.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" - [[package]] name = "miniz_oxide" version = "0.7.1" @@ -1121,28 +1033,12 @@ dependencies = [ "windows-sys 0.48.0", ] -[[package]] -name = "mirai-annotations" -version = "1.12.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c9be0862c1b3f26a88803c4a49de6889c10e608b3ee9344e6ef5b45fb37ad3d1" - [[package]] name = "multimap" version = "0.8.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e5ce46fe64a9d73be07dcbe690a38ce1b293be448fd8ce1e6c1b8062c9f72c6a" -[[package]] -name = "nom" -version = "7.1.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a" -dependencies = [ - "memchr", - "minimal-lexical", -] - [[package]] name = "num" version = "0.4.1" @@ -1274,12 +1170,6 @@ dependencies = [ "windows-targets 0.48.5", ] -[[package]] -name = "paste" -version = "1.0.14" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "de3145af08024dea9fa9914f381a17b8fc6034dfb00f3a84013f7ff43f29ed4c" - [[package]] name = "percent-encoding" version = "2.3.1" @@ -1298,22 +1188,22 @@ dependencies = [ [[package]] name = "pin-project" -version = "1.1.3" +version = "1.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fda4ed1c6c173e3fc7a83629421152e01d7b1f9b7f65fb301e490e8cfc656422" +checksum = "b6bf43b791c5b9e34c3d182969b4abb522f9343702850a2e57f460d00d09b4b3" dependencies = [ "pin-project-internal", ] [[package]] name = "pin-project-internal" -version = "1.1.3" +version = "1.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4359fd9c9171ec6e8c62926d6faaf553a8dc3f64e1507e76da7911b4f6a04405" +checksum = "2f38a4412a78282e09a2cf38d195ea5420d15ba0602cb375210efbc877243965" dependencies = [ "proc-macro2", "quote", - "syn 2.0.57", + "syn", ] [[package]] @@ -1341,7 +1231,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8832c0f9be7e3cae60727e6256cfd2cd3c3e2b6cd5dad4190ecb2fd658c9030b" dependencies = [ "proc-macro2", - "syn 2.0.57", + "syn", ] [[package]] @@ -1386,7 +1276,7 @@ dependencies = [ "prost", "prost-types", "regex", - "syn 2.0.57", + "syn", "tempfile", "which", ] @@ -1401,7 +1291,7 @@ dependencies = [ "itertools", "proc-macro2", "quote", - "syn 2.0.57", + "syn", ] [[package]] @@ -1526,12 +1416,6 @@ version = "0.1.23" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d626bb9dae77e28219937af045c257c28bfd3f69333c512553507f5f9798cb76" -[[package]] -name = "rustc-hash" -version = "1.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2" - [[package]] name = "rustc_version" version = "0.4.0" @@ -1568,21 +1452,6 @@ dependencies = [ "zeroize", ] -[[package]] -name = "rustls" -version = "0.23.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1b3818d6051afeb6f88412bc8693cf8219799b2f2c2365f15e7534f0e198a16c" -dependencies = [ - "aws-lc-rs", - "log", - "once_cell", - "rustls-pki-types", - "rustls-webpki", - "subtle", - "zeroize", -] - [[package]] name = "rustls-native-certs" version = "0.7.0" @@ -1602,7 +1471,7 @@ version = "2.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f48172685e6ff52a556baa527774f61fcaa884f59daf3375c62a3f1cd2549dab" dependencies = [ - "base64 0.21.3", + "base64 0.21.7", "rustls-pki-types", ] @@ -1618,7 +1487,6 @@ version = "0.102.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "faaa0a62740bedb9b2ef5afa303da42764c012f743917351dc9a237ea1663610" dependencies = [ - "aws-lc-rs", "ring", "rustls-pki-types", "untrusted", @@ -1697,7 +1565,7 @@ checksum = "7eb0b34b42edc17f6b7cac84a52a1c5f0e1bb2227e997ca9011ea3dd34e8610b" dependencies = [ "proc-macro2", "quote", - "syn 2.0.57", + "syn", ] [[package]] @@ -1711,12 +1579,6 @@ dependencies = [ "serde", ] -[[package]] -name = "shlex" -version = "1.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" - [[package]] name = "slab" version = "0.4.9" @@ -1753,25 +1615,34 @@ dependencies = [ ] [[package]] -name = "spark-connect-rs" -version = "0.0.1-beta.3" +name = "spark-connect-core" +version = "0.0.0" dependencies = [ "arrow", "arrow-ipc", "chrono", + "getrandom", "parking_lot", "prost", "prost-types", "rand", "serde_json", "tokio", - "tokio-rustls 0.26.0", "tonic", "tonic-build", + "tonic-web-wasm-client", "url", "uuid", ] +[[package]] +name = "spark-connect-rs" +version = "0.0.1-beta.3" +dependencies = [ + "spark-connect-core", + "tokio", +] + [[package]] name = "spin" version = "0.9.8" @@ -1786,21 +1657,21 @@ checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" [[package]] name = "strum" -version = "0.24.1" +version = "0.26.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "063e6045c0e62079840579a7e47a355ae92f60eb74daaf156fb1e84ba164e63f" +checksum = "5d8cec3501a5194c432b2b7976db6b7d10ec95c253208b45f83f7136aa985e29" [[package]] name = "strum_macros" -version = "0.24.3" +version = "0.26.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e385be0d24f186b4ce2f9982191e7101bb737312ad61c1f2f984f34bcf85d59" +checksum = "c6cf59daf282c0a494ba14fd21610a0325f9f90ec9d1231dea26bcb1d696c946" dependencies = [ "heck", "proc-macro2", "quote", "rustversion", - "syn 1.0.109", + "syn", ] [[package]] @@ -1809,17 +1680,6 @@ version = "2.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "81cdd64d312baedb58e21336b31bc043b77e01cc99033ce76ef539f78e965ebc" -[[package]] -name = "syn" -version = "1.0.109" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237" -dependencies = [ - "proc-macro2", - "quote", - "unicode-ident", -] - [[package]] name = "syn" version = "2.0.57" @@ -1850,6 +1710,26 @@ dependencies = [ "windows-sys 0.48.0", ] +[[package]] +name = "thiserror" +version = "1.0.59" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f0126ad08bff79f29fc3ae6a55cc72352056dfff61e3ff8bb7129476d44b23aa" +dependencies = [ + "thiserror-impl", +] + +[[package]] +name = "thiserror-impl" +version = "1.0.59" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d1cd413b5d558b4c5bf3680e324a6fa5014e7b7c067a51e69dbdf47eb7148b66" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "tiny-keccak" version = "2.0.2" @@ -1909,7 +1789,7 @@ checksum = "5b8a1e28f2deaa14e508979454cb3a223b10b938b45af148bc0986de36f1923b" dependencies = [ "proc-macro2", "quote", - "syn 2.0.57", + "syn", ] [[package]] @@ -1918,18 +1798,7 @@ version = "0.25.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "775e0c0f0adb3a2f22a00c4745d728b479985fc15ee7ca6a2608388c5569860f" dependencies = [ - "rustls 0.22.2", - "rustls-pki-types", - "tokio", -] - -[[package]] -name = "tokio-rustls" -version = "0.26.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0c7bc40d0e5a97695bb96e27995cd3a08538541b0a846f65bba7a359f36700d4" -dependencies = [ - "rustls 0.23.3", + "rustls", "rustls-pki-types", "tokio", ] @@ -1968,7 +1837,7 @@ dependencies = [ "async-stream", "async-trait", "axum", - "base64 0.21.3", + "base64 0.21.7", "bytes", "h2", "http", @@ -1982,7 +1851,7 @@ dependencies = [ "rustls-pemfile", "rustls-pki-types", "tokio", - "tokio-rustls 0.25.0", + "tokio-rustls", "tokio-stream", "tower", "tower-layer", @@ -2000,7 +1869,31 @@ dependencies = [ "proc-macro2", "prost-build", "quote", - "syn 2.0.57", + "syn", +] + +[[package]] +name = "tonic-web-wasm-client" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "79bb296fba9974fbc7ea2f1364a661c0e7acb7ebfca648343e5a4ac44ec9a0ec" +dependencies = [ + "base64 0.21.7", + "byteorder", + "bytes", + "futures-util", + "http", + "http-body", + "httparse", + "js-sys", + "pin-project", + "thiserror", + "tonic", + "tower-service", + "wasm-bindgen", + "wasm-bindgen-futures", + "wasm-streams", + "web-sys", ] [[package]] @@ -2055,7 +1948,7 @@ checksum = "5f4f31f56159e98206da9efd823404b79b6ef3143b4a7ab76e67b1751b25a4ab" dependencies = [ "proc-macro2", "quote", - "syn 2.0.57", + "syn", ] [[package]] @@ -2096,9 +1989,9 @@ dependencies = [ [[package]] name = "unicode-width" -version = "0.1.10" +version = "0.1.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c0edd1e5b14653f783770bce4a4dabb4a5108a5370a5f5d8cfe8710c361f6c8b" +checksum = "e51733f11c9c4f72aa0c160008246859e340b00807569a0da0e7a1079b27ba85" [[package]] name = "untrusted" @@ -2149,9 +2042,9 @@ checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" [[package]] name = "wasm-bindgen" -version = "0.2.87" +version = "0.2.92" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7706a72ab36d8cb1f80ffbf0e071533974a60d0a308d01a5d0375bf60499a342" +checksum = "4be2531df63900aeb2bca0daaaddec08491ee64ceecbee5076636a3b026795a8" dependencies = [ "cfg-if", "wasm-bindgen-macro", @@ -2159,24 +2052,36 @@ dependencies = [ [[package]] name = "wasm-bindgen-backend" -version = "0.2.87" +version = "0.2.92" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5ef2b6d3c510e9625e5fe6f509ab07d66a760f0885d858736483c32ed7809abd" +checksum = "614d787b966d3989fa7bb98a654e369c762374fd3213d212cfc0251257e747da" dependencies = [ "bumpalo", "log", "once_cell", "proc-macro2", "quote", - "syn 2.0.57", + "syn", "wasm-bindgen-shared", ] +[[package]] +name = "wasm-bindgen-futures" +version = "0.4.42" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "76bc14366121efc8dbb487ab05bcc9d346b3b5ec0eaa76e46594cabbe51762c0" +dependencies = [ + "cfg-if", + "js-sys", + "wasm-bindgen", + "web-sys", +] + [[package]] name = "wasm-bindgen-macro" -version = "0.2.87" +version = "0.2.92" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dee495e55982a3bd48105a7b947fd2a9b4a8ae3010041b9e0faab3f9cd028f1d" +checksum = "a1f8823de937b71b9460c0c34e25f3da88250760bec0ebac694b49997550d726" dependencies = [ "quote", "wasm-bindgen-macro-support", @@ -2184,22 +2089,45 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro-support" -version = "0.2.87" +version = "0.2.92" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "54681b18a46765f095758388f2d0cf16eb8d4169b639ab575a8f5693af210c7b" +checksum = "e94f17b526d0a461a191c78ea52bbce64071ed5c04c9ffe424dcb38f74171bb7" dependencies = [ "proc-macro2", "quote", - "syn 2.0.57", + "syn", "wasm-bindgen-backend", "wasm-bindgen-shared", ] [[package]] name = "wasm-bindgen-shared" -version = "0.2.87" +version = "0.2.92" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "af190c94f2773fdb3729c55b007a722abb5384da03bc0986df4c289bf5567e96" + +[[package]] +name = "wasm-streams" +version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ca6ad05a4870b2bf5fe995117d3728437bd27d7cd5f06f13c17443ef369775a1" +checksum = "b65dc4c90b63b118468cf747d8bf3566c1913ef60be765b5730ead9e0a3ba129" +dependencies = [ + "futures-util", + "js-sys", + "wasm-bindgen", + "wasm-bindgen-futures", + "web-sys", +] + +[[package]] +name = "web-sys" +version = "0.3.69" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77afa9a11836342370f4817622a2f0f418b134426d91a82dfb48f532d2ec13ef" +dependencies = [ + "js-sys", + "wasm-bindgen", +] [[package]] name = "which" diff --git a/Cargo.toml b/Cargo.toml index 35f52f4..498b1f1 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,67 +1,39 @@ -[package] -name = "spark-connect-rs" -version = "0.0.1-beta.3" -edition = "2021" -license = "Apache-2.0" -description = "Apache Spark Connect Client for Rust" +[workspace] +members = ["core", "rust"] +resolver = "2" + +[workspace.package] +authors = ["Steve Russo <64294847+sjrusso8@users.noreply.github.com>"] keywords = ["spark", "spark_connect"] -documentation = "https://docs.rs/spark-connect-rs" readme = "README.md" +edition = "2021" homepage = "https://github.com/sjrusso8/spark-connect-rs" +description = "Apache Spark Connect Client for Rust" +license = "Apache-2.0" +documentation = "https://docs.rs/spark-connect-rs" repository = "https://github.com/sjrusso8/spark-connect-rs" -include = [ - "build.rs", - "docker-compose.yml", - "examples/**/*", - "src/**/*", - "spark/connector/connect/common/src/main/protobuf/spark/**/*", -] - -[dependencies] -tonic = { version ="0.11.0"} -tokio = { version = "1.37.0", features = ["macros", "rt-multi-thread"] } -tokio-rustls = "0.26.0" - -arrow = { version = "51.0.0", features = ["prettyprint"] } -arrow-ipc = "51.0.0" - -serde_json = "1.0.115" - -parking_lot = { version = "0.12.1", features = ["send_guard"]} - -prost = "0.12.0" -prost-types = "0.12.0" - -rand = "0.8.5" -uuid = { version = "1.8.0", features = ["v4"] } -url = "2.5" - -chrono = "0.4.35" +rust-version = "1.76" -[build-dependencies] -tonic-build = "0.11.0" +[workspace.dependencies] +tonic = { version ="0.11", default-features = false } +tonic-web-wasm-client = { version = "0.5" } -[lib] -doctest = false +tokio = { version = "1.37", default-features = false, features = ["macros", "rt-multi-thread"] } -[features] -tls = ["tonic/tls", "tonic/tls-roots"] +arrow = { version = "51", features = ["prettyprint"] } +arrow-ipc = { version = "51" } -[[example]] -name = "reader" +serde_json = { version = "1" } -[[example]] -name = "sql" +parking_lot = { version = "0.12", features = ["send_guard"]} -[[example]] -name = "writer" +prost = { version = "0.12" } +prost-types = { version = "0.12" } -[[example]] -name = "delta" +rand = { version = "0.8" } +uuid = { version = "1.8", features = ["v4"] } +url = { version = "2.5" } -[[example]] -name = "readstream" +chrono = { version = "0.4" } -[[example]] -name = "databricks" -required-features = ["tls"] +getrandom = { version = "0.2" } diff --git a/README.md b/README.md index e9a933e..feda476 100644 --- a/README.md +++ b/README.md @@ -13,6 +13,16 @@ of interacting with Spark cluster from rust. The `spark-connect-rs` aims to provide an entrypoint to [Spark Connect](https://spark.apache.org/docs/latest/spark-connect-overview.html), and provide *similar* DataFrame API interactions. +## Project Layout + +``` +├── core <- core implementation in Rust +│ └─ spark <- git submodule for apache/spark +├── rust <- shim for 'spark-connect-rs' from core +``` + +Future state would be to have additional bindings for other languages along side the top level `rust` folder. + ## Getting Started This section explains how run Spark Connect Rust locally starting from 0. diff --git a/core/Cargo.toml b/core/Cargo.toml new file mode 100644 index 0000000..339feb4 --- /dev/null +++ b/core/Cargo.toml @@ -0,0 +1,63 @@ +[package] +name = "spark-connect-core" +version = "0.0.0" +authors.workspace = true +keywords.workspace = true +readme.workspace = true +edition.workspace = true +homepage.workspace = true +description.workspace = true +license.workspace = true +documentation.workspace = true +repository.workspace = true +rust-version.workspace = true + +[dependencies] +tonic = { workspace = true, default-features = false, optional = true } +tonic-web-wasm-client = { workspace = true, optional = true } + +tokio = { workspace = true, optional = true } + +arrow = { workspace = true } +arrow-ipc = { workspace = true } + +serde_json = { workspace = true } + +parking_lot = { workspace = true } + +prost = { workspace = true } +prost-types = { workspace = true } + +rand = { workspace = true } +uuid = { workspace = true } +url = { workspace = true } + +chrono = { workspace = true } + +getrandom = { workspace = true, optional = true } + +[build-dependencies] +tonic-build = "0.11.0" + +[lib] +doctest = false + +[features] +default = [ + "tokio", + "tonic/codegen", + "tonic/prost", + "tonic/transport", +] + +tls = [ + "tonic/tls", + "tonic/tls-roots" +] + +wasm = [ + "tonic-web-wasm-client", + "tonic/codegen", + "tonic/prost", + "getrandom/js", +] diff --git a/build.rs b/core/build.rs similarity index 80% rename from build.rs rename to core/build.rs index 06b635f..1f14a16 100644 --- a/build.rs +++ b/core/build.rs @@ -10,10 +10,16 @@ fn main() -> Result<(), Box> { file_paths.push(entry.to_str().unwrap().to_string()); } + #[cfg(feature = "wasm")] + let transport = false; + #[cfg(not(feature = "wasm"))] + let transport = true; + tonic_build::configure() .protoc_arg("--experimental_allow_proto3_optional") .build_server(false) .build_client(true) + .build_transport(transport) .compile( file_paths.as_ref(), &["./spark/connector/connect/common/src/main/protobuf"], diff --git a/spark b/core/spark similarity index 100% rename from spark rename to core/spark diff --git a/src/catalog.rs b/core/src/catalog.rs similarity index 100% rename from src/catalog.rs rename to core/src/catalog.rs diff --git a/src/client/mod.rs b/core/src/client/mod.rs similarity index 99% rename from src/client/mod.rs rename to core/src/client/mod.rs index 583b228..72942d8 100644 --- a/src/client/mod.rs +++ b/core/src/client/mod.rs @@ -1,3 +1,5 @@ +//! Generic implementation of ChannelBuilder and SparkConnectClient + use std::collections::HashMap; use std::env; use std::str::FromStr; diff --git a/src/column.rs b/core/src/column.rs similarity index 95% rename from src/column.rs rename to core/src/column.rs index 6c22a6d..e6f6732 100644 --- a/src/column.rs +++ b/core/src/column.rs @@ -5,8 +5,7 @@ use std::ops::{Add, BitAnd, BitOr, BitXor, Div, Mul, Neg, Not, Rem, Sub}; use crate::spark; use crate::expressions::{ToExpr, ToLiteralExpr}; -use crate::functions::lit; -use crate::utils::invoke_func; +use crate::functions::invoke_func; use crate::window::WindowSpec; /// # Column @@ -253,44 +252,32 @@ impl Column { /// df.filter(col("name").contains("ge")); /// ``` pub fn contains(self, other: T) -> Column { - let value = lit(other); - - invoke_func("contains", vec![self, value]) + invoke_func("contains", vec![self.to_expr(), other.to_literal_expr()]) } /// A filter expression that evaluates if the column startswith a string literal pub fn startswith(self, other: T) -> Column { - let value = lit(other); - - invoke_func("startswith", vec![self, value]) + invoke_func("startswith", vec![self.to_expr(), other.to_literal_expr()]) } /// A filter expression that evaluates if the column endswith a string literal pub fn endswith(self, other: T) -> Column { - let value = lit(other); - - invoke_func("endswith", vec![self, value]) + invoke_func("endswith", vec![self.to_expr(), other.to_literal_expr()]) } /// A SQL LIKE filter expression that evaluates the column based on a case sensitive match pub fn like(self, other: T) -> Column { - let value = lit(other); - - invoke_func("like", vec![self, value]) + invoke_func("like", vec![self.to_expr(), other.to_literal_expr()]) } /// A SQL ILIKE filter expression that evaluates the column based on a case insensitive match pub fn ilike(self, other: T) -> Column { - let value = lit(other); - - invoke_func("ilike", vec![self, value]) + invoke_func("ilike", vec![self.to_expr(), other.to_literal_expr()]) } /// A SQL RLIKE filter expression that evaluates the column based on a regex match pub fn rlike(self, other: T) -> Column { - let value = lit(other); - - invoke_func("rlike", vec![self, value]) + invoke_func("rlike", vec![self.to_expr(), other.to_literal_expr()]) } /// Equality comparion. Cannot overload the '==' and return something other diff --git a/src/dataframe.rs b/core/src/dataframe.rs similarity index 100% rename from src/dataframe.rs rename to core/src/dataframe.rs diff --git a/src/errors.rs b/core/src/errors.rs similarity index 100% rename from src/errors.rs rename to core/src/errors.rs diff --git a/src/expressions.rs b/core/src/expressions.rs similarity index 95% rename from src/expressions.rs rename to core/src/expressions.rs index 491dc99..ccee3ad 100644 --- a/src/expressions.rs +++ b/core/src/expressions.rs @@ -21,8 +21,6 @@ use crate::spark; use crate::column::Column; use crate::types::ToDataType; -use crate::impl_to_literal; - const MICROSECONDS: i32 = 1000000; /// Translate string values into a `spark::Expression` @@ -114,6 +112,18 @@ pub trait ToLiteral { fn to_literal(&self) -> spark::expression::Literal; } +macro_rules! impl_to_literal { + ($type:ty, $inner_type:ident) => { + impl ToLiteral for $type { + fn to_literal(&self) -> spark::expression::Literal { + spark::expression::Literal { + literal_type: Some(spark::expression::literal::LiteralType::$inner_type(*self)), + } + } + } + }; +} + impl_to_literal!(bool, Boolean); impl_to_literal!(i32, Integer); impl_to_literal!(i64, Long); diff --git a/src/functions/mod.rs b/core/src/functions/mod.rs similarity index 93% rename from src/functions/mod.rs rename to core/src/functions/mod.rs index aaf2b92..23b457e 100644 --- a/src/functions/mod.rs +++ b/core/src/functions/mod.rs @@ -5,13 +5,23 @@ use crate::spark; use crate::DataFrame; use crate::column::Column; -use expressions::{ToExpr, ToLiteralExpr}; - -use crate::generate_functions; -use crate::utils::invoke_func; +use expressions::{ToExpr, ToLiteralExpr, ToVecExpr}; use rand::random; +pub fn invoke_func(name: &str, args: T) -> Column { + Column::from(spark::Expression { + expr_type: Some(spark::expression::ExprType::UnresolvedFunction( + spark::expression::UnresolvedFunction { + function_name: name.to_string(), + arguments: args.to_vec_expr(), + is_distinct: false, + is_user_defined_function: false, + }, + )), + }) +} + /// Create a column from a &str pub fn col(value: &str) -> Column { Column::from(value) @@ -260,6 +270,45 @@ pub fn desc_nulls_last(col: T) -> Column { Column::from(col.to_literal_expr()).desc_nulls_last() } +macro_rules! generate_functions { + (no_args: $($func_name:ident),*) => { + $( + pub fn $func_name() -> Column { + let empty_args: Vec = vec![]; + invoke_func(stringify!($func_name), empty_args) + } + )* + }; + (one_col: $($func_name:ident),*) => { + $( + pub fn $func_name(col: T) -> Column + where + Vec: expressions::ToVecExpr, + { + invoke_func(stringify!($func_name), vec![col]) + } + )* + }; + (two_cols: $($func_name:ident),*) => { + $( + pub fn $func_name(col1: T, col2: T) -> Column + where + Vec: expressions::ToVecExpr, + { + invoke_func(stringify!($func_name), vec![col1, col2]) + } + )* + }; + (multiple_cols: $($func_name:ident),*) => { + $( + pub fn $func_name(cols: T) -> Column + { + invoke_func(stringify!($func_name), cols) + } + )* + }; +} + // functions that require no arguments generate_functions!( no_args: pi, input_file_name, diff --git a/src/group.rs b/core/src/group.rs similarity index 99% rename from src/group.rs rename to core/src/group.rs index 65b9b81..81a1416 100644 --- a/src/group.rs +++ b/core/src/group.rs @@ -4,8 +4,7 @@ use crate::dataframe::DataFrame; use crate::expressions::{ToExpr, ToLiteral, ToVecExpr}; use crate::plan::LogicalPlanBuilder; -use crate::functions::lit; -use crate::utils::invoke_func; +use crate::functions::{invoke_func, lit}; use crate::spark; use crate::spark::aggregate::GroupType; diff --git a/src/lib.rs b/core/src/lib.rs similarity index 99% rename from src/lib.rs rename to core/src/lib.rs index 3f9a045..0f8ca9b 100644 --- a/src/lib.rs +++ b/core/src/lib.rs @@ -115,22 +115,20 @@ pub mod spark { tonic::include_proto!("spark.connect"); } -pub mod dataframe; -pub mod plan; -pub mod readwriter; -pub mod session; - pub mod catalog; -mod client; +pub mod client; pub mod column; +pub mod dataframe; pub mod errors; pub mod expressions; pub mod functions; pub mod group; +pub mod plan; +pub mod readwriter; +pub mod session; pub mod storage; pub mod streaming; pub mod types; -mod utils; pub mod window; pub use dataframe::{DataFrame, DataFrameReader, DataFrameWriter}; diff --git a/src/plan.rs b/core/src/plan.rs similarity index 97% rename from src/plan.rs rename to core/src/plan.rs index 835ee0c..e46ba29 100644 --- a/src/plan.rs +++ b/core/src/plan.rs @@ -6,7 +6,6 @@ use std::sync::Mutex; use crate::errors::SparkError; use crate::expressions::{ToExpr, ToFilterExpr, ToVecExpr}; use crate::spark; -use crate::utils::sort_order; use arrow::array::RecordBatch; use arrow_ipc::writer::StreamWriter; @@ -635,6 +634,23 @@ impl LogicalPlanBuilder { } } +pub fn sort_order(cols: I) -> Vec +where + T: ToExpr, + I: IntoIterator, +{ + cols.into_iter() + .map(|col| match col.to_expr().expr_type.unwrap() { + spark::expression::ExprType::SortOrder(ord) => *ord, + _ => spark::expression::SortOrder { + child: Some(Box::new(col.to_expr())), + direction: 1, + null_ordering: 1, + }, + }) + .collect() +} + fn serialize(batch: &RecordBatch) -> Result, SparkError> { let buffer: Vec = Vec::new(); let schema = &batch.schema(); diff --git a/src/readwriter.rs b/core/src/readwriter.rs similarity index 100% rename from src/readwriter.rs rename to core/src/readwriter.rs diff --git a/src/session.rs b/core/src/session.rs similarity index 81% rename from src/session.rs rename to core/src/session.rs index 52ea5aa..e19921b 100644 --- a/src/session.rs +++ b/core/src/session.rs @@ -4,22 +4,27 @@ use std::collections::HashMap; use std::sync::Arc; use crate::catalog::Catalog; -use crate::client::ChannelBuilder; -use crate::client::{MetadataInterceptor, SparkConnectClient}; use crate::dataframe::{DataFrame, DataFrameReader}; -use crate::errors::SparkError; use crate::plan::LogicalPlanBuilder; use crate::spark; use crate::streaming::DataStreamReader; + +use crate::client::{ChannelBuilder, MetadataInterceptor, SparkConnectClient}; +use crate::errors::SparkError; use spark::spark_connect_service_client::SparkConnectServiceClient; use arrow::record_batch::RecordBatch; use parking_lot::RwLock; -use tonic::service::interceptor::InterceptedService; +#[cfg(not(feature = "wasm"))] use tonic::transport::{Channel, Endpoint}; +#[cfg(feature = "wasm")] +use tonic_web_wasm_client::Client; + +use tonic::service::interceptor::InterceptedService; + /// SparkSessionBuilder creates a remote Spark Session a connection string. /// /// The connection string is define based on the requirements from [Spark Documentation](https://github.com/apache/spark/blob/master/connector/connect/docs/client-connection-string.md) @@ -51,6 +56,7 @@ impl SparkSessionBuilder { Self::new(connection) } + #[cfg(not(feature = "wasm"))] async fn create_client(&self) -> Result { let channel = Endpoint::from_shared(self.channel_builder.endpoint()) .expect("Failed to create endpoint") @@ -74,6 +80,26 @@ impl SparkSessionBuilder { Ok(SparkSession::new(spark_connnect_client)) } + #[cfg(feature = "wasm")] + async fn create_client(&self) -> Result { + let inner = Client::new(self.channel_builder.endpoint()); + + let service_client = SparkConnectServiceClient::with_interceptor( + inner, + MetadataInterceptor::new( + self.channel_builder.token().to_owned(), + self.channel_builder.headers().to_owned(), + ), + ); + + let client = Arc::new(RwLock::new(service_client)); + + let spark_connnect_client = + SparkConnectClient::new(client.clone(), self.channel_builder.clone()); + + Ok(SparkSession::new(spark_connnect_client)) + } + /// Attempt to connect to a remote Spark Session /// /// and return a [SparkSession] @@ -86,12 +112,17 @@ impl SparkSessionBuilder { /// using the Spark Connection gRPC protocol. #[derive(Clone, Debug)] pub struct SparkSession { + #[cfg(not(feature = "wasm"))] client: SparkConnectClient>, + #[cfg(feature = "wasm")] + client: SparkConnectClient>, + session_id: String, } impl SparkSession { + #[cfg(not(feature = "wasm"))] pub fn new( client: SparkConnectClient>, ) -> Self { @@ -100,6 +131,16 @@ impl SparkSession { client, } } + + #[cfg(feature = "wasm")] + pub fn new( + client: SparkConnectClient>, + ) -> Self { + Self { + session_id: client.session_id(), + client, + } + } /// Create a [DataFrame] with a spingle column named `id`, /// containing elements in a range from `start` (default 0) to /// `end` (exclusive) with a step value `step`, and control the number @@ -177,9 +218,15 @@ impl SparkSession { } /// Spark Connection gRPC client interface + #[cfg(not(feature = "wasm"))] pub fn client(self) -> SparkConnectClient> { self.client } + + #[cfg(feature = "wasm")] + pub fn client(self) -> SparkConnectClient> { + self.client + } } #[cfg(test)] diff --git a/src/storage.rs b/core/src/storage.rs similarity index 100% rename from src/storage.rs rename to core/src/storage.rs diff --git a/src/streaming/mod.rs b/core/src/streaming/mod.rs similarity index 100% rename from src/streaming/mod.rs rename to core/src/streaming/mod.rs diff --git a/src/types.rs b/core/src/types.rs similarity index 69% rename from src/types.rs rename to core/src/types.rs index 9b5f407..4f230a3 100644 --- a/src/types.rs +++ b/core/src/types.rs @@ -1,12 +1,26 @@ //! Rust Types to Spark Types -use crate::impl_to_data_type; use crate::spark; pub trait ToDataType { fn to_proto_type(&self) -> spark::DataType; } +macro_rules! impl_to_data_type { + ($type:ty, $inner_type:ident) => { + impl ToDataType for $type { + fn to_proto_type(&self) -> spark::DataType { + spark::DataType { + kind: Some(spark::data_type::Kind::$inner_type( + spark::data_type::$inner_type { + type_variation_reference: 0, + }, + )), + } + } + } + }; +} // Call the macro with the input pairs impl_to_data_type!(bool, Boolean); impl_to_data_type!(i16, Short); diff --git a/src/window.rs b/core/src/window.rs similarity index 99% rename from src/window.rs rename to core/src/window.rs index b7f1ca8..3618fbe 100644 --- a/src/window.rs +++ b/core/src/window.rs @@ -1,7 +1,7 @@ //! Utility structs for defining a window over a DataFrame use crate::expressions::{ToExpr, ToLiteralExpr, ToVecExpr}; -use crate::utils::sort_order; +use crate::plan::sort_order; use crate::spark; use crate::spark::expression::window; diff --git a/rust/Cargo.toml b/rust/Cargo.toml new file mode 100644 index 0000000..cf1a1b9 --- /dev/null +++ b/rust/Cargo.toml @@ -0,0 +1,46 @@ +[package] +name = "spark-connect-rs" +version = "0.0.1-beta.3" +authors.workspace = true +keywords.workspace = true +readme.workspace = true +edition.workspace = true +homepage.workspace = true +description.workspace = true +license.workspace = true +documentation.workspace = true +repository.workspace = true +rust-version.workspace = true +include = [ + "examples/**/*", +] + +[dependencies] +tokio = { workspace = true } + +spark-connect-core = { version = "0.0.0", path = "../core" } + +[features] +tls = ["spark-connect-core/tls"] + +[lib] +doctest = false + +[[example]] +name = "reader" + +[[example]] +name = "sql" + +[[example]] +name = "writer" + +[[example]] +name = "delta" + +[[example]] +name = "readstream" + +[[example]] +name = "databricks" +required-features = ["tls"] diff --git a/examples/databricks.rs b/rust/examples/databricks.rs similarity index 100% rename from examples/databricks.rs rename to rust/examples/databricks.rs diff --git a/examples/delta.rs b/rust/examples/delta.rs similarity index 100% rename from examples/delta.rs rename to rust/examples/delta.rs diff --git a/examples/reader.rs b/rust/examples/reader.rs similarity index 100% rename from examples/reader.rs rename to rust/examples/reader.rs diff --git a/examples/readstream.rs b/rust/examples/readstream.rs similarity index 100% rename from examples/readstream.rs rename to rust/examples/readstream.rs diff --git a/examples/sql.rs b/rust/examples/sql.rs similarity index 100% rename from examples/sql.rs rename to rust/examples/sql.rs diff --git a/examples/writer.rs b/rust/examples/writer.rs similarity index 100% rename from examples/writer.rs rename to rust/examples/writer.rs diff --git a/rust/src/lib.rs b/rust/src/lib.rs new file mode 100644 index 0000000..f206d25 --- /dev/null +++ b/rust/src/lib.rs @@ -0,0 +1,4 @@ +/** + * The spark-connect-rs crate is currently just a meta-package shim for spark-connect-core + */ +pub use spark_connect_core::*; diff --git a/src/utils.rs b/src/utils.rs deleted file mode 100644 index 10bc925..0000000 --- a/src/utils.rs +++ /dev/null @@ -1,104 +0,0 @@ -use crate::spark; - -use crate::column::Column; -use crate::expressions::{ToExpr, ToVecExpr}; - -pub fn invoke_func(name: &str, args: T) -> Column { - Column::from(spark::Expression { - expr_type: Some(spark::expression::ExprType::UnresolvedFunction( - spark::expression::UnresolvedFunction { - function_name: name.to_string(), - arguments: args.to_vec_expr(), - is_distinct: false, - is_user_defined_function: false, - }, - )), - }) -} - -pub fn sort_order(cols: I) -> Vec -where - T: ToExpr, - I: IntoIterator, -{ - cols.into_iter() - .map(|col| match col.to_expr().expr_type.unwrap() { - spark::expression::ExprType::SortOrder(ord) => *ord, - _ => spark::expression::SortOrder { - child: Some(Box::new(col.to_expr())), - direction: 1, - null_ordering: 1, - }, - }) - .collect() -} - -#[macro_export] -macro_rules! generate_functions { - (no_args: $($func_name:ident),*) => { - $( - pub fn $func_name() -> Column { - let empty_args: Vec = vec![]; - invoke_func(stringify!($func_name), empty_args) - } - )* - }; - (one_col: $($func_name:ident),*) => { - $( - pub fn $func_name(col: T) -> Column - where - Vec: expressions::ToVecExpr, - { - invoke_func(stringify!($func_name), vec![col]) - } - )* - }; - (two_cols: $($func_name:ident),*) => { - $( - pub fn $func_name(col1: T, col2: T) -> Column - where - Vec: expressions::ToVecExpr, - { - invoke_func(stringify!($func_name), vec![col1, col2]) - } - )* - }; - (multiple_cols: $($func_name:ident),*) => { - $( - pub fn $func_name(cols: T) -> Column - { - invoke_func(stringify!($func_name), cols) - } - )* - }; -} - -#[macro_export] -macro_rules! impl_to_data_type { - ($type:ty, $inner_type:ident) => { - impl ToDataType for $type { - fn to_proto_type(&self) -> spark::DataType { - spark::DataType { - kind: Some(spark::data_type::Kind::$inner_type( - spark::data_type::$inner_type { - type_variation_reference: 0, - }, - )), - } - } - } - }; -} - -#[macro_export] -macro_rules! impl_to_literal { - ($type:ty, $inner_type:ident) => { - impl ToLiteral for $type { - fn to_literal(&self) -> spark::expression::Literal { - spark::expression::Literal { - literal_type: Some(spark::expression::literal::LiteralType::$inner_type(*self)), - } - } - } - }; -}