-
Notifications
You must be signed in to change notification settings - Fork 15
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat(dataframe): implement core traits (#6)
* feat(dataframe): implement core traits - implement more basic traits for DataFrame - change interface to leverage Enums where possible (jointype, savemode, storagelevel) - create storage.rs with enum wrapper around spark::StorageLevel - update tests - update docker-compose.yml with delta-spark * update docs and examples * update readme
- Loading branch information
Showing
11 changed files
with
1,608 additions
and
327 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,67 @@ | ||
// This example demonstrates creating a Spark DataFrame from a CSV with read options | ||
// (header, delimiter and schema inference) without applying further transformations. | ||
// The resulting dataframe is saved in the `delta` format as a `managed` table | ||
// and `spark.sql` queries are run against the delta table | ||
// | ||
// The remote spark session must have the spark package `io.delta:delta-spark_2.12:{DELTA_VERSION}` enabled. | ||
// Where the `DELTA_VERSION` is the specified Delta Lake version. | ||
|
||
use spark_connect_rs::{SparkSession, SparkSessionBuilder}; | ||
|
||
use spark_connect_rs::dataframe::SaveMode; | ||
|
||
#[tokio::main] | ||
async fn main() -> Result<(), Box<dyn std::error::Error>> { | ||
let mut spark: SparkSession = SparkSessionBuilder::default().build().await?; | ||
|
||
let paths = vec!["/opt/spark/examples/src/main/resources/people.csv".to_string()]; | ||
|
||
let df = spark | ||
.clone() | ||
.read() | ||
.format("csv") | ||
.option("header", "True") | ||
.option("delimiter", ";") | ||
.option("inferSchema", "True") | ||
.load(paths); | ||
|
||
df.write() | ||
.format("delta") | ||
.mode(SaveMode::Overwrite) | ||
.saveAsTable("default.people_delta") | ||
.await | ||
.unwrap(); | ||
|
||
spark | ||
.sql("DESCRIBE HISTORY default.people_delta") | ||
.await? | ||
.show(Some(1), None, Some(true)) | ||
.await | ||
.unwrap(); | ||
|
||
// print results | ||
// +-------------------------------------------------------------------------------------------------------+ | ||
// | show_string | | ||
// +-------------------------------------------------------------------------------------------------------+ | ||
// | -RECORD 0-------------------------------------------------------------------------------------------- | | ||
// | version | 3 | | ||
// | timestamp | 2024-03-16 13:46:23.552 | | ||
// | userId | NULL | | ||
// | userName | NULL | | ||
// | operation | CREATE OR REPLACE TABLE AS SELECT | | ||
// | operationParameters | {isManaged -> true, description -> NULL, partitionBy -> [], properties -> {}} | | ||
// | job | NULL | | ||
// | notebook | NULL | | ||
// | clusterId | NULL | | ||
// | readVersion | 2 | | ||
// | isolationLevel | Serializable | | ||
// | isBlindAppend | false | | ||
// | operationMetrics | {numFiles -> 1, numOutputRows -> 2, numOutputBytes -> 988} | | ||
// | userMetadata | NULL | | ||
// | engineInfo | Apache-Spark/3.5.0 Delta-Lake/3.0.0 | | ||
// | only showing top 1 row | | ||
// | | | ||
// +-------------------------------------------------------------------------------------------------------+ | ||
|
||
Ok(()) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.