-
Notifications
You must be signed in to change notification settings - Fork 3
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #30 from moj-analytical-services/pydbtools_wrapper
Pydbtools wrapper
- Loading branch information
Showing
47 changed files
with
2,393 additions
and
360 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,2 +1,8 @@ | ||
^requirements\.txt$ | ||
^renv$ | ||
^renv\.lock$ | ||
^.*\.Rproj$ | ||
^\.Rproj\.user$ | ||
^LICENSE\.md$ | ||
^doc$ | ||
^Meta$ |
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2,3 +2,6 @@ | |
.Rhistory | ||
.RData | ||
.Ruserdata | ||
renv | ||
.Rprofile | ||
/Meta/ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,20 +1,20 @@ | ||
Package: dbtools | ||
Type: Package | ||
Title: Uses R wrapper function to send queries to athena. | ||
Version: 2.0.3 | ||
Version: 3.0.0 | ||
Author: Karik Isichei | ||
Maintainer: The package maintainer <[email protected]> | ||
Description: See title. | ||
License: What license is it under? | ||
License: MIT + file LICENSE | ||
Encoding: UTF-8 | ||
LazyData: true | ||
RoxygenNote: 6.0.1 | ||
RoxygenNote: 7.1.1 | ||
Imports: | ||
reticulate (>= 1.10), | ||
s3tools, | ||
readr | ||
magrittr, | ||
reticulate, | ||
arrow | ||
Suggests: | ||
data.table (>= 1.11.8) | ||
Remotes: | ||
moj-analytical-services/s3tools | ||
|
||
knitr, | ||
data.table (>= 1.11.8), | ||
rmarkdown, | ||
tibble | ||
VignetteBuilder: knitr |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
YEAR: 2022 | ||
COPYRIGHT HOLDER: Ministry of Justice |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
# MIT License | ||
|
||
Copyright (c) 2022 Ministry of Justice | ||
|
||
Permission is hereby granted, free of charge, to any person obtaining a copy | ||
of this software and associated documentation files (the "Software"), to deal | ||
in the Software without restriction, including without limitation the rights | ||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | ||
copies of the Software, and to permit persons to whom the Software is | ||
furnished to do so, subject to the following conditions: | ||
|
||
The above copyright notice and this permission notice shall be included in all | ||
copies or substantial portions of the Software. | ||
|
||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | ||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | ||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | ||
SOFTWARE. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,7 +1,21 @@ | ||
# Generated by roxygen2: do not edit by hand | ||
|
||
export(get_athena_query_response) | ||
export("%>%") | ||
export(create_temp_table) | ||
export(delete_database_and_data) | ||
export(delete_partitions_and_data) | ||
export(delete_table_and_data) | ||
export(describe_table) | ||
export(get_query_columns_types) | ||
export(get_query_execution) | ||
export(get_sql_from_file) | ||
export(read_sql) | ||
import(readr) | ||
import(reticulate) | ||
import(s3tools) | ||
export(read_sql_query) | ||
export(render_sql_template) | ||
export(repair_table) | ||
export(show_create_table) | ||
export(start_query_execution) | ||
export(start_query_execution_and_wait) | ||
export(stop_query_execution) | ||
export(wait_query) | ||
importFrom(magrittr,"%>%") |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
#' dbtools: A package for accessing AWS Athena from the Analytical Platform. | ||
#' | ||
#' @section About: | ||
#' The dbtools package is used to run SQL queries configured for the | ||
#' Analytical Platform. This package is a reticulated | ||
#' wrapper around the Python library pydbtools | ||
#' which uses AWS Wrangler's Athena module but adds additional functionality | ||
#' (like Jinja templating, creating temporary tables) and alters some configuration | ||
#' to our specification. | ||
#' | ||
#' Alternatively you might want to use | ||
#' Rdbtools, which has the | ||
#' advantages of being R-native, so no messing with `reticulate` and Python, and | ||
#' supporting `dbplyr`. Please note the caveat about support, though. | ||
#' | ||
#' @seealso \url{https://github.com/moj-analytical-services/pydbtools} | ||
#' @seealso \url{https://github.com/moj-analytical-services/Rdbtools} | ||
#' | ||
#' @docType package | ||
#' @name dbtools | ||
NULL | ||
#> NULL |
This file was deleted.
Oops, something went wrong.
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,50 @@ | ||
#' Send an SQL query to Athena and receive a dataframe.
#'
#' The query result is saved by pydbtools to a temporary parquet file and then
#' loaded into R using arrow, as pandas and reticulate are frequently
#' incompatible. The temporary file is removed when the function exits, whether
#' it returns normally or stops with an error.
#'
#' @param sql An SQL query
#'
#' @return Dataframe or tibble if the tibble library is loaded.
#' @export
#'
#' @examples
#' \dontrun{
#' df <- dbtools::read_sql_query("select * from my_db.my_table")
#' }
read_sql_query <- function(sql) {
  tmp_location <- tempfile(fileext = ".parquet")
  # Register the cleanup before anything can fail so the temporary file is
  # removed even when the query or the parquet read raises an error.
  on.exit(unlink(tmp_location), add = TRUE)
  dbtools.env$pydb$save_query_to_parquet(sql, tmp_location)
  arrow::read_parquet(tmp_location)
}
|
||
#' Uses boto3 (in python) to send an sql query to athena and return an R
#' dataframe, tibble or data.table based on user preference.
#'
#' @export
#'
#' @details Will send an SQL query to Athena and wait for it to complete. Once
#' the query has completed the result will be read using arrow (see
#' `read_sql_query`) and converted to the requested class. If you need a more
#' bespoke or self defined data reading function and arguments, use
#' `dbtools::start_query_execution_and_wait` to send an SQL query and return
#' the s3 path to data in csv format.
#'
#' The `tibble` and `data.table` packages are only suggested by this package,
#' so the one needed for the requested output must be installed. This is
#' checked before the query is sent.
#'
#' @param sql_query A string specifying the SQL query you want to send to
#'   athena. See packages github readme for info on the flavour of SQL Athena
#'   uses.
#' @param return_df_as String specifying what the table should be returned as
#'   i.e. 'dataframe', 'tibble' (converts data using tibble::as_tibble) or
#'   'data.table' (converts data using data.table::as.data.table). Default is
#'   'tibble'. Any other value is treated as 'tibble'. Not all tables returned
#'   are a DataFrame class.
#'
#' @return A table as a dataframe, tibble or data.table
#'
#' @examples
#' \dontrun{
#' # Read an sql query returning a tibble
#' df <- dbtools::read_sql(
#'   "SELECT * from crest_v1.flatfile limit 10000",
#'   return_df_as = "tibble"
#' )
#' }
read_sql <- function(sql_query, return_df_as = "tibble") {
  # Work out which optional package performs the conversion. NULL means base R
  # (as.data.frame); anything that is not "dataframe" or "data.table" falls
  # back to a tibble.
  converter_pkg <- if (return_df_as == "dataframe") {
    NULL
  } else if (return_df_as == "data.table") {
    "data.table"
  } else {
    "tibble"
  }

  # Fail with a clear message before running the query rather than after it
  # has completed.
  if (!is.null(converter_pkg) &&
        !requireNamespace(converter_pkg, quietly = TRUE)) {
    stop(
      "Package '", converter_pkg, "' must be installed to return the result ",
      "as '", return_df_as, "'.",
      call. = FALSE
    )
  }

  df <- read_sql_query(sql_query)

  if (is.null(converter_pkg)) {
    as.data.frame(df)
  } else if (converter_pkg == "data.table") {
    data.table::as.data.table(df)
  } else {
    tibble::as_tibble(df)
  }
}
This file was deleted.
Oops, something went wrong.
Oops, something went wrong.