Skip to content

Commit

Permalink
Merge pull request #30 from moj-analytical-services/pydbtools_wrapper
Browse files Browse the repository at this point in the history
Pydbtools wrapper
  • Loading branch information
mratford authored Feb 23, 2022
2 parents 6dbc029 + aa08f9e commit 8b3809f
Show file tree
Hide file tree
Showing 47 changed files with 2,393 additions and 360 deletions.
6 changes: 6 additions & 0 deletions .Rbuildignore
Original file line number Diff line number Diff line change
@@ -1,2 +1,8 @@
^requirements\.txt$
^renv$
^renv\.lock$
^.*\.Rproj$
^\.Rproj\.user$
^LICENSE\.md$
^doc$
^Meta$
53 changes: 0 additions & 53 deletions .circleci/config.yml

This file was deleted.

3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,6 @@
.Rhistory
.RData
.Ruserdata
renv
.Rprofile
/Meta/
22 changes: 11 additions & 11 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,20 +1,20 @@
Package: dbtools
Type: Package
Title: Uses R wrapper function to send queries to athena.
Version: 2.0.3
Version: 3.0.0
Author: Karik Isichei
Maintainer: The package maintainer <[email protected]>
Description: See title.
License: What license is it under?
License: MIT + file LICENSE
Encoding: UTF-8
LazyData: true
RoxygenNote: 6.0.1
RoxygenNote: 7.1.1
Imports:
reticulate (>= 1.10),
s3tools,
readr
magrittr,
reticulate,
arrow
Suggests:
data.table (>= 1.11.8)
Remotes:
moj-analytical-services/s3tools

knitr,
data.table (>= 1.11.8),
rmarkdown,
tibble
VignetteBuilder: knitr
2 changes: 2 additions & 0 deletions LICENSE
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
YEAR: 2022
COPYRIGHT HOLDER: Ministry of Justice
21 changes: 21 additions & 0 deletions LICENSE.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# MIT License

Copyright (c) 2022 Ministry of Justice

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
22 changes: 18 additions & 4 deletions NAMESPACE
Original file line number Diff line number Diff line change
@@ -1,7 +1,21 @@
# Generated by roxygen2: do not edit by hand

export(get_athena_query_response)
export("%>%")
export(create_temp_table)
export(delete_database_and_data)
export(delete_partitions_and_data)
export(delete_table_and_data)
export(describe_table)
export(get_query_columns_types)
export(get_query_execution)
export(get_sql_from_file)
export(read_sql)
import(readr)
import(reticulate)
import(s3tools)
export(read_sql_query)
export(render_sql_template)
export(repair_table)
export(show_create_table)
export(start_query_execution)
export(start_query_execution_and_wait)
export(stop_query_execution)
export(wait_query)
importFrom(magrittr,"%>%")
22 changes: 22 additions & 0 deletions R/dbtools.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
#' dbtools: A package for accessing AWS Athena from the Analytical Platform.
#'
#' @section About:
#' The dbtools package is used to run SQL queries configured for the
#' Analytical Platform. This package is a reticulated
#' wrapper around the Python library pydbtools
#' which uses AWS Wrangler's Athena module but adds additional functionality
#' (like Jinja templating, creating temporary tables) and alters some
#' configuration to our specification.
#'
#' Alternatively you might want to use
#' Rdbtools, which has the
#' advantages of being R-native, so no messing with `reticulate` and Python, and
#' supporting `dbplyr`. Please note the caveat about support, though.
#'
#' @seealso \url{https://github.com/moj-analytical-services/pydbtools}
#' @seealso \url{https://github.com/moj-analytical-services/Rdbtools}
#'
#' @docType package
#' @name dbtools
# The roxygen block above documents the package as a whole; it is attached to
# `NULL` because there is no function or object for it to describe.
NULL
40 changes: 0 additions & 40 deletions R/get_athena_query_response.R

This file was deleted.

41 changes: 0 additions & 41 deletions R/get_data_conversion.R

This file was deleted.

50 changes: 50 additions & 0 deletions R/read.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
#' Send an SQL query to Athena and receive a dataframe.
#'
#' The query result is saved to a temporary parquet file by the Python backend
#' (`dbtools.env$pydb`) and then loaded into R with `arrow::read_parquet()`.
#' The temporary file is removed when the function exits, whether or not the
#' query succeeded.
#'
#' @param sql An SQL query
#'
#' @return Dataframe or tibble if the tibble library is loaded.
#' @export
#'
#' @examples
#' \dontrun{
#' df <- dbtools::read_sql_query("select * from my_db.my_table")
#' }
read_sql_query <- function(sql) {
  # Download the dataframe result to a parquet temporary file as pandas and
  # reticulate are frequently incompatible, and load the data into R using
  # arrow.
  tmp_location <- tempfile(fileext = ".parquet")

  # Register the cleanup before running the query so the temporary file is
  # removed even when the query or the parquet read raises an error.
  on.exit(unlink(tmp_location), add = TRUE)

  dbtools.env$pydb$save_query_to_parquet(sql, tmp_location)
  arrow::read_parquet(tmp_location)
}

#' Send an SQL query to Athena and return the result as an R dataframe, tibble
#' or data.table based on user preference.
#'
#' @export
#'
#' @details Will send an SQL query to Athena and wait for it to complete. Once
#'   the query has completed the result is read with `read_sql_query()` and
#'   converted to the requested class. If you need a more bespoke or
#'   self-defined data reading function and arguments, use
#'   `dbtools::start_query_execution_and_wait()` to run the query and handle
#'   its output yourself.
#'
#'   The 'tibble' and 'data.table' outputs need the corresponding package to
#'   be installed; this is checked before the query is sent.
#'
#' @param sql_query A string specifying the SQL query you want to send to
#'   athena. See packages github readme for info on the flavour of SQL Athena
#'   uses.
#' @param return_df_as String specifying what the table should be returned as
#'   i.e. 'dataframe', 'tibble' (converts data using tibble::as_tibble) or
#'   'data.table' (converts data using data.table::as.data.table). Default is
#'   'tibble'. Any other value is treated as 'tibble', with a warning.
#'
#' @return A table as a dataframe, tibble or data.table
#'
#' @examples
#' \dontrun{
#' # Read an sql query returning a tibble
#' df <- dbtools::read_sql(
#'   "SELECT * from crest_v1.flatfile limit 10000",
#'   return_df_as = "tibble"
#' )
#' }
read_sql <- function(sql_query, return_df_as = "tibble") {
  if (length(return_df_as) != 1L || is.na(return_df_as)) {
    stop("`return_df_as` must be a single, non-missing string.", call. = FALSE)
  }

  # Unknown values have always produced a tibble; keep that behaviour but tell
  # the caller, since it is usually a typo (e.g. "data.frame").
  if (!return_df_as %in% c("dataframe", "tibble", "data.table")) {
    warning(
      "Unrecognised `return_df_as` value '", return_df_as,
      "'; returning a tibble.",
      call. = FALSE
    )
    return_df_as <- "tibble"
  }

  # tibble and data.table are optional dependencies, so confirm the one we
  # need is installed before spending time on the query.
  if (return_df_as != "dataframe") {
    needed_pkg <- as.character(return_df_as)
    if (!requireNamespace(needed_pkg, quietly = TRUE)) {
      stop(
        "Package '", needed_pkg, "' must be installed to use return_df_as = '",
        needed_pkg, "'.",
        call. = FALSE
      )
    }
  }

  df <- read_sql_query(sql_query)

  if (return_df_as == "dataframe") {
    as.data.frame(df)
  } else if (return_df_as == "data.table") {
    data.table::as.data.table(df)
  } else {
    tibble::as_tibble(df)
  }
}
78 changes: 0 additions & 78 deletions R/read_sql.R

This file was deleted.

Loading

0 comments on commit 8b3809f

Please sign in to comment.