diff --git a/DESCRIPTION b/DESCRIPTION index 3e334bc..62d3b0d 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: GWalkR Title: Interactive Exploratory Data Analysis Tool -Version: 0.1.5 +Version: 0.2.0 Authors@R: c( person("Yue", "Yu", , "yue.yu@connect.ust.hk", role = c("aut", "cre"), comment = c(ORCID = "0000-0002-9302-0793")), @@ -17,4 +17,7 @@ Imports: htmlwidgets, jsonlite, openssl, - shiny + shiny, + shinycssloaders, + DBI, + duckdb diff --git a/NAMESPACE b/NAMESPACE index fbeb364..6553203 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -3,6 +3,10 @@ export(gwalkr) export(gwalkrOutput) export(renderGwalkr) +import(DBI) +import(duckdb) import(htmlwidgets) import(openssl) import(shiny) +import(shinycssloaders) +importFrom(jsonlite,toJSON) diff --git a/R/data_parser.R b/R/data_parser.R index cc18703..b9f9c7f 100644 --- a/R/data_parser.R +++ b/R/data_parser.R @@ -3,16 +3,21 @@ library(openssl) raw_fields <- function(df, columnSpecs = list()) { validate_columnSpecs(columnSpecs) cols <- colnames(df) + if (nrow(df) > 1000) { + df_sample <- df[sample(nrow(df),1000),] + } else { + df_sample <- df + } props <- lapply(seq_along(cols), function(i) { - infer_prop(cols[i], i, df, columnSpecs) + infer_prop(cols[i], i, df_sample, columnSpecs) }) return(props) } infer_prop <- function(col, i = NULL, df, columnSpecs = list()) { s <- df[[col]] - semantic_type <- ifelse((col %in% names(columnSpecs)), columnSpecs[[col]]$semanticType, infer_semantic(s)) - analytic_type <- ifelse((col %in% names(columnSpecs)), columnSpecs[[col]]$analyticalType, infer_analytic(s)) + semantic_type <- ifelse((col %in% names(columnSpecs)), columnSpecs[[col]]$semanticType, infer_semantic(s, col)) + analytic_type <- ifelse((col %in% names(columnSpecs)), columnSpecs[[col]]$analyticalType, infer_analytic(s, col)) prop <- list( fid = fname_encode(col), name = col, @@ -22,28 +27,33 @@ infer_prop <- function(col, i = NULL, df, columnSpecs = list()) { return(prop) } -infer_semantic <- function(s) { +is_geo_field <- function(field_name) { + field_name <- tolower(trimws(field_name, which = "both", whitespace = " .")) + return(field_name %in% c("latitude", "longitude", "lat", "long", "lon")) +} + +infer_semantic <- function(s, field_name) { v_cnt <- length(unique(s)) kind <- class(s) - if (any(sapply(c('numeric', 'integer'), inherits, x = s)) & v_cnt > 16) { + if (all(kind %in% c("numeric", "integer", "double", "complex")) || is_geo_field(field_name)) { return('quantitative') - } else if (any(sapply(c('POSIXct', 'POSIXlt', 'Date'), inherits, x = s))) { + } else if (any(sapply(c('POSIXct', 'POSIXlt', 'POSIXt', 'Date'), inherits, x = s))) { return('temporal') - } else if (inherits(s, 'ordered')) { - return('ordinal') } else { return('nominal') } } -infer_analytic <- function(s) { +infer_analytic <- function(s, field_name) { v_cnt <- length(unique(s)) kind <- class(s) - if ((inherits(s, 'numeric')) | (inherits(s, 'integer') & v_cnt > 16)) { - return('measure') - } else { - return('dimension') + if (is_geo_field(field_name)) { + return("dimension") + } + if (all(kind %in% c("numeric", "integer", "double", "complex"))) { + return("measure") } + return("dimension") } validate_columnSpecs <- function(columnSpecs) { diff --git a/R/duckdb_utils.R b/R/duckdb_utils.R new file mode 100644 index 0000000..6ba8ffd --- /dev/null +++ b/R/duckdb_utils.R @@ -0,0 +1,59 @@ +library(DBI) +library(duckdb) + +my_env <- new.env() + +duckdb_register_con <- function(df) { + my_env$con <- dbConnect(duckdb::duckdb(), ":memory:") + dbExecute(my_env$con, "INSTALL icu") + dbExecute(my_env$con, "LOAD icu") + dbExecute(my_env$con, "SET GLOBAL TimeZone = 'UTC'") + DBI::dbWriteTable(my_env$con, "gwalkr_mid_table", as.data.frame(df), overwrite = FALSE) +} + +duckdb_unregister_con <- function(df) { + if (!is.null(my_env$con)) { + dbDisconnect(my_env$con) + my_env$con <- NULL # Set to NULL after disconnecting + } +} + +duckdb_get_field_meta <- function() { + if (exists("con", envir = my_env)) { + result <- dbGetQuery(my_env$con, 'SELECT * FROM gwalkr_mid_table LIMIT 1') + if (nrow(result) > 0) { + return(get_data_meta_type(result)) + } + } else { + stop("Database connection not found.") + } +} + +duckdb_get_data <- function(sql) { + if (exists("con", envir = my_env)) { + result <- dbGetQuery(my_env$con, sql) + if (nrow(result) > 0) { + return(result) + } + } else { + stop("Database connection not found.") + } +} + +get_data_meta_type <- function(data) { + meta_types <- list() + + for (key in names(data)) { + value <- data[[key]] + field_meta_type <- if (inherits(value, "POSIXct")) { + if (!is.null(attr(value, "tzone"))) "datetime_tz" else "datetime" + } else if (is.numeric(value)) { + "number" + } else { + "string" + } + meta_types <- append(meta_types, list(list(key = key, type = field_meta_type))) + } + + return(meta_types) +} diff --git a/R/gwalkr.R b/R/gwalkr.R index 77be9dc..4fc4351 100644 --- a/R/gwalkr.R +++ b/R/gwalkr.R @@ -4,14 +4,19 @@ #' #' @import htmlwidgets #' @import openssl +#' @importFrom jsonlite toJSON +#' @import shiny +#' @import shinycssloaders +#' @import DBI +#' @import duckdb #' #' @param data A data frame to be visualized in the GWalkR. The data frame should not be empty. #' @param lang A character string specifying the language for the widget. Possible values are "en" (default), "ja", "zh". #' @param dark A character string specifying the dark mode preference. Possible values are "light" (default), "dark", "media". -#' @param columnSpecs An optional list of lists to manually specify the types of some columns in the data frame. -#' Each top level element in the list corresponds to a column, and the list assigned to each column should have -#' two elements: `analyticalType` and `semanticType`. `analyticalType` can -#' only be one of "measure" or "dimension". `semanticType` can only be one of +#' @param columnSpecs An optional list of lists to manually specify the types of some columns in the data frame. +#' Each top level element in the list corresponds to a column, and the list assigned to each column should have +#' two elements: `analyticalType` and `semanticType`. `analyticalType` can +#' only be one of "measure" or "dimension". `semanticType` can only be one of #' "quantitative", "temporal", "nominal" or "ordinal". For example: #' \code{list( #' "gender" = list(analyticalType = "dimension", semanticType = "nominal"), @@ -20,6 +25,7 @@ #' @param visConfig An optional config string to reproduce your chart. You can copy the string by clicking "export config" button on the GWalkR interface. #' @param visConfigFile An optional config file path to reproduce your chart. You can download the file by clicking "export config" button then "download" button on the GWalkR interface. #' @param toolbarExclude An optional list of strings to exclude the tools from toolbar UI. However, Kanaries brand info is not allowed to be removed or changed unless you are granted with special permission. +#' @param kernelComputation An optional boolean to enable the kernel mode computation which is much more efficient. Default is FALSE. #' #' @return An \code{htmlwidget} object that can be rendered in R environments #' @@ -28,42 +34,47 @@ #' gwalkr(mtcars) #' #' @export -gwalkr <- function(data, lang = "en", dark = "light", columnSpecs = list(), visConfig = NULL, visConfigFile = NULL, toolbarExclude = list()) { +gwalkr <- function(data, lang = "en", dark = "light", columnSpecs = list(), visConfig = NULL, visConfigFile = NULL, toolbarExclude = list(), kernelComputation = FALSE) { if (!is.data.frame(data)) stop("data must be a data frame") if (!is.null(visConfig) && !is.null(visConfigFile)) stop("visConfig and visConfigFile are mutually exclusive") lang <- match.arg(lang, choices = c("en", "ja", "zh")) rawFields <- raw_fields(data, columnSpecs) colnames(data) <- sapply(colnames(data), fname_encode) - + if (!is.null(visConfigFile)) { visConfig <- readLines(visConfigFile, warn=FALSE) } - # forward options using x - x = list( - dataSource = jsonlite::toJSON(data), - rawFields = rawFields, - i18nLang = lang, - visSpec = visConfig, - dark = dark, - toolbarExclude = toolbarExclude - ) - # create widget - htmlwidgets::createWidget( - name = 'gwalkr', - x, - package = 'GWalkR', - width='100%', - height='100%' - ) + if (kernelComputation) { + gwalkr_kernel(data, lang, dark, rawFields, visConfig, toolbarExclude) + } else { + x = list( + dataSource = toJSON(data), + rawFields = rawFields, + i18nLang = lang, + visSpec = visConfig, + dark = dark, + toolbarExclude = toolbarExclude, + useKernel = FALSE + ) + + # create widget + htmlwidgets::createWidget( + name = 'gwalkr', + x, + package = 'GWalkR', + width='100%', + height='100%' + ) + } } #' Shiny bindings for gwalkr #' #' Output and render functions for using gwalkr within Shiny #' applications and interactive Rmd documents. -#' +#' #' @import shiny #' #' @param outputId output variable to read from diff --git a/R/gwalkr_kernel.R b/R/gwalkr_kernel.R new file mode 100644 index 0000000..ea8264b --- /dev/null +++ b/R/gwalkr_kernel.R @@ -0,0 +1,80 @@ +convert_timestamps_in_df <- function(df) { + for (colname in colnames(df)) { + if (inherits(df[[colname]], "POSIXt")) { + df[[colname]] <- as.numeric(as.POSIXct(df[[colname]], tz = "UTC")) * 1000 + } + } + return(df) +} + +gwalkr_kernel <- function(data, lang, dark, rawFields, visConfig, toolbarExclude) { + cat("GWalkR kernel mode initialized...\n") + cat("Note: The console is unavailable while running a Shiny app. You can stop the app to use the console, or press Ctrl + C to terminate.\n") + + filter_func <- function(data, req) { + query <- parseQueryString(req$QUERY_STRING) + + res <- duckdb_get_data(query$sql) + res <- convert_timestamps_in_df(res) + + json <- toJSON( + res, + auto_unbox = TRUE + ) + + httpResponse( + status = 200L, + content_type = "application/json", + content = json + ) + } + + app <- shinyApp( + ui = fluidPage( + shinycssloaders::withSpinner( + gwalkrOutput("gwalkr_kernel"), + proxy.height="400px" + ) + ), + + server = function(input, output, session) { + path <- session$registerDataObj( + "GWALKR", + NULL, + filter_func + ) + + duckdb_register_con(data) + fieldMetas <- duckdb_get_field_meta() + + x = list( + rawFields = rawFields, + i18nLang = lang, + visSpec = visConfig, + dark = dark, + toolbarExclude = toolbarExclude, + useKernel = TRUE, + fieldMetas = fieldMetas, + endpointPath = path + ) + + output$gwalkr_kernel = renderGwalkr({ + htmlwidgets::createWidget( + name = 'gwalkr', + x, + package = 'GWalkR', + width='100%', + height='100%' + ) + }) + session$onSessionEnded(function() { + cat("GwalkR closed") + duckdb_unregister_con() + }) + }, + + options=c(launch.browser = .rs.invokeShinyPaneViewer) + ) + + if (interactive()) app +} \ No newline at end of file diff --git a/man/gwalkr.Rd b/man/gwalkr.Rd index dc0df85..0e39ca4 100644 --- a/man/gwalkr.Rd +++ b/man/gwalkr.Rd @@ -11,7 +11,8 @@ gwalkr( columnSpecs = list(), visConfig = NULL, visConfigFile = NULL, - toolbarExclude = list() + toolbarExclude = list(), + kernelComputation = FALSE ) } \arguments{ @@ -36,6 +37,8 @@ only be one of "measure" or "dimension". \code{semanticType} can only be one of \item{visConfigFile}{An optional config file path to reproduce your chart. You can download the file by clicking "export config" button then "download" button on the GWalkR interface.} \item{toolbarExclude}{An optional list of strings to exclude the tools from toolbar UI. However, Kanaries brand info is not allowed to be removed or changed unless you are granted with special permission.} + +\item{kernelComputation}{An optional boolean to enable the kernel mode computation which is much more efficient. Default is FALSE.} } \value{ An \code{htmlwidget} object that can be rendered in R environments diff --git a/web_app/package.json b/web_app/package.json index 1eb032d..a10927d 100644 --- a/web_app/package.json +++ b/web_app/package.json @@ -11,6 +11,7 @@ }, "dependencies": { "@kanaries/graphic-walker": "^0.4.70", + "@kanaries/gw-dsl-parser": "^0.1.49", "@rollup/plugin-commonjs": "^25.0.2", "@rollup/plugin-replace": "^5.0.2", "@rollup/plugin-terser": "^0.4.3", @@ -18,7 +19,8 @@ "mobx-react-lite": "^3.4.3", "react": "^18.2.0", "react-dom": "^18.2.0", - "styled-components": "^5.3.6" + "styled-components": "^5.3.6", + "vite-plugin-wasm": "^3.3.0" }, "devDependencies": { "@types/react": "^18.2.14", diff --git a/web_app/src/dataSource/index.tsx b/web_app/src/dataSource/index.tsx new file mode 100644 index 0000000..ce4dab1 --- /dev/null +++ b/web_app/src/dataSource/index.tsx @@ -0,0 +1,31 @@ +import type { IDataQueryPayload, IRow } from "@kanaries/graphic-walker/interfaces"; +import { parser_dsl_with_meta } from "@kanaries/gw-dsl-parser"; + +const DEFAULT_LIMIT = 50_000; + +const sendHTTPData = (sql: string, endpointPath: string) => { + return new Promise((resolve, reject) => { + fetch(`${endpointPath}&sql=${encodeURIComponent(sql)}`) + .then((response) => response.json()) + .then((data) => { + console.log("Processed data from R:", data); + resolve(data); + }) + .catch((error) => { + console.error("Error:", error); + reject(error); + }); + }); +}; + +export function getDataFromKernelBySql(fieldMetas: { key: string; type: string }[], endpointPath: string) { + return async (payload: IDataQueryPayload) => { + const sql = parser_dsl_with_meta( + "gwalkr_mid_table", + JSON.stringify({ ...payload, limit: payload.limit ?? DEFAULT_LIMIT }), + JSON.stringify({ gwalkr_mid_table: fieldMetas }) + ); + const result = (await sendHTTPData(sql, endpointPath)) ?? []; + return result as IRow[]; + }; +} diff --git a/web_app/src/index.tsx b/web_app/src/index.tsx index dc55654..6a93b1d 100644 --- a/web_app/src/index.tsx +++ b/web_app/src/index.tsx @@ -9,9 +9,13 @@ import CodeExportModal from "./components/codeExportModal"; import { StyleSheetManager } from "styled-components"; import tailwindStyle from "tailwindcss/tailwind.css?inline"; import formatSpec from "./utils/formatSpec"; +import { getDataFromKernelBySql } from "./dataSource"; + +import initDslParser from "@kanaries/gw-dsl-parser"; +import wasmPath from "@kanaries/gw-dsl-parser/gw_dsl_parser_bg.wasm?url"; const App: React.FC = observer((propsIn) => { - const { dataSource, visSpec, rawFields, toolbarExclude, ...props } = propsIn; + const { dataSource, visSpec, rawFields, toolbarExclude, useKernel, ...props } = propsIn; const storeRef = React.useRef(null); const specList = visSpec ? formatSpec(JSON.parse(visSpec) as any[], rawFields) : undefined; @@ -25,14 +29,35 @@ const App: React.FC = observer((propsIn) => { exclude: toolbarExclude ? [...toolbarExclude, "export_code"] : ["export_code"], extra: tools, }; - return ( - -
- - -
-
- ); + + if (useKernel) { + const { endpointPath, fieldMetas } = propsIn; + return ( + +
+ + +
+
+ ); + } else { + return ( + +
+ + +
+
+ ); + } }); const GWalkR = (props: IAppProps, id: string) => { @@ -46,11 +71,26 @@ const GWalkR = (props: IAppProps, id: string) => { shadowRoot.appendChild(styleElement); const root = createRoot(shadowRoot); - root.render( - - - - ); + + if (props.useKernel) { + initDslParser(wasmPath) + .then(() => { + root.render( + + + + ); + }) + .catch((e) => { + console.error(e); + }); + } else { + root.render( + + + + ); + } } // If you want to execute GWalkR after the document has loaded, you can do it here. // But remember, you will need to provide the 'props' and 'id' parameters. diff --git a/web_app/src/interfaces/index.ts b/web_app/src/interfaces/index.ts index baabc7f..98d7454 100644 --- a/web_app/src/interfaces/index.ts +++ b/web_app/src/interfaces/index.ts @@ -1,14 +1,23 @@ -import type { IRow, IMutField } from '@kanaries/graphic-walker/interfaces' +import type { IRow, IMutField } from "@kanaries/graphic-walker/interfaces"; -export interface IAppProps { +export interface IAppPropsBase { id: string; version?: string; - hashcode?: string; visSpec?: string; - env?: string; - needLoadDatas?: boolean; - specType?: string; dataSource: IRow[]; rawFields: IMutField[]; toolbarExclude: string[]; + useKernel: boolean; } + +export interface IAppPropsWithKernel extends IAppPropsBase { + useKernel: true; + fieldMetas: { key: string; type: string }[]; + endpointPath: string; +} + +export interface IAppPropsWithoutKernel extends IAppPropsBase { + useKernel: false; +} + +export type IAppProps = IAppPropsWithKernel | IAppPropsWithoutKernel; diff --git a/web_app/vite.config.ts b/web_app/vite.config.ts index 7267534..d7d5020 100644 --- a/web_app/vite.config.ts +++ b/web_app/vite.config.ts @@ -1,4 +1,5 @@ import { defineConfig, ConfigEnv, UserConfig } from 'vite' +import wasm from 'vite-plugin-wasm'; import path from 'path'; import react from '@vitejs/plugin-react' import typescript from '@rollup/plugin-typescript' @@ -19,6 +20,7 @@ export default defineConfig((config: ConfigEnv) => { }, plugins: [ react(), + wasm(), // @ts-ignore { ...typescript({ diff --git a/web_app/yarn.lock b/web_app/yarn.lock index 11137a7..aa60f52 100644 --- a/web_app/yarn.lock +++ b/web_app/yarn.lock @@ -910,6 +910,11 @@ vega-lite "^5.6.0" vega-webgl-renderer "^1.0.0-beta.2" +"@kanaries/gw-dsl-parser@^0.1.49": + version "0.1.49" + resolved "https://registry.yarnpkg.com/@kanaries/gw-dsl-parser/-/gw-dsl-parser-0.1.49.tgz#9f5c6c731ca47e52e41c319b31a1f8348d6525f6" + integrity sha512-gK95BVQhO0I7wN7VsntRzwpdTXqeXj6ESdvVTau+oXEYp4SQP2EVkQDLp+bbIKO2VVQ6iRQrqmzGTPC8wL2Xzg== + "@kanaries/react-beautiful-dnd@^0.1.1": version "0.1.1" resolved "https://registry.npmjs.org/@kanaries/react-beautiful-dnd/-/react-beautiful-dnd-0.1.1.tgz" @@ -5643,6 +5648,11 @@ vega@^5.22.1: vega-voronoi "~4.2.1" vega-wordcloud "~4.1.4" +vite-plugin-wasm@^3.3.0: + version "3.3.0" + resolved "https://registry.yarnpkg.com/vite-plugin-wasm/-/vite-plugin-wasm-3.3.0.tgz#2908ef2529bf8f33f4e549c8c6fda26ad273ca15" + integrity sha512-tVhz6w+W9MVsOCHzxo6SSMSswCeIw4HTrXEi6qL3IRzATl83jl09JVO1djBqPSwfjgnpVHNLYcaMbaDX5WB/pg== + vite@^4.4.0: version "4.4.2" resolved "https://registry.npmmirror.com/vite/-/vite-4.4.2.tgz"