add num_partitions argument to support large datasets

Nixtla · Aug 27, 2024 · af251c3
1 parent 60622ef
commit af251c3
Show file tree

Hide file tree

Showing 9 changed files with 19 additions and 25 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -20,6 +20,8 @@ Depends:
 LazyData: true
 Imports: 
     dplyr,
+    future,
+    future.apply,
     ggplot2,
     httr2,
     lubridate,

diff --git a/NAMESPACE b/NAMESPACE
@@ -28,6 +28,10 @@ importFrom(dplyr,slice)
 importFrom(dplyr,slice_tail)
 importFrom(dplyr,summarize)
 importFrom(dplyr,ungroup)
+importFrom(future,availableCores)
+importFrom(future,multisession)
+importFrom(future,plan)
+importFrom(future.apply,future_lapply)
 importFrom(ggplot2,aes)
 importFrom(ggplot2,facet_wrap)
 importFrom(ggplot2,geom_line)

diff --git a/R/nixtlaR-package.R b/R/nixtlaR-package.R
@@ -13,6 +13,10 @@
 #' @importFrom dplyr slice_tail
 #' @importFrom dplyr summarize
 #' @importFrom dplyr ungroup
+#' @importFrom future availableCores
+#' @importFrom future multisession
+#' @importFrom future plan
+#' @importFrom future.apply future_lapply
 #' @importFrom ggplot2 aes
 #' @importFrom ggplot2 facet_wrap
 #' @importFrom ggplot2 geom_line

diff --git a/R/nixtla_client_cross_validation.R b/R/nixtla_client_cross_validation.R
@@ -29,8 +29,6 @@
 #'
 nixtla_client_cross_validation <- function(df, h=8, freq=NULL, id_col=NULL, time_col="ds", target_col="y", X_df=NULL, level=NULL, quantiles=NULL, n_windows=1, step_size=NULL, finetune_steps=0, finetune_loss="default", clean_ex_first=TRUE, model="timegpt-1", num_partitions=NULL){
 
-  start <- Sys.time()
-
   # Prepare data ----
   names(df)[which(names(df) == time_col)] <- "ds"
   names(df)[which(names(df) == target_col)] <- "y"
@@ -158,7 +156,7 @@ nixtla_client_cross_validation <- function(df, h=8, freq=NULL, id_col=NULL, time
   }
 
   # Date transformation ----
-  res <- .transform_output_dates(res, "ds", freq, data$flag)
+  res <- .transform_output_dates(res, id_col, "ds", freq, data$flag)
   new_cutoff <- future.apply::future_lapply(res$cutoff, lubridate::ymd_hms)
   res$cutoff <- do.call(c, new_cutoff)
 
@@ -174,8 +172,5 @@ nixtla_client_cross_validation <- function(df, h=8, freq=NULL, id_col=NULL, time
 
   row.names(res) <- NULL
 
-  end <- Sys.time()
-  print(paste0("Total execution time: ", end-start))
-
   return(res)
 }
diff --git a/R/nixtla_client_detect_anomalies.R b/R/nixtla_client_detect_anomalies.R
@@ -22,8 +22,6 @@
 #'
 nixtla_client_detect_anomalies <- function(df, freq=NULL, id_col=NULL, time_col="ds", target_col="y", level=c(99), clean_ex_first=TRUE, model="timegpt-1", num_partitions=NULL){
 
-  start <- Sys.time()
-
   # Prepare data ----
   names(df)[which(names(df) == time_col)] <- "ds"
   names(df)[which(names(df) == target_col)] <- "y"
@@ -89,7 +87,7 @@ nixtla_client_detect_anomalies <- function(df, freq=NULL, id_col=NULL, time_col=
   res[, 3:ncol(res)] <- future.apply::future_lapply(res[, 3:ncol(res)], as.numeric)
 
   # Date transformation ----
-  res <- .transform_output_dates(res, "ds", freq, data$flag)
+  res <- .transform_output_dates(res, id_col, "ds", freq, data$flag)
 
   # Rename columns ----
   colnames(res)[which(colnames(res) == "ds")] <- time_col
@@ -103,8 +101,5 @@ nixtla_client_detect_anomalies <- function(df, freq=NULL, id_col=NULL, time_col=
 
   row.names(res) <- NULL
 
-  end <- Sys.time()
-  print(paste0("Total execution time: ", end-start))
-
   return(res)
 }
diff --git a/R/nixtla_client_forecast.R b/R/nixtla_client_forecast.R
@@ -28,7 +28,6 @@
 #'
 nixtla_client_forecast <- function(df, h=8, freq=NULL, id_col=NULL, time_col="ds", target_col="y", X_df=NULL, level=NULL, quantiles=NULL, finetune_steps=0, finetune_loss="default", clean_ex_first=TRUE, add_history=FALSE, model="timegpt-1", num_partitions=NULL){
 
-  start <- Sys.time()
   # Prepare data ----
   names(df)[which(names(df) == time_col)] <- "ds"
   names(df)[which(names(df) == target_col)] <- "y"
@@ -153,7 +152,7 @@ nixtla_client_forecast <- function(df, h=8, freq=NULL, id_col=NULL, time_col="ds
   }
 
   # Date transformation ----
-  fcst <- .transform_output_dates(fcst, "ds", freq, data$flag)
+  fcst <- .transform_output_dates(fcst, id_col, "ds", freq, data$flag)
 
   # Rename columns ----
   names(fcst)[which(names(fcst) == "ds")] <- time_col
@@ -180,8 +179,5 @@ nixtla_client_forecast <- function(df, h=8, freq=NULL, id_col=NULL, time_col="ds
 
   row.names(fcst) <- NULL
 
-  end <- Sys.time()
-  print(paste0("Total execution time: ", end-start))
-
   return(fcst)
 }
diff --git a/R/nixtla_client_historic.R b/R/nixtla_client_historic.R
@@ -25,8 +25,6 @@
 #'
 nixtla_client_historic <- function(df, freq=NULL, id_col=NULL, time_col="ds", target_col="y", level=NULL, quantiles=NULL, finetune_steps=0, finetune_loss="default", clean_ex_first=TRUE, model="timegpt-1", num_partitions=NULL){
 
-  start <- Sys.time()
-
   # Prepare data ----
   names(df)[which(names(df) == time_col)] <- "ds"
   names(df)[which(names(df) == target_col)] <- "y"
@@ -131,7 +129,7 @@ nixtla_client_historic <- function(df, freq=NULL, id_col=NULL, time_col="ds", ta
   }
 
   # Date transformation ----
-  fitted <- .transform_output_dates(fitted, "ds", freq, data$flag)
+  fitted <- .transform_output_dates(fitted, id_col, "ds", freq, data$flag)
 
   # Rename columns ----
   names(fitted)[which(names(fitted) == "ds")] <- time_col
@@ -145,8 +143,5 @@ nixtla_client_historic <- function(df, freq=NULL, id_col=NULL, time_col="ds", ta
 
   row.names(fitted) <- NULL
 
-  end <- Sys.time()
-  print(paste0("Total execution time: ", end-start))
-
   return(fitted)
 }
diff --git a/R/transform_output_dates.R b/R/transform_output_dates.R
@@ -2,6 +2,7 @@
 ##' This is a private function of 'nixtlar'
 #'
 #' @param df Dataframe with the 'TimeGPT' output, where column 'col' contains date strings.
+#' @param id_col Column that identifies each series.
 #' @param col Name of the column with the dates to transform.
 #' @param freq Frequency of the data.
 #' @param flag Indicator where 1 denotes 'tsibble' and 0 denotes 'dataframe'.
@@ -15,7 +16,7 @@
 #'   fcst <- .transform_output_dates(fcst, id_col, col, freq, flag)
 #' }
 #'
-.transform_output_dates <- function(df, col, freq, flag){
+.transform_output_dates <- function(df, id_col, col, freq, flag){
 
   index_col <- which(names(df) == col)
 

diff --git a/man/dot-transform_output_dates.Rd b/man/dot-transform_output_dates.Rd