Skip to content

Commit

Permalink
Added support for exogenous variables and minor improvements to core …
Browse files Browse the repository at this point in the history
…functions.
  • Loading branch information
MMenchero committed Dec 13, 2023
1 parent 85d8c84 commit 0a09e9c
Show file tree
Hide file tree
Showing 9 changed files with 138 additions and 45 deletions.
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# Generated by roxygen2: do not edit by hand

export(.validate_exogenous)
export(date_conversion)
export(infer_frequency)
export(nixtla_set_token)
Expand Down
21 changes: 13 additions & 8 deletions R/timegpt_anomaly_detection.R
Original file line number Diff line number Diff line change
Expand Up @@ -15,12 +15,17 @@
timegpt_anomaly_detection <- function(df, freq=NULL, id_col=NULL, time_col="ds", target_col="y", level=c(99), clean_ex_first=TRUE, model="timegpt-1"){

# Prepare data ----
url_anomaly <- "https://dashboard.nixtla.io/api/timegpt_multi_series_anomalies"
names(df)[which(names(df) == time_col)] <- "ds"
names(df)[which(names(df) == target_col)] <- "y"

if(is.null(id_col)){
# create unique_id for single series
df <- df |>
dplyr::mutate(unique_id = "id") |>
dplyr::mutate(unique_id = "ts_0") |>
dplyr::select(c("unique_id", tidyselect::everything()))
}else{
# id_col is not NULL
names(df)[which(names(df) == id_col)] <- "unique_id"
}

data <- .timegpt_data_prep(df, freq, id_col, time_col, target_col)
Expand All @@ -34,11 +39,10 @@ timegpt_anomaly_detection <- function(df, freq=NULL, id_col=NULL, time_col="ds",
clean_ex_first = clean_ex_first
)

names(df)[which(names(df) == time_col)] <- "ds"
names(df)[which(names(df) == target_col)] <- "y"
if(any(!(names(df) %in% c("unique_id", "ds", "y")))){
exogenous <- df |>
dplyr::select(-y)
if(!any(names(df) %in% c("unique_id", "ds", "y"))){
# input includes exogenous variables
exogenous <- df |>
dplyr::select(-c(.data$y))

x <- list(
columns = names(exogenous),
Expand All @@ -55,6 +59,7 @@ timegpt_anomaly_detection <- function(df, freq=NULL, id_col=NULL, time_col="ds",
timegpt_data[["level"]] <- level

# Make request ----
url_anomaly <- "https://dashboard.nixtla.io/api/timegpt_multi_series_anomalies"
resp_anomaly <- httr2::request(url_anomaly) |>
httr2::req_headers(
"accept" = "application/json",
Expand Down Expand Up @@ -104,7 +109,7 @@ timegpt_anomaly_detection <- function(df, freq=NULL, id_col=NULL, time_col="ds",
}else{
# remove unique_id column
res <- res |>
dplyr::select(-unique_id)
dplyr::select(-c(.data$unique_id))
}

return(res)
Expand Down
25 changes: 20 additions & 5 deletions R/timegpt_cross_validation.R
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,16 @@
timegpt_cross_validation <- function(df, h=8, freq=NULL, id_col=NULL, time_col="ds", target_col="y", X_df=NULL, level=NULL, n_windows=1, step_size=NULL, finetune_steps=0, clean_ex_first=TRUE, model="timegpt-1"){

# Prepare data ----
url_cv <- "https://dashboard.nixtla.io/api/timegpt_multi_series_cross_validation"
names(df)[which(names(df) == time_col)] <- "ds"
names(df)[which(names(df) == target_col)] <- "y"

if(is.null(id_col)){
df <- df |>
dplyr::mutate(unique_id = "id") |>
dplyr::mutate(unique_id = "ts_0") |>
dplyr::select(c("unique_id", tidyselect::everything()))
}else{
# id_col is not NULL
names(df)[which(names(df) == id_col)] <- "unique_id"
}

data <- .timegpt_data_prep(df, freq, id_col, time_col, target_col)
Expand All @@ -48,11 +53,20 @@ timegpt_cross_validation <- function(df, h=8, freq=NULL, id_col=NULL, time_col="

if(!is.null(X_df)){
names(X_df)[which(names(X_df) == time_col)] <- "ds"
names(X_df)[which(names(X_df) == target_col)] <- "y"
if(!is.null(id_col)){
if(is.null(id_col)){
X_df <- X_df |>
dplyr::mutate(unique_id = "ts_0") |>
dplyr::select(c("unique_id", tidyselect::everything()))
}else{
names(X_df)[which(names(X_df) == id_col)] <- "unique_id"
}

# Validation checks for exogenous variables
status <- .validate_exogenous(df, h, X_df)
if(!status$validation){
stop(print(status$message))
}

exogenous <- df |>
dplyr::select(-y)

Expand All @@ -72,6 +86,7 @@ timegpt_cross_validation <- function(df, h=8, freq=NULL, id_col=NULL, time_col="
}

# Make request ----
url_cv <- "https://dashboard.nixtla.io/api/timegpt_multi_series_cross_validation"
resp_cv <- httr2::request(url_cv) |>
httr2::req_headers(
"accept" = "application/json",
Expand Down Expand Up @@ -133,7 +148,7 @@ timegpt_cross_validation <- function(df, h=8, freq=NULL, id_col=NULL, time_col="
}else{
# remove unique_id column
res <- res |>
dplyr::select(-unique_id)
dplyr::select(-c(.data$unique_id))
}

return(res)
Expand Down
22 changes: 8 additions & 14 deletions R/timegpt_data_prep.R
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
#' Prepares data for TimeGPT's API
#' This is a private function of the package
#' This is a private function of nixtlar
#'
#' @param df A tsibble or a data frame with time series data.
#' @param freq Frequency of the data.
#' @param id_col Column that identifies each series.
#' @param time_col Column that identifies each timestep.
#' @param target_col Column that contains the target variable.
#' @param id_col Column that identifies each series. Should be named unique_id.
#' @param time_col Column that identifies each timestep. Should be named ds.
#' @param target_col Column that contains the target variable. Should be named y.
#'
#' @return A list with the given or inferred frequency, the prepared data, and the original data frame renamed.
#'
Expand All @@ -15,13 +15,6 @@
stop("Only tsibbles or data frames are allowed.")
}

# Rename columns
names(df)[which(names(df) == time_col)] <- "ds"
names(df)[which(names(df) == target_col)] <- "y"
if(!is.null(id_col)){
names(df)[which(names(df) == id_col)] <- "unique_id"
}

# If df is a tsibble, convert dates to strings and infer frequency if necessary
if(tsibble::is_tsibble(df)){
res <- date_conversion(df)
Expand All @@ -35,10 +28,11 @@
}

# Prepare data
df <- df[,c("unique_id", "ds", "y")]
filtered_df <- df[,c("unique_id", "ds", "y")]

y <- list(
columns = names(df),
data = lapply(1:nrow(df), function(i) as.list(df[i,]))
columns = names(filtered_df),
data = lapply(1:nrow(filtered_df), function(i) as.list(filtered_df[i,]))
)

res <- list(freq = freq,
Expand Down
23 changes: 19 additions & 4 deletions R/timegpt_forecast.R
Original file line number Diff line number Diff line change
Expand Up @@ -19,13 +19,17 @@
timegpt_forecast <- function(df, h=8, freq=NULL, id_col=NULL, time_col="ds", target_col="y", X_df=NULL, level=NULL, finetune_steps=0, clean_ex_first=TRUE, add_history=FALSE, model="timegpt-1"){

# Prepare data ----
url <- "https://dashboard.nixtla.io/api/timegpt_multi_series"
names(df)[which(names(df) == time_col)] <- "ds"
names(df)[which(names(df) == target_col)] <- "y"

if(is.null(id_col)){
# create unique_id for single series
df <- df |>
dplyr::mutate(unique_id = "id") |>
dplyr::mutate(unique_id = "ts_0") |>
dplyr::select(c("unique_id", tidyselect::everything()))
}else{
# id_col is not NULL
names(df)[which(names(df) == id_col)] <- "unique_id"
}

data <- .timegpt_data_prep(df, freq, id_col, time_col, target_col)
Expand All @@ -43,10 +47,20 @@ timegpt_forecast <- function(df, h=8, freq=NULL, id_col=NULL, time_col="ds", tar

if(!is.null(X_df)){
names(X_df)[which(names(X_df) == time_col)] <- "ds"
if(!is.null(id_col)){
if(is.null(id_col)){
X_df <- X_df |>
dplyr::mutate(unique_id = "ts_0") |>
dplyr::select(c("unique_id", tidyselect::everything()))
}else{
names(X_df)[which(names(X_df) == id_col)] <- "unique_id"
}

# Validation checks for exogenous variables
status <- .validate_exogenous(df, h, X_df)
if(!status$validation){
stop(print(status$message))
}

exogenous <- df |>
dplyr::select(-y)

Expand All @@ -66,6 +80,7 @@ timegpt_forecast <- function(df, h=8, freq=NULL, id_col=NULL, time_col="ds", tar
}

# Make request ----
url <- "https://dashboard.nixtla.io/api/timegpt_multi_series"
resp <- httr2::request(url) |>
httr2::req_headers(
"accept" = "application/json",
Expand Down Expand Up @@ -118,7 +133,7 @@ timegpt_forecast <- function(df, h=8, freq=NULL, id_col=NULL, time_col="ds", tar
}else{
# remove unique_id column
fcst <- fcst |>
dplyr::select(-unique_id)
dplyr::select(-c(.data$unique_id))
}

# Generate fitted values ----
Expand Down
21 changes: 12 additions & 9 deletions R/timegpt_historic.R
Original file line number Diff line number Diff line change
Expand Up @@ -15,13 +15,17 @@
timegpt_historic <- function(df, freq=NULL, id_col=NULL, time_col="ds", target_col="y", level=NULL, finetune_steps=0, clean_ex_first=TRUE){

# Prepare data ----
url_historic <- "https://dashboard.nixtla.io/api/timegpt_multi_series_historic"
names(df)[which(names(df) == time_col)] <- "ds"
names(df)[which(names(df) == target_col)] <- "y"

if(is.null(id_col)){
# create unique_id for single series
df <- df |>
dplyr::mutate(unique_id = "id") |>
dplyr::mutate(unique_id = "ts_0") |>
dplyr::select(c("unique_id", tidyselect::everything()))
}else{
# id_col is not NULL
names(df)[which(names(df) == id_col)] <- "unique_id"
}

data <- .timegpt_data_prep(df, freq, id_col, time_col, target_col)
Expand All @@ -35,12 +39,10 @@ timegpt_historic <- function(df, freq=NULL, id_col=NULL, time_col="ds", target_c
clean_ex_first = clean_ex_first
)

names(df)[which(names(df) == time_col)] <- "ds"
names(df)[which(names(df) == target_col)] <- "y"

if(any(!(names(df) %in% c("unique_id", "ds", "y")))){
exogenous <- df |>
dplyr::select(-y)
if(!any(names(df) %in% c("unique_id", "ds", "y"))){
# input includes exogenous variables
exogenous <- df |>
dplyr::select(-c(.data$y))

x <- list(
columns = names(exogenous),
Expand All @@ -56,6 +58,7 @@ timegpt_historic <- function(df, freq=NULL, id_col=NULL, time_col="ds", target_c
}

# Make request ----
url_historic <- "https://dashboard.nixtla.io/api/timegpt_multi_series_historic"
resp_hist <- httr2::request(url_historic) |>
httr2::req_headers(
"accept" = "application/json",
Expand Down Expand Up @@ -105,7 +108,7 @@ timegpt_historic <- function(df, freq=NULL, id_col=NULL, time_col="ds", target_c
}else{
# remove unique_id column
fitted <- fitted |>
dplyr::select(-unique_id)
dplyr::select(-c(.data$unique_id))
}

return(fitted)
Expand Down
37 changes: 37 additions & 0 deletions R/validate_exogenous.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
#' Validate exogenous variables (if applicable)
#' This is a private function of nixtlar
#'
#' Runs two checks: (1) `df` and `X_df` must contain the same set of exogenous
#' columns, and (2) every series in `X_df` must have exactly `h` rows. If both
#' checks fail, the message of the second check is the one reported.
#'
#' @param df A tsibble or a data frame with time series data. Every column other than `unique_id`, `ds` and `y` is treated as an exogenous variable.
#' @param h Forecast horizon.
#' @param X_df A tsibble or a data frame with future exogenous variables. Must contain a `unique_id` column; every column other than `unique_id` and `ds` is treated as an exogenous variable.
#'
#' @return A list with the result of the validation (TRUE/FALSE) and an error message (if applicable)
#' @export
#'
.validate_exogenous <- function(df, h, X_df){

  status <- list(validation = TRUE,
                 message = NULL
  )

  # The per-series row count below needs unique_id; fail loudly if it is absent
  if(!("unique_id" %in% names(X_df))){
    stop("X_df must contain a unique_id column.", call. = FALSE)
  }

  # Check if df and X_df contain the same exogenous variables
  vals_df <- setdiff(names(df), c("unique_id", "ds", "y"))
  vals_X_df <- setdiff(names(X_df), c("unique_id", "ds"))

  if(!setequal(vals_df, vals_X_df)){
    # Fixed: this previously assigned to a misspelled element, so a mismatch
    # in exogenous variables never invalidated the status
    status$validation <- FALSE
    status$message <- "df and X_df must contain the same exogenous variables."
  }

  # Check if the future values of the exogenous variables cover the forecast horizon
  # Count rows per series; as.character() drops unused factor levels and
  # useNA keeps a missing id as its own series
  rows_per_series <- table(as.character(X_df$unique_id), useNA = "ifany")

  if(any(rows_per_series != h)){
    status$validation <- FALSE
    status$message <- "The future values of the exogenous variables must cover the forecast horizon"
  }

  return(status)
}
10 changes: 5 additions & 5 deletions man/dot-timegpt_data_prep.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

23 changes: 23 additions & 0 deletions man/dot-validate_exogenous.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 0a09e9c

Please sign in to comment.