diff --git a/NAMESPACE b/NAMESPACE index 23c15bc..41787e3 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,5 +1,6 @@ # Generated by roxygen2: do not edit by hand +export(.validate_exogenous) export(date_conversion) export(infer_frequency) export(nixtla_set_token) diff --git a/R/timegpt_anomaly_detection.R b/R/timegpt_anomaly_detection.R index ce02b38..a01d3f8 100644 --- a/R/timegpt_anomaly_detection.R +++ b/R/timegpt_anomaly_detection.R @@ -15,12 +15,17 @@ timegpt_anomaly_detection <- function(df, freq=NULL, id_col=NULL, time_col="ds", target_col="y", level=c(99), clean_ex_first=TRUE, model="timegpt-1"){ # Prepare data ---- - url_anomaly <- "https://dashboard.nixtla.io/api/timegpt_multi_series_anomalies" + names(df)[which(names(df) == time_col)] <- "ds" + names(df)[which(names(df) == target_col)] <- "y" + if(is.null(id_col)){ # create unique_id for single series df <- df |> - dplyr::mutate(unique_id = "id") |> + dplyr::mutate(unique_id = "ts_0") |> dplyr::select(c("unique_id", tidyselect::everything())) + }else{ + # id_col is not NULL + names(df)[which(names(df) == id_col)] <- "unique_id" } data <- .timegpt_data_prep(df, freq, id_col, time_col, target_col) @@ -34,11 +39,10 @@ timegpt_anomaly_detection <- function(df, freq=NULL, id_col=NULL, time_col="ds", clean_ex_first = clean_ex_first ) - names(df)[which(names(df) == time_col)] <- "ds" - names(df)[which(names(df) == target_col)] <- "y" - if(any(!(names(df) %in% c("unique_id", "ds", "y")))){ - exogenous <- df |> - dplyr::select(-y) + if(!any(names(df) %in% c("unique_id", "ds", "y"))){ + # input includes exogenous variables + exogenous <- df |> + dplyr::select(-c(.data$y)) x <- list( columns = names(exogenous), @@ -55,6 +59,7 @@ timegpt_anomaly_detection <- function(df, freq=NULL, id_col=NULL, time_col="ds", timegpt_data[["level"]] <- level # Make request ---- + url_anomaly <- "https://dashboard.nixtla.io/api/timegpt_multi_series_anomalies" resp_anomaly <- httr2::request(url_anomaly) |> httr2::req_headers( "accept" = 
"application/json", @@ -104,7 +109,7 @@ timegpt_anomaly_detection <- function(df, freq=NULL, id_col=NULL, time_col="ds", }else{ # remove unique_id column res <- res |> - dplyr::select(-unique_id) + dplyr::select(-c(.data$unique_id)) } return(res) diff --git a/R/timegpt_cross_validation.R b/R/timegpt_cross_validation.R index 52192e0..4319694 100644 --- a/R/timegpt_cross_validation.R +++ b/R/timegpt_cross_validation.R @@ -20,11 +20,16 @@ timegpt_cross_validation <- function(df, h=8, freq=NULL, id_col=NULL, time_col="ds", target_col="y", X_df=NULL, level=NULL, n_windows=1, step_size=NULL, finetune_steps=0, clean_ex_first=TRUE, model="timegpt-1"){ # Prepare data ---- - url_cv <- "https://dashboard.nixtla.io/api/timegpt_multi_series_cross_validation" + names(df)[which(names(df) == time_col)] <- "ds" + names(df)[which(names(df) == target_col)] <- "y" + if(is.null(id_col)){ df <- df |> - dplyr::mutate(unique_id = "id") |> + dplyr::mutate(unique_id = "ts_0") |> dplyr::select(c("unique_id", tidyselect::everything())) + }else{ + # id_col is not NULL + names(df)[which(names(df) == id_col)] <- "unique_id" } data <- .timegpt_data_prep(df, freq, id_col, time_col, target_col) @@ -48,11 +53,20 @@ timegpt_cross_validation <- function(df, h=8, freq=NULL, id_col=NULL, time_col=" if(!is.null(X_df)){ names(X_df)[which(names(X_df) == time_col)] <- "ds" - names(X_df)[which(names(X_df) == target_col)] <- "y" - if(!is.null(id_col)){ + if(is.null(id_col)){ + X_df <- X_df |> + dplyr::mutate(unique_id = "ts_0") |> + dplyr::select(c("unique_id", tidyselect::everything())) + }else{ names(X_df)[which(names(X_df) == id_col)] <- "unique_id" } + # Validation checks for exogenous variables + status <- .validate_exogenous(df, h, X_df) + if(!status$validation){ + stop(print(status$message)) + } + exogenous <- df |> dplyr::select(-y) @@ -72,6 +86,7 @@ timegpt_cross_validation <- function(df, h=8, freq=NULL, id_col=NULL, time_col=" } # Make request ---- + url_cv <- 
"https://dashboard.nixtla.io/api/timegpt_multi_series_cross_validation" resp_cv <- httr2::request(url_cv) |> httr2::req_headers( "accept" = "application/json", @@ -133,7 +148,7 @@ timegpt_cross_validation <- function(df, h=8, freq=NULL, id_col=NULL, time_col=" }else{ # remove unique_id column res <- res |> - dplyr::select(-unique_id) + dplyr::select(-c(.data$unique_id)) } return(res) diff --git a/R/timegpt_data_prep.R b/R/timegpt_data_prep.R index e4f0be3..faafa09 100644 --- a/R/timegpt_data_prep.R +++ b/R/timegpt_data_prep.R @@ -1,11 +1,11 @@ #' Prepares data for TimeGPT's API -#' This is a private function of the package +#' This is a private function of nixtlar #' #' @param df A tsibble or a data frame with time series data. #' @param freq Frequency of the data. -#' @param id_col Column that identifies each series. -#' @param time_col Column that identifies each timestep. -#' @param target_col Column that contains the target variable. +#' @param id_col Column that identifies each series. Should be named unique_id. +#' @param time_col Column that identifies each timestep. Should be named ds. +#' @param target_col Column that contains the target variable. Should be named y. #' #' @return A list with the given or inferred frequency, the prepared data, and the original data frame renamed. 
#' @@ -15,13 +15,6 @@ stop("Only tsibbles or data frames are allowed.") } - # Rename columns - names(df)[which(names(df) == time_col)] <- "ds" - names(df)[which(names(df) == target_col)] <- "y" - if(!is.null(id_col)){ - names(df)[which(names(df) == id_col)] <- "unique_id" - } - # If df is a tsibble, convert dates to strings and infer frequency if necessary if(tsibble::is_tsibble(df)){ res <- date_conversion(df) @@ -35,10 +28,11 @@ } # Prepare data - df <- df[,c("unique_id", "ds", "y")] + filtered_df <- df[,c("unique_id", "ds", "y")] + y <- list( - columns = names(df), - data = lapply(1:nrow(df), function(i) as.list(df[i,])) + columns = names(filtered_df), + data = lapply(1:nrow(filtered_df), function(i) as.list(filtered_df[i,])) ) res <- list(freq = freq, diff --git a/R/timegpt_forecast.R b/R/timegpt_forecast.R index 0ef691c..f6ec2bc 100644 --- a/R/timegpt_forecast.R +++ b/R/timegpt_forecast.R @@ -19,13 +19,17 @@ timegpt_forecast <- function(df, h=8, freq=NULL, id_col=NULL, time_col="ds", target_col="y", X_df=NULL, level=NULL, finetune_steps=0, clean_ex_first=TRUE, add_history=FALSE, model="timegpt-1"){ # Prepare data ---- - url <- "https://dashboard.nixtla.io/api/timegpt_multi_series" + names(df)[which(names(df) == time_col)] <- "ds" + names(df)[which(names(df) == target_col)] <- "y" if(is.null(id_col)){ # create unique_id for single series df <- df |> - dplyr::mutate(unique_id = "id") |> + dplyr::mutate(unique_id = "ts_0") |> dplyr::select(c("unique_id", tidyselect::everything())) + }else{ + # id_col is not NULL + names(df)[which(names(df) == id_col)] <- "unique_id" } data <- .timegpt_data_prep(df, freq, id_col, time_col, target_col) @@ -43,10 +47,20 @@ timegpt_forecast <- function(df, h=8, freq=NULL, id_col=NULL, time_col="ds", tar if(!is.null(X_df)){ names(X_df)[which(names(X_df) == time_col)] <- "ds" - if(!is.null(id_col)){ + if(is.null(id_col)){ + X_df <- X_df |> + dplyr::mutate(unique_id = "ts_0") |> + dplyr::select(c("unique_id", tidyselect::everything())) 
+ }else{ names(X_df)[which(names(X_df) == id_col)] <- "unique_id" } + # Validation checks for exogenous variables + status <- .validate_exogenous(df, h, X_df) + if(!status$validation){ + stop(print(status$message)) + } + exogenous <- df |> dplyr::select(-y) @@ -66,6 +80,7 @@ timegpt_forecast <- function(df, h=8, freq=NULL, id_col=NULL, time_col="ds", tar } # Make request ---- + url <- "https://dashboard.nixtla.io/api/timegpt_multi_series" resp <- httr2::request(url) |> httr2::req_headers( "accept" = "application/json", @@ -118,7 +133,7 @@ timegpt_forecast <- function(df, h=8, freq=NULL, id_col=NULL, time_col="ds", tar }else{ # remove unique_id column fcst <- fcst |> - dplyr::select(-unique_id) + dplyr::select(-c(.data$unique_id)) } # Generate fitted values ---- diff --git a/R/timegpt_historic.R b/R/timegpt_historic.R index 567fbef..94d6853 100644 --- a/R/timegpt_historic.R +++ b/R/timegpt_historic.R @@ -15,13 +15,17 @@ timegpt_historic <- function(df, freq=NULL, id_col=NULL, time_col="ds", target_col="y", level=NULL, finetune_steps=0, clean_ex_first=TRUE){ # Prepare data ---- - url_historic <- "https://dashboard.nixtla.io/api/timegpt_multi_series_historic" + names(df)[which(names(df) == time_col)] <- "ds" + names(df)[which(names(df) == target_col)] <- "y" if(is.null(id_col)){ # create unique_id for single series df <- df |> - dplyr::mutate(unique_id = "id") |> + dplyr::mutate(unique_id = "ts_0") |> dplyr::select(c("unique_id", tidyselect::everything())) + }else{ + # id_col is not NULL + names(df)[which(names(df) == id_col)] <- "unique_id" } data <- .timegpt_data_prep(df, freq, id_col, time_col, target_col) @@ -35,12 +39,10 @@ timegpt_historic <- function(df, freq=NULL, id_col=NULL, time_col="ds", target_c clean_ex_first = clean_ex_first ) - names(df)[which(names(df) == time_col)] <- "ds" - names(df)[which(names(df) == target_col)] <- "y" - - if(any(!(names(df) %in% c("unique_id", "ds", "y")))){ - exogenous <- df |> - dplyr::select(-y) + if(!any(names(df) %in% 
#' Validate exogenous variables (if applicable)
#'
#' This is a private function of nixtlar. It runs two checks on the future
#' exogenous variables before a request is built: `df` and `X_df` must contain
#' the same exogenous variables, and every series in `X_df` must have exactly
#' `h` rows.
#'
#' @param df A tsibble or a data frame with time series data. Every column
#'   other than `unique_id`, `ds` and `y` is treated as an exogenous variable.
#' @param h Forecast horizon.
#' @param X_df A tsibble or a data frame with future exogenous variables.
#'   Every column other than `unique_id` and `ds` is treated as an exogenous
#'   variable.
#'
#' @return A list with the result of the validation (`validation`, TRUE/FALSE)
#'   and an error message (`message`, `NULL` when the validation succeeds).
#' @export
#'
.validate_exogenous <- function(df, h, X_df){

  problems <- character(0)

  # Check if df and X_df contain the same exogenous variables
  vars_df <- setdiff(names(df), c("unique_id", "ds", "y"))
  vars_X_df <- setdiff(names(X_df), c("unique_id", "ds"))

  if(!setequal(vars_df, vars_X_df)){
    problems <- c(
      problems,
      "df and X_df must contain the same exogenous variables."
    )
  }

  # Check if the future values of the exogenous variables cover the forecast
  # horizon, i.e. every series present in X_df has exactly h rows
  if(!("unique_id" %in% names(X_df))){
    problems <- c(problems, "X_df must contain a unique_id column.")
  }else{
    ids <- X_df[["unique_id"]]
    # match() against unique() counts only the ids that actually occur, so
    # unused factor levels are ignored and NA ids form their own series
    rows_per_series <- tabulate(match(ids, unique(ids)))

    # isTRUE() makes a missing h fail the validation instead of raising an
    # error inside if()
    if(!isTRUE(all(rows_per_series == h))){
      problems <- c(
        problems,
        "The future values of the exogenous variables must cover the forecast horizon"
      )
    }
  }

  # Report every failed check rather than only the last one
  error_message <- NULL
  if(length(problems) > 0){
    error_message <- paste(problems, collapse = " ")
  }

  status <- list(
    validation = length(problems) == 0,
    message = error_message
  )

  status
}
} \description{ Prepares data for TimeGPT's API -This is a private function of the package +This is a private function of nixtlar } diff --git a/man/dot-validate_exogenous.Rd b/man/dot-validate_exogenous.Rd new file mode 100644 index 0000000..88ac819 --- /dev/null +++ b/man/dot-validate_exogenous.Rd @@ -0,0 +1,23 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/validate_exogenous.R +\name{.validate_exogenous} +\alias{.validate_exogenous} +\title{Validate exogenous variables (if applicable) +This is a private function of nixtlar} +\usage{ +.validate_exogenous(df, h, X_df) +} +\arguments{ +\item{df}{A tsibble or a data frame with time series data.} + +\item{h}{Forecast horizon.} + +\item{X_df}{A tsibble or a data frame with future exogenous variables.} +} +\value{ +A list with the result of the validation (TRUE/FALSE) and an error message (if applicable) +} +\description{ +Validate exogenous variables (if applicable) +This is a private function of nixtlar +}