Skip to content

Commit

Permalink
Changes to core functions: Consolidated API call to one URL and fixed issue with plot for single series.
Browse files Browse the repository at this point in the history
  • Loading branch information
MMenchero committed Dec 12, 2023
1 parent 1e7ef43 commit 85d8c84
Show file tree
Hide file tree
Showing 13 changed files with 154 additions and 118 deletions.
48 changes: 48 additions & 0 deletions .github/workflows/pkgdown.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples
# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help
#
# Builds the pkgdown documentation site and, for non-PR events, deploys it
# to the gh-pages branch. Indentation restored: the scraped copy had all
# YAML nesting flattened to column 0, which is not valid workflow YAML.
on:
  push:
    branches: [main, master]
  pull_request:
    branches: [main, master]
  release:
    types: [published]
  workflow_dispatch:

name: pkgdown

jobs:
  pkgdown:
    runs-on: ubuntu-latest
    # Only restrict concurrency for non-PR jobs: for pull requests the group
    # key collapses to `pkgdown-true`, so PR runs share one queue, while other
    # events get a unique run_id-based group and never cancel each other.
    concurrency:
      group: pkgdown-${{ github.event_name != 'pull_request' || github.run_id }}
    env:
      GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
    permissions:
      contents: write
    steps:
      - uses: actions/checkout@v3

      - uses: r-lib/actions/setup-pandoc@v2

      - uses: r-lib/actions/setup-r@v2
        with:
          use-public-rspm: true

      - uses: r-lib/actions/setup-r-dependencies@v2
        with:
          extra-packages: any::pkgdown, local::.
          needs: website

      - name: Build site
        run: pkgdown::build_site_github_pages(new_process = FALSE, install = FALSE)
        shell: Rscript {0}

      - name: Deploy to GitHub pages 🚀
        if: github.event_name != 'pull_request'
        uses: JamesIves/[email protected]
        with:
          clean: false
          branch: gh-pages
          folder: docs
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -14,13 +14,13 @@ Depends:
R (>= 2.10)
LazyData: true
Imports:
data.table,
dplyr,
ggplot2,
httr2,
lubridate,
rlang,
tidyr,
tidyselect,
tsibble
Suggests:
httptest2,
Expand Down
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -37,4 +37,5 @@ importFrom(lubridate,ymd)
importFrom(lubridate,ymd_hms)
importFrom(rlang,.data)
importFrom(tidyr,pivot_longer)
importFrom(tidyselect,everything)
importFrom(tsibble,is_tsibble)
1 change: 1 addition & 0 deletions R/nixtlaR-package.R
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
#' @importFrom lubridate ymd_hms
#' @importFrom rlang .data
#' @importFrom tidyr pivot_longer
#' @importFrom tidyselect everything
#' @importFrom tsibble is_tsibble
## usethis namespace: end
NULL
49 changes: 23 additions & 26 deletions R/timegpt_anomaly_detection.R
Original file line number Diff line number Diff line change
Expand Up @@ -14,20 +14,16 @@
#'
timegpt_anomaly_detection <- function(df, freq=NULL, id_col=NULL, time_col="ds", target_col="y", level=c(99), clean_ex_first=TRUE, model="timegpt-1"){

# Validation ----
if(!tsibble::is_tsibble(df) & !is.data.frame(df)){
stop("Only tsibbles or data frames are allowed.")
}

# Prepare data ----
url_anomaly <- "https://dashboard.nixtla.io/api/timegpt_multi_series_anomalies"
if(is.null(id_col)){
url_anomaly <- "Write here the url for the single series case"
}else{
url_anomaly <- "https://dashboard.nixtla.io/api/timegpt_multi_series_anomalies"
# create unique_id for single series
df <- df |>
dplyr::mutate(unique_id = "id") |>
dplyr::select(c("unique_id", tidyselect::everything()))
}

data <- .timegpt_data_prep(df, freq, id_col, time_col, target_col)
df <- data$df
freq <- data$freq
y <- data$y

Expand All @@ -38,6 +34,8 @@ timegpt_anomaly_detection <- function(df, freq=NULL, id_col=NULL, time_col="ds",
clean_ex_first = clean_ex_first
)

names(df)[which(names(df) == time_col)] <- "ds"
names(df)[which(names(df) == target_col)] <- "y"
if(any(!(names(df) %in% c("unique_id", "ds", "y")))){
exogenous <- df |>
dplyr::select(-y)
Expand All @@ -51,7 +49,7 @@ timegpt_anomaly_detection <- function(df, freq=NULL, id_col=NULL, time_col="ds",
}

if(length(level) > 1){
message("Multiple levels are not allowed for anomaly detection. Will use the largest.")
message("Multiple levels are not allowed for anomaly detection. Will use the largest level.")
}
level <- as.list(level)
timegpt_data[["level"]] <- level
Expand All @@ -69,26 +67,21 @@ timegpt_anomaly_detection <- function(df, freq=NULL, id_col=NULL, time_col="ds",

# Extract anomalies ----
anomaly <- httr2::resp_body_json(resp_anomaly)
if(is.null(id_col)){
# Write here the code for the single series case once the url is available
res = 42
}else{
anomaly_list <- lapply(anomaly$data$forecast$data, unlist)
res <- data.frame(do.call(rbind, anomaly_list))
colnames(res) <- anomaly$data$forecast$columns
res[,3:ncol(res)] <- lapply(res[,3:ncol(res)], as.numeric)
}
anomaly_list <- lapply(anomaly$data$forecast$data, unlist)
res <- data.frame(do.call(rbind, anomaly_list))
colnames(res) <- anomaly$data$forecast$columns
res[,3:ncol(res)] <- lapply(res[,3:ncol(res)], as.numeric)

# Data transformation ----
if(tsibble::is_tsibble(df)){
res$ds <- switch(freq,
"Y" = as.numeric(substr(res$ds, 1, 4)),
"A" = as.numeric(substr(res$ds, 1, 4)),
"Q" = tsibble::yearquarter(res$ds),
"MS" = tsibble::yearmonth(res$ds),
"W" = tsibble::yearweek(res$ds),
"H" = lubridate::ymd_hms(res$ds),
lubridate::ymd(res$ds) # default (daily or other)
"Y" = as.numeric(substr(res$ds, 1, 4)),
"A" = as.numeric(substr(res$ds, 1, 4)),
"Q" = tsibble::yearquarter(res$ds),
"MS" = tsibble::yearmonth(res$ds),
"W" = tsibble::yearweek(res$ds),
"H" = lubridate::ymd_hms(res$ds),
lubridate::ymd(res$ds) # default (daily or other)
)
if(is.null(id_col)){
res <- tsibble::as_tsibble(res, index="ds")
Expand All @@ -108,6 +101,10 @@ timegpt_anomaly_detection <- function(df, freq=NULL, id_col=NULL, time_col="ds",
colnames(res)[which(colnames(res) == "ds")] <- time_col
if(!is.null(id_col)){
colnames(res)[which(colnames(res) == "unique_id")] <- id_col
}else{
# remove unique_id column
res <- res |>
dplyr::select(-unique_id)
}

return(res)
Expand Down
33 changes: 14 additions & 19 deletions R/timegpt_cross_validation.R
Original file line number Diff line number Diff line change
Expand Up @@ -19,20 +19,15 @@
#'
timegpt_cross_validation <- function(df, h=8, freq=NULL, id_col=NULL, time_col="ds", target_col="y", X_df=NULL, level=NULL, n_windows=1, step_size=NULL, finetune_steps=0, clean_ex_first=TRUE, model="timegpt-1"){

# Validation ----
if(!tsibble::is_tsibble(df) & !is.data.frame(df)){
stop("Only tsibbles or data frames are allowed.")
}

# Prepare data ----
url_cv <- "https://dashboard.nixtla.io/api/timegpt_multi_series_cross_validation"
if(is.null(id_col)){
url_cv <- "Write here the url for the single series case"
}else{
url_cv <- "https://dashboard.nixtla.io/api/timegpt_multi_series_cross_validation"
df <- df |>
dplyr::mutate(unique_id = "id") |>
dplyr::select(c("unique_id", tidyselect::everything()))
}

data <- .timegpt_data_prep(df, freq, id_col, time_col, target_col)
df <- data$df
freq <- data$freq
y <- data$y

Expand All @@ -53,6 +48,7 @@ timegpt_cross_validation <- function(df, h=8, freq=NULL, id_col=NULL, time_col="

if(!is.null(X_df)){
names(X_df)[which(names(X_df) == time_col)] <- "ds"
names(X_df)[which(names(X_df) == target_col)] <- "y"
if(!is.null(id_col)){
names(X_df)[which(names(X_df) == id_col)] <- "unique_id"
}
Expand Down Expand Up @@ -88,16 +84,11 @@ timegpt_cross_validation <- function(df, h=8, freq=NULL, id_col=NULL, time_col="

# Extract cross-validation ----
cv <- httr2::resp_body_json(resp_cv)
if(is.null(id_col)){
# Write here the code for the single series case once the url is available
res = 42
}else{
cv_list <- lapply(cv$data$forecast$data, unlist)
res <- data.frame(do.call(rbind, cv_list))
colnames(res) <- cv$data$forecast$columns
res[,4:ncol(res)] <- lapply(res[,4:ncol(res)], as.numeric)
res$cutoff <- lubridate::ymd_hms(res$cutoff)
}
cv_list <- lapply(cv$data$forecast$data, unlist)
res <- data.frame(do.call(rbind, cv_list))
colnames(res) <- cv$data$forecast$columns
res[,4:ncol(res)] <- lapply(res[,4:ncol(res)], as.numeric)
res$cutoff <- lubridate::ymd_hms(res$cutoff)

# Data transformation ----
if(tsibble::is_tsibble(df)){
Expand Down Expand Up @@ -139,6 +130,10 @@ timegpt_cross_validation <- function(df, h=8, freq=NULL, id_col=NULL, time_col="
colnames(res)[which(colnames(res) == "ds")] <- time_col
if(!is.null(id_col)){
colnames(res)[which(colnames(res) == "unique_id")] <- id_col
}else{
# remove unique_id column
res <- res |>
dplyr::select(-unique_id)
}

return(res)
Expand Down
15 changes: 4 additions & 11 deletions R/timegpt_data_prep.R
Original file line number Diff line number Diff line change
Expand Up @@ -35,18 +35,11 @@
}

# Prepare data
if("unique_id" %in% names(df)){
df <- df[,c("unique_id", "ds", "y")]
y <- list(
columns = names(df),
data = lapply(1:nrow(df), function(i) as.list(df[i,]))
df <- df[,c("unique_id", "ds", "y")]
y <- list(
columns = names(df),
data = lapply(1:nrow(df), function(i) as.list(df[i,]))
)
}else{
# only "ds" and "y" columns
y <- df$y
names(y) <- df$ds
y <- as.list(y)
}

res <- list(freq = freq,
y = y
Expand Down
44 changes: 16 additions & 28 deletions R/timegpt_forecast.R
Original file line number Diff line number Diff line change
Expand Up @@ -18,16 +18,14 @@
#'
timegpt_forecast <- function(df, h=8, freq=NULL, id_col=NULL, time_col="ds", target_col="y", X_df=NULL, level=NULL, finetune_steps=0, clean_ex_first=TRUE, add_history=FALSE, model="timegpt-1"){

# Validation ----
if(!tsibble::is_tsibble(df) & !is.data.frame(df)){
stop("Only tsibbles or data frames are allowed.")
}

# Prepare data ----
url <- "https://dashboard.nixtla.io/api/timegpt_multi_series"

if(is.null(id_col)){
url <- "https://dashboard.nixtla.io/api/timegpt"
}else{
url <- "https://dashboard.nixtla.io/api/timegpt_multi_series"
# create unique_id for single series
df <- df |>
dplyr::mutate(unique_id = "id") |>
dplyr::select(c("unique_id", tidyselect::everything()))
}

data <- .timegpt_data_prep(df, freq, id_col, time_col, target_col)
Expand Down Expand Up @@ -80,26 +78,13 @@ timegpt_forecast <- function(df, h=8, freq=NULL, id_col=NULL, time_col="ds", tar

# Extract forecast ----
fc <- httr2::resp_body_json(resp)

if(is.null(id_col)){
idx <- grep("^(timestamp|value|lo|hi)", names(fc$data))
fc_list <- fc$data[idx]
fcst <- data.frame(lapply(fc_list, unlist), stringsAsFactors=FALSE)
names(fcst) <- names(fc_list)
names(fcst)[1:2] <- c("ds", "TimeGPT")
if(!is.null(level)){
idx_level <- grep("^(lo|hi)", names(fcst))
names(fcst)[idx_level] <- paste0("TimeGPT-", names(fcst)[idx_level])
}
fc_list <- lapply(fc$data$forecast$data, unlist)
fcst <- data.frame(do.call(rbind, fc_list))
names(fcst) <- fc$data$forecast$columns
if(!is.null(level)){
fcst[,3:ncol(fcst)] <- lapply(fcst[,3:ncol(fcst)], as.numeric)
}else{
fc_list <- lapply(fc$data$forecast$data, unlist)
fcst <- data.frame(do.call(rbind, fc_list))
names(fcst) <- fc$data$forecast$columns
if(!is.null(level)){
fcst[,3:ncol(fcst)] <- lapply(fcst[,3:ncol(fcst)], as.numeric)
}else{
fcst$TimeGPT <- as.numeric(fcst$TimeGPT)
}
fcst$TimeGPT <- as.numeric(fcst$TimeGPT)
}

# Data transformation ----
Expand All @@ -119,7 +104,6 @@ timegpt_forecast <- function(df, h=8, freq=NULL, id_col=NULL, time_col="ds", tar
fcst <- tsibble::as_tsibble(fcst, key="unique_id", index="ds")
}
}else{
# If df is a data frame, convert ds to dates
if(freq == "H"){
fcst$ds <- lubridate::ymd_hms(fcst$ds)
}else{
Expand All @@ -131,6 +115,10 @@ timegpt_forecast <- function(df, h=8, freq=NULL, id_col=NULL, time_col="ds", tar
names(fcst)[which(names(fcst) == "ds")] <- time_col
if(!is.null(id_col)){
names(fcst)[which(names(fcst) == "unique_id")] <- id_col
}else{
# remove unique_id column
fcst <- fcst |>
dplyr::select(-unique_id)
}

# Generate fitted values ----
Expand Down
Loading

0 comments on commit 85d8c84

Please sign in to comment.