Skip to content

Commit

Permalink
Merge pull request #27 from Nixtla/v2-issues
Browse files Browse the repository at this point in the history
fix: release v0.6.0 issues
  • Loading branch information
MMenchero authored Oct 9, 2024
2 parents 3cb04e4 + 4c54e8d commit 9c6522c
Show file tree
Hide file tree
Showing 6 changed files with 79 additions and 27 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: nixtlar
Title: A Software Development Kit for 'Nixtla''s 'TimeGPT'
Version: 0.5.4
Version: 0.6.0
Authors@R: c(
person("Mariana", "Menchero", email = "[email protected]", role = c("aut", "cre"), comment = "First author and maintainer"),
person("Nixtla", role = "cph", comment = "Copyright held by 'Nixtla'")
Expand Down
46 changes: 32 additions & 14 deletions R/generate_output_dates.R
Original file line number Diff line number Diff line change
Expand Up @@ -14,28 +14,46 @@
#' dates_df <- .generate_output_dates(df_info, freq, h)
#' }
#'
.generate_output_dates <- function(df_info, freq, h) {
new_dates <- lapply(1:nrow(df_info), function(i) {
.generate_output_dates <- function(df_info, freq, h){

new_dates <- vector("list", nrow(df_info))
r_freq <- .r_frequency(freq)

for(i in 1:nrow(df_info)){
start_date <- df_info$dates[i]
r_freq <- .r_frequency(freq)

if(freq == "QE") {
# End-of-quarter dates are: "YYYY-03-31", "YYYY-06-30", "YYYY-09-30" and "YYYY-12-31".
dt <- seq(from = start_date, by = "quarter", length.out = h+1)
if(freq %in% c("QE", "Q")){
dt <- seq(from = start_date, by = r_freq, length.out = h+1)
month <- lubridate::month(start_date)
dt <- seq(from = start_date, by = "quarter", length.out = h+1)

# Calendar adjustments
if (month %in% c(3, 12)) {
dt <- ifelse(lubridate::month(dt) %in% c(7, 10), dt - lubridate::days(1), dt)
for (j in 1:length(dt)) {
mt <- lubridate::month(dt[j])
if (mt %in% c(7, 10)) {
dt[j] <- dt[j] - lubridate::days(1)
}
}
} else {
dt <- ifelse(lubridate::month(dt) %in% c(3, 12), dt + lubridate::days(1), dt)
# month %in% c(6, 9)
for (j in 1:length(dt)) {
mt <- lubridate::month(dt[j])
if (mt %in% c(3, 12)) {
dt[j] <- dt[j] + lubridate::days(1)
}
}
}
} else if(freq == "ME") {
dt <- seq(from = start_date + lubridate::days(1), by = r_freq, length.out = h+1) - lubridate::days(1)
} else {
dt <- seq(from = start_date, by = r_freq, length.out = h+1)

}else if(freq %in% c("ME", "M")){
start_date <- start_date+lubridate::days(1)
dt <- seq(from = start_date, by = r_freq, length.out = h+1)-lubridate::days(1)
}else{
dt <- seq(df_info$dates[i], by = r_freq, length.out = h+1)
}

dt[2:length(dt)]
})
new_dates[[i]] <- dt[2:length(dt)]
}

dates_df <- data.frame(lapply(new_dates, as.POSIXct))

Expand Down
26 changes: 21 additions & 5 deletions R/infer_frequency.R
Original file line number Diff line number Diff line change
Expand Up @@ -16,21 +16,33 @@ infer_frequency <- function(df, freq){
return(freq)
}

num_chars <- nchar(as.character(df$ds[1]))
if(length(unique(df$ds)) > 1){ # this is done to avoid the vanishing dates issue
dt <- sample(df$ds, 2)
}else{
dt <- df$ds[1]
}

# Vanishing dates issue: Dates that correspond to midnight only show YYYY-MM-DD, excluding 00:00:00

num_chars <- max(nchar(as.character(dt)))

if(num_chars <= 10){
# assumes dates in format YYYY-MM-DD
dates <- lubridate::ymd(sort(unique(df$ds)))
if(inherits(df$ds, "character")){
dates <- lubridate::ymd(sort(unique(df$ds)))
}else{
dates <- sort(unique(df$ds))
}
dates_diff <- diff(dates)
dates_table <- table(dates_diff)
mode <- as.numeric(names(which.max(dates_table)))

freq_list = list(
list(alias = "Y", value = c(365,366)),
list(alias = "Q", value = c(91,92)),
list(alias = "MS", value = c(30,31)),
list(alias = "M", value = c(30,31)),
list(alias = "W", value = c(7)),
list(alias = "D", value = c(1))
list(alias = "D", value = c(24,1))
)

for(item in freq_list){
Expand All @@ -45,7 +57,11 @@ infer_frequency <- function(df, freq){

}else{
# assumes dates in format YYYY-MM-DD hh:mm:ss
dates <- lubridate::ymd_hms(sort(unique(df$ds)))
if(inherits(df$ds, "character")){
dates <- lubridate::ymd_hms(sort(unique(df$ds)))
}else{
dates <- sort(unique(df$ds))
}
dates_diff <- diff(dates)
dates_table <- table(dates_diff)
mode <- as.numeric(names(which.max(dates_table)))
Expand Down
28 changes: 22 additions & 6 deletions R/nixtla_client_forecast.R
Original file line number Diff line number Diff line change
Expand Up @@ -222,18 +222,34 @@ nixtla_client_forecast <- function(df, h=8, freq=NULL, id_col="unique_id", time_
}

# Add unique ids and dates to forecast ----
nch <- nchar(df_info$last_ds[1])
if(nch <= 10){
df_info$dates <- lubridate::ymd(df_info$last_ds)
if(inherits(df_info$last_ds, "character")){
if(length(df_info$last_ds) > 1){
dt <- sample(df_info$last_ds, 2)
}else{
dt <- df_info$last_ds[1]
}
nch <- max(nchar(as.character(dt)))
if(nch <= 10){
df_info$dates <- lubridate::ymd(df_info$last_ds)
}else{
df_info$dates <- lubridate::ymd_hms(df_info$last_ds)
}
}else{
df_info$dates <- lubridate::ymd_hms(df_info$last_ds)
# assumes df_info$last_ds is already a date-object
df_info$dates <- df_info$last_ds
}

dates_df <- .generate_output_dates(df_info, freq, h)

dates_long_df <- dates_df |>
tidyr::pivot_longer(cols = everything(), names_to = "unique_id", values_to = "ds") |>
dplyr::arrange(.data$unique_id)
tidyr::pivot_longer(cols = everything(), names_to = "unique_id", values_to = "ds")

if(inherits(df$unique_id, "integer")){
dates_long_df$unique_id <- as.numeric(dates_long_df$unique_id)
}

dates_long_df <- dates_long_df |>
dplyr::arrange(.data$unique_id)

forecast <- cbind(dates_long_df, fc)

Expand Down
2 changes: 1 addition & 1 deletion README.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ We are excited to announce the release of `nixtlar` version 0.6.0, which integra

- **Date Formats**: For efficiency, `nixtlar` now strictly requires dates to be in the format `YYYY-MM-DD` or `YYYY-MM-DD hh:mm:ss`, either as character strings or date-time objects. For more details, please refer to our [Get Started](https://nixtla.github.io/nixtlar/articles/get-started.html) guide and [Data Requirements](https://nixtla.github.io/nixtlar/articles/data-requirements.html) vignette.

- **Default ID Column**: In alignment with the Python SDK, `nixtlar` now defaults the `id_col` to `unique_id`. This means you no longer need to specify this column if it is already named `unique_id`. If your dataset contains only one series, simply set `id_col=NULL`.
- **Default ID Column**: In alignment with the Python SDK, `id_col` now defaults to `unique_id` in `nixtlar`. This means you no longer need to specify this column if it is already named `unique_id`. If your dataset contains only one series, simply set `id_col=NULL`. The values in the `id_col` column must be either characters or integers.

These changes leverage the capabilities of `TimeGPT`'s new API and align `nixtlar` more closely with the Python SDK, ensuring a better user experience.

Expand Down
2 changes: 2 additions & 0 deletions vignettes/data-requirements.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,8 @@ head(df)
str(df)
```

The values in the `id_col` column must be either characters or integers.

## 3. Exogenous Variables

When using exogenous variables, `nixtlar` differentiates between historical and future exogenous variables:
Expand Down

0 comments on commit 9c6522c

Please sign in to comment.