* Add alternate comid retrieval via sf geometry in case nwissite returns comid of NA
* fix: add gage_id inside each loc_attrs df; fix: set fill=TRUE for rbindlist
* fix: add usgs_vars sublist to Retr_Params
* feat: add a format checker on Retr_Params
* feat: add attribute variable name checker, incorporate check_attr_selection() into standard processing
Showing 4 changed files with 174 additions and 7 deletions.
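The first bullet above describes a fallback for when the NLDI lookup of an `nwissite` feature yields an NA comid. Below is a minimal sketch of that idea (the helper name is hypothetical; the actual logic lives in proc.attr.hydfab), assuming `nhdplusTools::get_nldi_feature()` returns the site as an sf point with a `comid` column:

```r
# Hypothetical sketch of the sf-geometry comid fallback described in this
# commit; the packaged implementation may differ.
library(glue)
library(nhdplusTools)
library(sf)

retrieve_comid <- function(gage_id) {
  site <- nhdplusTools::get_nldi_feature(
    list(featureSource = "nwissite", featureID = glue::glue("USGS-{gage_id}")))
  comid <- site$comid
  if (is.null(comid) || is.na(comid)) {
    # Fall back to a spatial query: discover the NHDPlus catchment
    # intersecting the site's point geometry
    comid <- nhdplusTools::discover_nhdplus_id(point = sf::st_geometry(site))
  }
  comid
}
```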
The package `DESCRIPTION` gets a version bump:

```diff
@@ -1,6 +1,6 @@
 Package: proc.attr.hydfab
 Title: Grab and process catchment attributes using the hydrofabric
-Version: 0.0.1.0013
+Version: 0.0.1.0014
 Authors@R:
     c(person("Guy", "Litt", , "[email protected]", role = c("aut", "cre"),
       comment = c(ORCID = "https://orcid.org/0000-0003-1996-7468")),
```
A new R script (61 lines added) drives the attribute grab for CAMELS basins:

```r
#' @title Generate attributes for CAMELS basins
#' @description This script uses the proc.attr.hydfab package to acquire attributes
#' of interest.
#'

library(dplyr)
library(glue)
library(tidyr)
library(yaml)
library(proc.attr.hydfab)

main <- function(){
  # Define args supplied to command line
  home_dir <- Sys.getenv("HOME")

  ############################ BEGIN CUSTOM MUNGING ############################

  # ------------------------ Read in CAMELS gage ids ------------------------- #
  path_gages_ii <- glue::glue("{home_dir}/noaa/camels/gagesII_wood/gages_list.txt")
  dat_gages_ii <- read.csv(path_gages_ii)
  gage_ids <- base::lapply(1:nrow(dat_gages_ii), function(i)
      tail(strsplit(dat_gages_ii[i,], split = ' ', fixed = TRUE)[[1]], n = 1)) |>
    unlist() |>
    lapply(function(x) gsub(pattern = ".gpkg", replacement = "", x = x)) |>
    unlist() |>
    lapply(function(x) gsub(pattern = "Gage_", replacement = "", x = x)) |>
    unlist()

  utils::write.table(gage_ids,
                     glue::glue('{home_dir}/noaa/camels/gagesII_wood/camels_ii_gage_ids.txt'),
                     row.names = FALSE, col.names = FALSE)

  # --------------------- Read in usgs NHD attribute IDs --------------------- #
  # Read desired usgs nhdplus attributes, stored in NOAA shared drive here:
  # https://docs.google.com/spreadsheets/d/1h-630L2ChH5zlQIcWJHVaxY9YXtGowcCqakQEAXgRrY/edit?usp=sharing
  attrs_nhd_df <- read.csv(glue::glue("{home_dir}/noaa/regionalization/processing/usgs_nhdplus_attrs.csv"))

  attrs_nhd <- attrs_nhd_df$ID

  Retr_Params <- list(paths = list(dir_db_attrs = glue::glue("{home_dir}/noaa/regionalization/data/input/attributes/"),
                                   dir_std_base = glue::glue("{home_dir}/noaa/regionalization/data/input/user_data_std")),
                      vars = list(usgs_vars = attrs_nhd),
                      datasets = "camelsii_nhdp_grab_nov24",
                      xtra_hfab = list(hfab_retr = FALSE))

  ############################ END CUSTOM MUNGING ##############################

  # ----------------------- Grab all needed attributes ----------------------- #
  # Now acquire the attributes:
  ls_comids <- proc.attr.hydfab::proc_attr_gageids(gage_ids = gage_ids,
                                                   featureSource = 'nwissite',
                                                   featureID = 'USGS-{gage_id}',
                                                   Retr_Params = Retr_Params,
                                                   overwrite = FALSE)

  message(glue::glue("Completed attribute acquisition for {Retr_Params$paths$dir_db_attrs}"))
}

main()
```
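The commit message also notes a new format checker on `Retr_Params`. A minimal sketch of that kind of structural validation, assuming only the sublist layout used in the script above and the standardized sublisting names documented in the config below (the packaged checker's actual interface is not shown in this diff):

```r
# Hypothetical sketch of a Retr_Params structure check; the checker
# actually added in this commit may enforce more or differ in shape.
check_retr_params_format <- function(Retr_Params) {
  stopifnot(is.list(Retr_Params))
  required <- c("paths", "vars", "datasets")
  absent <- setdiff(required, names(Retr_Params))
  if (length(absent) > 0) {
    stop("Retr_Params is missing required element(s): ",
         paste(absent, collapse = ", "))
  }
  # Variable sublistings use standardized names, e.g. usgs_vars, ha_vars, sc_vars
  known_vars <- c("usgs_vars", "ha_vars", "sc_vars")
  unknown <- setdiff(names(Retr_Params$vars), known_vars)
  if (length(unknown) > 0) {
    warning("Unrecognized variable sublisting(s) in Retr_Params$vars: ",
            paste(unknown, collapse = ", "))
  }
  invisible(TRUE)
}
```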
A new YAML config template (54 lines added) describes the attribute-grabbing setup:

```yaml
# Config for grabbing catchment attributes corresponding to standard-named locations
# Two options exist for defining locations that need attributes. At least one must be used. Both may be used.
# 1. Refer to a file/dataset {loc_id_filepath} with a column identifier {loc_id} representing a standardized location identifier.
# 2. Refer to a dataset processed by the fs_proc python package and point to its location, {dir_std_base}/{datasets}, where {datasets} is a specific subdirectory name(s) or simply 'all'

col_schema: # required column mappings in the evaluation metrics dataset (if read in)
  - 'featureID': 'USGS-{gage_id}' # python f-string / R glue() format; converting the 'gage_id' to the standardized featureID used by nhdplusTools/hydrofabric. Must use '{gage_id}', e.g. 'USGS-{gage_id}'
  - 'featureSource': 'nwissite' # The standardized nhdplusTools featureSource. Possible featureSources might be 'nwissite', 'comid'.
loc_id_read: # This section only required for locations NOT read in under a standardized dataset location (dir_std_base). May be used for additional prediction locations. MUST leave each item name inside the list with empty assignments if no datasets desired.
  - 'gage_id': 'gage_id' # expects a tabular dataset with this column name representing the location id.
  - 'loc_id_filepath': '' # Required. filepath. Allows reading a .csv or a dataset accessible using arrow::open_dataset() in lieu of reading a dataset generated by fs_proc.
  - 'featureID_loc': 'USGS-{gage_id}' # python f-string / R glue() format; converting the 'loc_id' to the standardized featureID used by nhdplusTools/hydrofabric. Must use '{loc_id}', e.g. 'USGS-{loc_id}'.
  - 'featureSource_loc': 'nwissite' # The standardized nhdplusTools featureSource.
file_io: # May define {home_dir} for python's '{home_dir}/string_path'.format(home_dir=str(Path.home())) functionality
  - 'save_loc': 'local' # TODO implement once s3 becomes a capability. Use 'local' for saving to a local path via dir_save. Future work will create an approach for 'aws' or other cloud saving methods
  - 'dir_base': '{home_dir}/noaa/regionalization/data/input' # Required. The save location of standardized output
  - 'dir_std_base': '{dir_base}/user_data_std' # Required. The location of standardized data generated by the fs_proc python package
  - 'dir_db_hydfab': '{dir_base}/hydrofabric' # Required. The local dir where hydrofabric data are stored (limits the total s3 connections)
  - 'dir_db_attrs': '{dir_base}/attributes' # Required. The parent dir where each comid's attribute parquet file is stored in the subdirectory 'comid/', and each dataset's aggregated parquet attributes are stored in the subdirectory '/{dataset_name}'
formulation_metadata:
  - 'datasets': # Required. Must match a directory name inside dir_std_base. May be a list of items, or simply the sublist 'all' to select everything inside dir_std_base for attribute grabbing.
      - 'juliemai-xSSA' # Required. In this example case, it's a sublist of just one thing.
  - 'formulation_base': 'Raven_blended' # Informational. Unique name of formulation. Optional.
hydfab_config: # Required section describing hydrofabric connection details and objects of interest
  - 's3_base': "s3://lynker-spatial/tabular-resources" # Required. s3 path containing hydrofabric-formatted attribute datasets
  - 's3_bucket': 'lynker-spatial' # Required. s3 bucket containing hydrofabric data
  - 'ext': 'gpkg' # Required. File extension of the hydrofabric data. Default 'gpkg'.
  - 'hf_cat_sel': "total" # Required. Options include 'total' or 'all'; total: interested in the single location's aggregated catchment data; all: all subcatchments of interest
attr_select: # Required. The names of variable sublistings are standardized, e.g. ha_vars, usgs_vars, sc_vars
  - 's3_path_hydatl': '{s3_base}/hydroATLAS/hydroatlas_vars.parquet' # path to hydroatlas data formatted for hydrofabric. Required only if hydroatlas variables desired.
  - 'ha_vars': # hydroatlas variables. Must specify s3_path_hydatl if desired.
      - 'pet_mm_s01'
      - 'cly_pc_sav'
      - 'cly_pc_uav'
      - 'ari_ix_sav'
  - 'usgs_vars': # list of variables retrievable using nhdplusTools::get_characteristics_metadata()
      - 'TOT_TWI'
      - 'TOT_PRSNOW'
      - 'TOT_POPDENS90'
      - 'TOT_EWT'
      - 'TOT_RECHG'
      - 'TOT_PPT7100_ANN'
      - 'TOT_AET'
      - 'TOT_PET'
      - 'TOT_SILTAVE'
      - 'TOT_BASIN_AREA'
      - 'TOT_BASIN_SLOPE'
      - 'TOT_ELEV_MEAN'
      - 'TOT_ELEV_MAX'
      - 'TOT_Intensity'
      - 'TOT_Wet'
      - 'TOT_Dry'
  - 'sc_vars': # Streamcat variables of interest. TODO: add streamcat grabber capability to proc.attr.hydfab
      - # In this example case, no streamcat variables selected
```
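The `usgs_vars` entries above are the kind of names the new `check_attr_selection()` validates before retrieval. A minimal sketch of that kind of check, assuming only that `nhdplusTools::get_characteristics_metadata()` returns a table with an `ID` column (the packaged function's signature is not shown in this diff):

```r
# Hypothetical sketch of an attribute-name check in the spirit of
# check_attr_selection(); the packaged function may behave differently.
library(nhdplusTools)

check_usgs_vars <- function(vars) {
  meta <- nhdplusTools::get_characteristics_metadata()
  bad <- setdiff(vars, meta$ID)
  if (length(bad) > 0) {
    warning("Attribute name(s) not found in NHDPlus metadata: ",
            paste(bad, collapse = ", "))
  }
  invisible(setdiff(vars, bad))  # keep only recognized names
}

check_usgs_vars(c("TOT_TWI", "TOT_PRSNOW", "TOT_NOT_A_VAR"))
```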