From a4313107465fc943253f5c0a30157638745709c9 Mon Sep 17 00:00:00 2001 From: adrifoster Date: Tue, 29 Aug 2023 10:20:21 -0600 Subject: [PATCH 01/85] initial commit --- src/biogeochem/SatellitePhenologyMod.F90 | 49 ++++++++++++++++++++++++ src/main/clm_driver.F90 | 37 +----------------- 2 files changed, 51 insertions(+), 35 deletions(-) diff --git a/src/biogeochem/SatellitePhenologyMod.F90 b/src/biogeochem/SatellitePhenologyMod.F90 index 3e9341f430..54f2dcdb32 100644 --- a/src/biogeochem/SatellitePhenologyMod.F90 +++ b/src/biogeochem/SatellitePhenologyMod.F90 @@ -257,6 +257,55 @@ subroutine interpMonthlyVeg (bounds, canopystate_inst) end subroutine interpMonthlyVeg !============================================================================== + + logical function do_interpMonthlyVeg(use_cn, use_fates, use_fates_sp, doalb, n_drydep) + ! + ! !DESCRIPTION: + ! returns whether or not to conduct interpMonthlyVeg subroutine + ! + ! !ARGUMENTS: + logical, intent(in) :: use_cn ! are we using the big-leaf, BGC version of model? + logical, intent(in) :: use_fates ! are we running FATES? + logical, intent(in) :: use_fates_sp ! are we running FATES-SP? + logical, intent(in) :: doalb ! true if time for surface albedo calc + integer, intent(in) :: n_drydep ! number in drypdep list + + if (use_cn .and. n_drydep > 0) then + + ! For dry-deposition need to call CLMSP so that mlaidiff is obtained + ! NOTE: This is also true of FATES below + do_interpMonthlyVeg = .true. + + else if (use_fates .and. use_fates_sp) then + + ! For FATES-Specified phenology mode interpolate the weights for + ! time-interpolation of monthly vegetation data (as in SP mode below) + ! Also for FATES with dry-deposition as above need to call CLMSP so that mlaidiff is obtained + !if ( use_fates_sp .or. (n_drydep > 0 ) ) then ! Replace with this when we have dry-deposition working + ! For now don't allow for dry-deposition because of issues in #1044 EBK Jun/17/2022 + do_interpMonthlyVeg = .true. + + else if (doalb .or. n_drydep > 0) then + + ! Determine weights for time interpolation of monthly vegetation data. + ! This also determines whether it is time to read new monthly vegetation and + ! obtain updated leaf area index [mlai1,mlai2], stem area index [msai1,msai2], + ! vegetation top [mhvt1,mhvt2] and vegetation bottom [mhvb1,mhvb2]. The + ! weights obtained here are used in subroutine SatellitePhenology to obtain time + ! interpolated values. + ! This is also done for FATES-SP mode above + do_interpMonthlyVeg = .true. + + else + + do_interpMonthlyVeg = .false. + + end if + + end function do_interpMonthlyVeg + + !============================================================================== + subroutine readAnnualVegetation (bounds, canopystate_inst) ! ! !DESCRIPTION: diff --git a/src/main/clm_driver.F90 b/src/main/clm_driver.F90 index 3a5e981d74..61b5fc3ac7 100644 --- a/src/main/clm_driver.F90 +++ b/src/main/clm_driver.F90 @@ -58,7 +58,7 @@ module clm_driver use UrbanRadiationMod , only : UrbanRadiation ! use SoilBiogeochemVerticalProfileMod , only : SoilBiogeochemVerticalProfile - use SatellitePhenologyMod , only : SatellitePhenology, interpMonthlyVeg + use SatellitePhenologyMod , only : SatellitePhenology, interpMonthlyVeg, do_interpMonthlyVeg use ndepStreamMod , only : ndep_interp use cropcalStreamMod , only : cropcal_advance, cropcal_interp use ch4Mod , only : ch4, ch4_init_gridcell_balance_check, ch4_init_column_balance_check @@ -226,43 +226,10 @@ subroutine clm_drv(doalb, nextsw_cday, declinp1, declin, rstwr, nlend, rdate, ro ! 
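    ! A summary sketch of the consolidated test this patch introduces (flag
    ! names as passed to the new do_interpMonthlyVeg in SatellitePhenologyMod):
    !   use_cn    .and. n_drydep > 0   -> .true.  (dry deposition needs mlaidiff)
    !   use_fates .and. use_fates_sp   -> .true.  (FATES specified-phenology)
    !   doalb .or. n_drydep > 0        -> .true.  (SP-mode time interpolation)
    !   otherwise                      -> .false.
    ! Note that the third test is now reached for use_cn or use_fates runs
    ! whenever the first two conditions are false, which the nested branches
    ! removed below never allowed.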
Done in SP mode, FATES-SP mode and also when dry-deposition is active ! ============================================================================ - if (use_cn) then - ! For dry-deposition need to call CLMSP so that mlaidiff is obtained - ! NOTE: This is also true of FATES below - if ( n_drydep > 0 ) then - call t_startf('interpMonthlyVeg') - call interpMonthlyVeg(bounds_proc, canopystate_inst) - call t_stopf('interpMonthlyVeg') - endif - - elseif(use_fates) then - - ! For FATES-Specified phenology mode interpolate the weights for - ! time-interpolation of monthly vegetation data (as in SP mode below) - ! Also for FATES with dry-deposition as above need to call CLMSP so that mlaidiff is obtained - !if ( use_fates_sp .or. (n_drydep > 0 ) ) then ! Replace with this when we have dry-deposition working - ! For now don't allow for dry-deposition because of issues in #1044 EBK Jun/17/2022 - if ( use_fates_sp ) then + if (do_interpMonthlyVeg(use_cn, use_fates, use_fates_sp, doalb, n_drydep)) then call t_startf('interpMonthlyVeg') call interpMonthlyVeg(bounds_proc, canopystate_inst) call t_stopf('interpMonthlyVeg') - end if - - else - - ! Determine weights for time interpolation of monthly vegetation data. - ! This also determines whether it is time to read new monthly vegetation and - ! obtain updated leaf area index [mlai1,mlai2], stem area index [msai1,msai2], - ! vegetation top [mhvt1,mhvt2] and vegetation bottom [mhvb1,mhvb2]. The - ! weights obtained here are used in subroutine SatellitePhenology to obtain time - ! interpolated values. - ! This is also done for FATES-SP mode above - if ( doalb .or. ( n_drydep > 0 ) )then - call t_startf('interpMonthlyVeg') - call interpMonthlyVeg(bounds_proc, canopystate_inst) - call t_stopf('interpMonthlyVeg') - end if - end if ! ================================================================================== From 460fa670d76b0c6e7a615075faff26154962b2b2 Mon Sep 17 00:00:00 2001 From: adrifoster Date: Tue, 29 Aug 2023 10:26:40 -0600 Subject: [PATCH 02/85] add new function to public list --- src/biogeochem/SatellitePhenologyMod.F90 | 1 + 1 file changed, 1 insertion(+) diff --git a/src/biogeochem/SatellitePhenologyMod.F90 b/src/biogeochem/SatellitePhenologyMod.F90 index 54f2dcdb32..7714af02eb 100644 --- a/src/biogeochem/SatellitePhenologyMod.F90 +++ b/src/biogeochem/SatellitePhenologyMod.F90 @@ -28,6 +28,7 @@ module SatellitePhenologyMod public :: SatellitePhenologyInit ! Dynamically allocate memory public :: interpMonthlyVeg ! interpolate monthly vegetation data public :: readAnnualVegetation ! Read in annual vegetation (needed for Dry-deposition) + public :: do_interpMonthlyVeg ! whether or not to call interpMonthlyVeg ! ! !PRIVATE MEMBER FUNCTIONS: private :: readMonthlyVegetation ! read monthly vegetation data for two months From cf7a00310b7f67d88226ccc5d2c3069a03c5271e Mon Sep 17 00:00:00 2001 From: Keith Oleson Date: Tue, 10 Oct 2023 16:00:55 -0600 Subject: [PATCH 03/85] View factor fix --- src/biogeophys/UrbBuildTempOleson2015Mod.F90 | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/biogeophys/UrbBuildTempOleson2015Mod.F90 b/src/biogeophys/UrbBuildTempOleson2015Mod.F90 index bf8b68c7eb..83422f0971 100644 --- a/src/biogeophys/UrbBuildTempOleson2015Mod.F90 +++ b/src/biogeophys/UrbBuildTempOleson2015Mod.F90 @@ -424,10 +424,8 @@ subroutine BuildingTemperature (bounds, num_urbanl, filter_urbanl, num_nolakec, ! 
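          ! A worked check of the fix below: with A_wall = building_hwr * A_floor,
          ! reciprocity gives A_wall*F(wall->floor) = A_floor*F(floor->wall).
          ! Because vf_wf already folds in the wall-to-floor area ratio (the
          ! "implicit conversion" noted in these comments), the same value serves
          ! as the floor-to-wall view factor, hence vf_fw = vf_wf, and the net
          ! balance can sum qrd_roof + qrd_sunw + qrd_shdw + qrd_floor without
          ! the old building_hwr factor.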
This view factor implicitly converts from per unit wall area to per unit floor area vf_wf(l) = 0.5_r8*(1._r8 - vf_rf(l)) - ! This view factor implicitly converts from per unit floor area to per unit wall area - vf_fw(l) = vf_wf(l) / building_hwr(l) + vf_fw(l) = vf_wf(l) - ! This view factor implicitly converts from per unit roof area to per unit wall area vf_rw(l) = vf_fw(l) ! This view factor implicitly converts from per unit wall area to per unit roof area @@ -831,7 +829,7 @@ subroutine BuildingTemperature (bounds, num_urbanl, filter_urbanl, num_nolakec, + em_floori(l)*sb*t_floor_bef(l)**4._r8 & + 4._r8*em_floori(l)*sb*t_floor_bef(l)**3.*(t_floor(l) - t_floor_bef(l)) - qrd_building(l) = qrd_roof(l) + building_hwr(l)*(qrd_sunw(l) + qrd_shdw(l)) + qrd_floor(l) + qrd_building(l) = qrd_roof(l) + qrd_sunw(l) + qrd_shdw(l) + qrd_floor(l) if (abs(qrd_building(l)) > .10_r8 ) then write (iulog,*) 'urban inside building net longwave radiation balance error ',qrd_building(l) From 1b3d377bf76d225d98fd545228625f49076a8671 Mon Sep 17 00:00:00 2001 From: Keith Oleson Date: Tue, 10 Oct 2023 16:42:25 -0600 Subject: [PATCH 04/85] Add comments --- src/biogeophys/UrbBuildTempOleson2015Mod.F90 | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/biogeophys/UrbBuildTempOleson2015Mod.F90 b/src/biogeophys/UrbBuildTempOleson2015Mod.F90 index 83422f0971..4c985f0ab3 100644 --- a/src/biogeophys/UrbBuildTempOleson2015Mod.F90 +++ b/src/biogeophys/UrbBuildTempOleson2015Mod.F90 @@ -383,9 +383,11 @@ subroutine BuildingTemperature (bounds, num_urbanl, filter_urbanl, num_nolakec, ! Get terms from soil temperature equations to compute conduction flux ! Negative is toward surface - heat added - ! Note that the conduction flux here is in W m-2 wall area but for purposes of solving the set of - ! simultaneous equations this must be converted to W m-2 floor area. This is done below when - ! setting up the equation coefficients. + ! Note that the convection and conduction fluxes for the walls are in W m-2 wall area + ! but for purposes of solving the set of simultaneous equations this must be converted to W m-2 + ! floor or roof area. This is done below when setting up the equation coefficients by multiplying by building_hwr. + ! Note also that the longwave radiation terms for the walls are in terms of W m-2 floor area since the view + ! factors implicitly convert from per unit wall area to per unit floor or roof area. do fc = 1,num_nolakec c = filter_nolakec(fc) From 6e15b4c72fe32debf4ae40ce19e2ed49a17099c5 Mon Sep 17 00:00:00 2001 From: Sam Rabin Date: Thu, 19 Oct 2023 11:40:20 -0600 Subject: [PATCH 05/85] Automatically assign high priority items to project 25. 
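
The workflow watches issue and pull-request open/label events (plus issue
comments) and uses srggrs/assign-one-project-github-action to place anything
carrying the high-priority label into the "Needs triage" column of project 25,
the High Priority board.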
--- .github/workflows/assign-to-project.yml | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) create mode 100644 .github/workflows/assign-to-project.yml diff --git a/.github/workflows/assign-to-project.yml b/.github/workflows/assign-to-project.yml new file mode 100644 index 0000000000..5b3814488f --- /dev/null +++ b/.github/workflows/assign-to-project.yml @@ -0,0 +1,23 @@ +name: Auto Assign to Project(s) + +on: + issues: + types: [opened, labeled] + pull_request: + types: [opened, labeled] + issue_comment: + types: [created] + +jobs: + assign_high_priority: + runs-on: ubuntu-latest + name: Assign to High Priority project + steps: + - name: Assign issues and pull requests with `priority:high` label to project 25 + uses: srggrs/assign-one-project-github-action@1.3.1 + if: | + contains(github.event.issue.labels.*.name, 'priority:high') || + contains(github.event.pull_request.labels.*.name, 'priority:high') + with: + project: 'https://github.com/ESCOMP/CTSM/projects/25' + column_name: 'Needs triage' From 804c281fb8e91afcdad023d827238501d9757e08 Mon Sep 17 00:00:00 2001 From: Sam Rabin Date: Thu, 19 Oct 2023 12:00:16 -0600 Subject: [PATCH 06/85] Correct label. --- .github/workflows/assign-to-project.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/assign-to-project.yml b/.github/workflows/assign-to-project.yml index 5b3814488f..8c6c259c33 100644 --- a/.github/workflows/assign-to-project.yml +++ b/.github/workflows/assign-to-project.yml @@ -13,11 +13,11 @@ jobs: runs-on: ubuntu-latest name: Assign to High Priority project steps: - - name: Assign issues and pull requests with `priority:high` label to project 25 + - name: Assign issues and pull requests with `priority: high` label to project 25 uses: srggrs/assign-one-project-github-action@1.3.1 if: | - contains(github.event.issue.labels.*.name, 'priority:high') || - contains(github.event.pull_request.labels.*.name, 'priority:high') + contains(github.event.issue.labels.*.name, 'priority: high') || + contains(github.event.pull_request.labels.*.name, 'priority: high') with: project: 'https://github.com/ESCOMP/CTSM/projects/25' column_name: 'Needs triage' From f9f38b61dd89ed8e79f61f6f6dbe24206303753e Mon Sep 17 00:00:00 2001 From: Teagan King Date: Wed, 10 Jan 2024 10:33:03 -0700 Subject: [PATCH 07/85] initial refactoring from refactoring branch --- python/ctsm/site_and_regional/arg_parse.py | 267 +++++++++ python/ctsm/site_and_regional/neon_site.py | 393 ++++++++++++ python/ctsm/site_and_regional/run_neon.py | 658 +-------------------- python/ctsm/test/test_unit_NeonSite.py | 64 ++ python/ctsm/test/test_unit_arg_parse.py | 64 ++ tools/site_and_regional/run_neon | 4 + 6 files changed, 804 insertions(+), 646 deletions(-) create mode 100644 python/ctsm/site_and_regional/arg_parse.py create mode 100755 python/ctsm/site_and_regional/neon_site.py create mode 100755 python/ctsm/test/test_unit_NeonSite.py create mode 100755 python/ctsm/test/test_unit_arg_parse.py diff --git a/python/ctsm/site_and_regional/arg_parse.py b/python/ctsm/site_and_regional/arg_parse.py new file mode 100644 index 0000000000..9025f5dde8 --- /dev/null +++ b/python/ctsm/site_and_regional/arg_parse.py @@ -0,0 +1,267 @@ +""" +Argument parser to use throughout run_neon.py +""" + +import argparse +import datetime +import logging +import os +import sys + +# Get the ctsm util tools and then the cime tools. 
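# NOTE: with this module under python/ctsm/site_and_regional/, the join below
# appears to resolve to <repo>/python/python, which does not exist, so the
# sys.path insert is effectively a no-op; imports still work because callers
# such as the tools/site_and_regional/run_neon wrapper and the unit tests put
# <repo>/python on sys.path first. Dropping the trailing "python" component
# (the os.pardir, os.pardir pattern the test modules use) would target
# <repo>/python directly.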
+_CTSM_PYTHON = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..", "python")) +sys.path.insert(1, _CTSM_PYTHON) + +from ctsm import add_cime_to_path + +from CIME import build +from CIME.case import Case +from CIME.utils import safe_copy, expect, parse_args_and_handle_standard_logging_options, setup_standard_logging_options, symlink_force + +from ctsm.utils import parse_isoduration + + +def get_parser(args, description, valid_neon_sites): + """ + Get parser object for this script. + """ + parser = argparse.ArgumentParser( + description=description, formatter_class=argparse.RawDescriptionHelpFormatter + ) + + setup_standard_logging_options(parser) + + parser.print_usage = parser.print_help + + parser.add_argument( + "--neon-sites", + help="4-letter neon site code.", + action="store", + required=False, + choices=valid_neon_sites + ["all"], + dest="neon_sites", + default=["OSBS"], + nargs="+", + ) + + parser.add_argument( + "--base-case", + help=""" + Root Directory of base case build + [default: %(default)s] + """, + action="store", + dest="base_case_root", + type=str, + required=False, + default=None, + ) + + parser.add_argument( + "--output-root", + help=""" + Root output directory of cases + [default: %(default)s] + """, + action="store", + dest="output_root", + type=str, + required=False, + default="CIME_OUTPUT_ROOT as defined in cime", + ) + + parser.add_argument( + "--overwrite", + help=""" + overwrite existing case directories + [default: %(default)s] + """, + action="store_true", + dest="overwrite", + required=False, + default=False, + ) + + parser.add_argument( + "--setup-only", + help=""" + Only setup the requested cases, do not build or run + [default: %(default)s] + """, + action="store_true", + dest="setup_only", + required=False, + default=False, + ) + + parser.add_argument( + "--rerun", + help=""" + If the case exists but does not appear to be complete, restart it. + [default: %(default)s] + """, + action="store_true", + dest="rerun", + required=False, + default=False, + ) + + parser.add_argument( + "--no-batch", + help=""" + Run locally, do not use batch queueing system (if defined for Machine) + [default: %(default)s] + """, + action="store_true", + dest="no_batch", + required=False, + default=False, + ) + + parser.add_argument( + "--run-type", + help=""" + Type of run to do + [default: %(default)s] + """, + choices=["ad", "postad", "transient"], #, "sasu"], + default="transient", + ) + + parser.add_argument( + "--prism", + help=""" + Uses the PRISM reanaylsis precipitation data for the site instead of the NEON data + (only available over Continental US) + """, + action="store_true", + dest="prism", + required=False, + default=False, + ) + + parser.add_argument( + "--experiment", + help=""" + Appends the case name with string for model experiment + """, + action="store", + dest="experiment", + type=str, + required=False, + default=None, + ) + + parser.add_argument( + "--run-length", + help=""" + How long to run (modified ISO 8601 duration) + [default: %(default)s] + """, + required=False, + type=str, + default="0Y", + ) + + #parser.add_argument( + # "--start-date", + # help=""" + # Start date for running CTSM simulation in ISO format. 
+ # [default: %(default)s] + # (currently non-functional) + # """, + # action="store", + # dest="start_date", + # required=False, + # type=datetime.date.fromisoformat, + # default=datetime.datetime.strptime("2018-01-01", "%Y-%m-%d"), + #) + + #parser.add_argument( + # "--end-date", + # help=""" + # End date for running CTSM simulation in ISO format. + # [default: %(default)s] + # """, + # action="store", + # dest="end_date", + # required=False, + # type=datetime.date.fromisoformat, + # default=datetime.datetime.strptime("2021-01-01", "%Y-%m-%d"), + #) + + parser.add_argument( + "--run-from-postad", + help=""" + For transient runs only - should we start from the postad spinup or finidat? + By default start from finidat, if this flag is used the postad run must be available. + """, + action="store_true", + required=False, + default=False, + ) + parser.add_argument( + "--neon-version", + help=""" + Neon data version to use for this simulation. + [default: use the latest data available] + """, + action="store", + dest="user_version", + required=False, + type=str, + choices=["v1", "v2", "v3"], + ) + + args = parse_args_and_handle_standard_logging_options(args, parser) + + if "all" in args.neon_sites: + neon_sites = valid_neon_sites + else: + neon_sites = args.neon_sites + for site in neon_sites: + if site not in valid_neon_sites: + raise ValueError("Invalid site name {}".format(site)) + + if "CIME_OUTPUT_ROOT" in args.output_root: + args.output_root = None + + if args.run_length == "0Y": + if args.run_type == "ad": + run_length = "100Y" + elif args.run_type == "postad": + run_length = "100Y" + else: + # The transient run length is set by cdeps atm buildnml to + # the last date of the available tower data + # this value is not used + run_length = "4Y" + else: + run_length = args.run_length + + run_length = parse_isoduration(run_length) + base_case_root = None + if args.base_case_root: + base_case_root = os.path.abspath(args.base_case_root) + + # Reduce output level for this script unless --debug or + # --verbose is provided on the command line + if not args.debug and not args.verbose: + root_logger = logging.getLogger() + root_logger.setLevel(logging.WARN) + + return ( + neon_sites, + args.output_root, + args.run_type, + args.experiment, + args.prism, + args.overwrite, + run_length, + base_case_root, + args.run_from_postad, + args.setup_only, + args.no_batch, + args.rerun, + args.user_version, + ) diff --git a/python/ctsm/site_and_regional/neon_site.py b/python/ctsm/site_and_regional/neon_site.py new file mode 100755 index 0000000000..873d02fec7 --- /dev/null +++ b/python/ctsm/site_and_regional/neon_site.py @@ -0,0 +1,393 @@ +""" +This module contains the NeonSite class and class functions which are used in run_neon.py +""" + +# Import libraries +import datetime +import glob +import logging +import os +import re +import shutil +import sys +import time + +# Get the ctsm util tools and then the cime tools. +_CTSM_PYTHON = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..", "python")) +sys.path.insert(1, _CTSM_PYTHON) + +from ctsm import add_cime_to_path + +from CIME import build +from CIME.case import Case +from CIME.utils import safe_copy, expect, symlink_force + +from ctsm.path_utils import path_to_ctsm_root + +logger = logging.getLogger(__name__) + + +class NeonSite: + """ + A class for encapsulating neon sites. 
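
    Attributes
    ----------
    name : str
        4-letter NEON site code
    start_year, end_year : int
        first and last calendar years with tower data for this site
    start_month, end_month : int
        first and last months with tower data for this site
    cesmroot : str
        CTSM root directory, from path_to_ctsm_root()
    finidat : str or None
        initial-conditions file for transient runs, when one is available

    Methods
    -------
    build_base_case : create, set up, and build the case the site cases clone
    get_batch_query : return the machine's batch queue query command
    run_case : clone the base case and run an ad, postad, or transient case
    set_ref_case : point a postad or transient case at its spinup reference case
    modify_user_nl : write finidat or history-output settings to user_nl_clm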
+ """ + + def __init__(self, name, start_year, end_year, start_month, end_month, finidat): + self.name = name + self.start_year = int(start_year) + self.end_year = int(end_year) + self.start_month = int(start_month) + self.end_month = int(end_month) + self.cesmroot = path_to_ctsm_root() + self.finidat = finidat + + def __str__(self): + return ( + str(self.__class__) + "\n" + "\n".join((str(item) + " = " for item in (self.__dict__))) + ) + + def build_base_case( + self, cesmroot, output_root, res, compset, overwrite=False, setup_only=False + ): + """ + Function for building a base_case to clone. + To spend less time on building ctsm for the neon cases, + all the other cases are cloned from this case + + Args: + self: + The NeonSite object + base_root (str): + root of the base_case CIME + res (str): + base_case resolution or gridname + compset (str): + base case compset + overwrite (bool) : + Flag to overwrite the case if exists + """ + print("---- building a base case -------") + self.base_case_root = output_root + user_mods_dirs = [os.path.join(cesmroot, "cime_config", "usermods_dirs", "NEON", self.name)] + if not output_root: + output_root = os.getcwd() + case_path = os.path.join(output_root, self.name) + + logger.info("base_case_name : %s", self.name) + logger.info("user_mods_dir : %s", user_mods_dirs[0]) + + if overwrite and os.path.isdir(case_path): + print("Removing the existing case at: {}".format(case_path)) + shutil.rmtree(case_path) + + with Case(case_path, read_only=False) as case: + if not os.path.isdir(case_path): + print("---- creating a base case -------") + + case.create( + case_path, + cesmroot, + compset, + res, + run_unsupported=True, + answer="r", + output_root=output_root, + user_mods_dirs=user_mods_dirs, + driver="nuopc", + ) + + print("---- base case created ------") + + # --change any config for base_case: + # case.set_value("RUN_TYPE","startup") + print("---- base case setup ------") + case.case_setup() + else: + # For existing case check that the compset name is correct + existingcompname = case.get_value("COMPSET") + match = re.search("^HIST", existingcompname, flags=re.IGNORECASE) + if re.search("^HIST", compset, flags=re.IGNORECASE) is None: + expect( + match is None, + """Existing base case is a historical type and should not be + --rerun with the --overwrite option""", + ) + else: + expect( + match is not None, + """Existing base case should be a historical type and is not + --rerun with the --overwrite option""", + ) + # reset the case + case.case_setup(reset=True) + case_path = case.get_value("CASEROOT") + + if setup_only: + return case_path + + print("---- base case build ------") + print("--- This may take a while and you may see WARNING messages ---") + # always walk through the build process to make sure it's up to date. + initial_time = time.time() + build.case_build(case_path, case=case) + end_time = time.time() + total = end_time - initial_time + print("Time required to building the base case: {} s.".format(total)) + # update case_path to be the full path to the base case + return case_path + + def get_batch_query(self, case): + """ + Function for querying the batch queue query command for a case, depending on the + user's batch system. 
+ + Args: + case: + case object + """ + + if case.get_value("BATCH_SYSTEM") == "none": + return "none" + return case.get_value("batch_query") + + def run_case( + self, + base_case_root, + run_type, + prism, + run_length, + user_version, + overwrite=False, + setup_only=False, + no_batch=False, + rerun=False, + experiment=False, + ): + """ + Run case. + + Args: + self + base_case_root: str, opt + file path of base case + run_type: str, opt + transient, post_ad, or ad case, default transient + prism: bool, opt + if True, use PRISM precipitation, default False + run_length: str, opt + length of run, default '4Y' + user_version: str, opt + default 'latest' + overwrite: bool, opt + default False + setup_only: bool, opt + default False; if True, set up but do not run case + no_batch: bool, opt + default False + rerun: bool, opt + default False + experiment: str, opt + name of experiment, default False + """ + user_mods_dirs = [ + os.path.join(self.cesmroot, "cime_config", "usermods_dirs", "NEON", self.name) + ] + expect( + os.path.isdir(base_case_root), + "Error base case does not exist in {}".format(base_case_root), + ) + # -- if user gives a version: + if user_version: + version = user_version + else: + version = "latest" + + print("using this version:", version) + + if experiment is not None: + self.name = self.name + "." + experiment + case_root = os.path.abspath(os.path.join(base_case_root, "..", self.name + "." + run_type)) + + rundir = None + if os.path.isdir(case_root): + if overwrite: + print("---- removing the existing case -------") + shutil.rmtree(case_root) + elif rerun: + with Case(case_root, read_only=False) as case: + rundir = case.get_value("RUNDIR") + # For existing case check that the compset name is correct + existingcompname = case.get_value("COMPSET") + match = re.search("^HIST", existingcompname, flags=re.IGNORECASE) + if re.search("^HIST", compset, flags=re.IGNORECASE) is None: + expect( + match is None, + """Existing base case is a historical type and should not be + --rerun with the --overwrite option""", + ) + else: + expect( + match is not None, + """Existing base case should be a historical type and is not + --rerun with the --overwrite option""", + ) + if os.path.isfile(os.path.join(rundir, "ESMF_Profile.summary")): + print("Case {} appears to be complete, not rerunning.".format(case_root)) + elif not setup_only: + print("Resubmitting case {}".format(case_root)) + case.submit(no_batch=no_batch) + print("-----------------------------------") + print("Successfully submitted case!") + batch_query = self.get_batch_query(case) + if batch_query != "none": + print(f"Use {batch_query} to check its run status") + return + else: + logger.warning("Case already exists in %s, not overwritting", case_root) + return + + if run_type == "postad": + adcase_root = case_root.replace(".postad", ".ad") + if not os.path.isdir(adcase_root): + logger.warning("postad requested but no ad case found in %s", adcase_root) + return + + if not os.path.isdir(case_root): + # read_only = False should not be required here + with Case(base_case_root, read_only=False) as basecase: + print("---- cloning the base case in {}".format(case_root)) + # + # EBK: 11/05/2022 -- Note keeping the user_mods_dirs argument is important. Although + # it causes some of the user_nl_* files to have duplicated inputs. It also ensures + # that the shell_commands file is copied, as well as taking care of the DATM inputs. 
+ # See https://github.com/ESCOMP/CTSM/pull/1872#pullrequestreview-1169407493 + # + basecase.create_clone(case_root, keepexe=True, user_mods_dirs=user_mods_dirs) + + with Case(case_root, read_only=False) as case: + if run_type != "transient": + # in order to avoid the complication of leap years, + # we always set the run_length in units of days. + case.set_value("STOP_OPTION", "ndays") + case.set_value("REST_OPTION", "end") + case.set_value("CONTINUE_RUN", False) + case.set_value("NEONVERSION", version) + if prism: + case.set_value("CLM_USRDAT_NAME", "NEON.PRISM") + + if run_type == "ad": + case.set_value("CLM_FORCE_COLDSTART", "on") + case.set_value("CLM_ACCELERATED_SPINUP", "on") + case.set_value("RUN_REFDATE", "0018-01-01") + case.set_value("RUN_STARTDATE", "0018-01-01") + case.set_value("RESUBMIT", 1) + case.set_value("STOP_N", run_length) + + else: + case.set_value("CLM_FORCE_COLDSTART", "off") + case.set_value("CLM_ACCELERATED_SPINUP", "off") + case.set_value("RUN_TYPE", "hybrid") + + if run_type == "postad": + self.set_ref_case(case) + case.set_value("STOP_N", run_length) + + # For transient cases STOP will be set in the user_mod_directory + if run_type == "transient": + if self.finidat: + case.set_value("RUN_TYPE", "startup") + else: + if not self.set_ref_case(case): + return + case.set_value("CALENDAR", "GREGORIAN") + case.set_value("RESUBMIT", 0) + case.set_value("STOP_OPTION", "nmonths") + + if not rundir: + rundir = case.get_value("RUNDIR") + + self.modify_user_nl(case_root, run_type, rundir) + + case.create_namelists() + # explicitly run check_input_data + case.check_all_input_data() + if not setup_only: + case.submit(no_batch=no_batch) + print("-----------------------------------") + print("Successfully submitted case!") + batch_query = self.get_batch_query(case) + if batch_query != "none": + print(f"Use {batch_query} to check its run status") + + def set_ref_case(self, case): + """ + Set an existing case as the reference case, eg for use with spinup. 
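        Locates the companion .ad case (for a postad run) or .postad case (for
        a transient run), symlinks its restart file and copies its rpointer
        files into this case's run directory, and sets RUN_REFDIR, RUN_REFCASE,
        and RUN_REFDATE to match. Returns False when the reference case or its
        restart file cannot be found, True otherwise.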
+ """ + rundir = case.get_value("RUNDIR") + case_root = case.get_value("CASEROOT") + if case_root.endswith(".postad"): + ref_case_root = case_root.replace(".postad", ".ad") + root = ".ad" + else: + ref_case_root = case_root.replace(".transient", ".postad") + root = ".postad" + if not os.path.isdir(ref_case_root): + logger.warning( + "ERROR: spinup must be completed first, could not find directory %s", ref_case_root + ) + return False + + with Case(ref_case_root) as refcase: + refrundir = refcase.get_value("RUNDIR") + case.set_value("RUN_REFDIR", refrundir) + case.set_value("RUN_REFCASE", os.path.basename(ref_case_root)) + refdate = None + for reffile in glob.iglob(refrundir + "/{}{}.clm2.r.*.nc".format(self.name, root)): + m_searched = re.search(r"(\d\d\d\d-\d\d-\d\d)-\d\d\d\d\d.nc", reffile) + if m_searched: + refdate = m_searched.group(1) + symlink_force(reffile, os.path.join(rundir, os.path.basename(reffile))) + logger.info("Found refdate of %s", refdate) + if not refdate: + logger.warning("Could not find refcase for %s", case_root) + return False + + for rpfile in glob.iglob(refrundir + "/rpointer*"): + safe_copy(rpfile, rundir) + if not os.path.isdir(os.path.join(rundir, "inputdata")) and os.path.isdir( + os.path.join(refrundir, "inputdata") + ): + symlink_force(os.path.join(refrundir, "inputdata"), os.path.join(rundir, "inputdata")) + + case.set_value("RUN_REFDATE", refdate) + if case_root.endswith(".postad"): + case.set_value("RUN_STARTDATE", refdate) + # NOTE: if start options are set, RUN_STARTDATE should be modified here + return True + + def modify_user_nl(self, case_root, run_type, rundir): + """ + Modify user namelist. If transient, include finidat in user_nl; + Otherwise, adjust user_nl to include different mfilt, nhtfrq, and variables in hist_fincl1. + """ + user_nl_fname = os.path.join(case_root, "user_nl_clm") + user_nl_lines = None + if run_type == "transient": + if self.finidat: + user_nl_lines = [ + "finidat = '{}/inputdata/lnd/ctsm/initdata/{}'".format(rundir, self.finidat) + ] + else: + user_nl_lines = [ + "hist_fincl2 = ''", + "hist_mfilt = 20", + "hist_nhtfrq = -8760", + "hist_empty_htapes = .true.", + """hist_fincl1 = 'TOTECOSYSC', 'TOTECOSYSN', 'TOTSOMC', 'TOTSOMN', 'TOTVEGC', + 'TOTVEGN', 'TLAI', 'GPP', 'CPOOL', 'NPP', 'TWS', 'H2OSNO'""", + ] + + if user_nl_lines: + with open(user_nl_fname, "a") as nl_file: + for line in user_nl_lines: + nl_file.write("{}\n".format(line)) diff --git a/python/ctsm/site_and_regional/run_neon.py b/python/ctsm/site_and_regional/run_neon.py index a69dc0bdb0..f7ed477cf1 100755 --- a/python/ctsm/site_and_regional/run_neon.py +++ b/python/ctsm/site_and_regional/run_neon.py @@ -47,670 +47,36 @@ # - [ ] Matrix spin-up if (SASU) Eric merged it in # - [ ] Make sure both AD and SASU are not on at the same time -# - [ ] Make sure CIME and other dependencies is checked out. +# - [ ] Make sure CIME and other dependencies are checked out. # Import libraries -import argparse -import datetime import glob import logging import os -import re -import shutil import sys -import time import pandas as pd -from standard_script_setup import * - # Get the ctsm util tools and then the cime tools. 
_CTSM_PYTHON = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..", "python")) sys.path.insert(1, _CTSM_PYTHON) +from ctsm.path_utils import path_to_ctsm_root +from ctsm.download_utils import download_file +from ctsm.site_and_regional.arg_parse import get_parser +from ctsm.site_and_regional.neon_site import NeonSite + from ctsm import add_cime_to_path from CIME import build from CIME.case import Case from CIME.utils import safe_copy, expect, symlink_force -from ctsm.path_utils import path_to_ctsm_root -from ctsm.utils import parse_isoduration -from ctsm.download_utils import download_file - from standard_script_setup import * logger = logging.getLogger(__name__) -def get_parser(args, description, valid_neon_sites): - """ - Get parser object for this script. - """ - parser = argparse.ArgumentParser( - description=description, formatter_class=argparse.RawDescriptionHelpFormatter - ) - - CIME.utils.setup_standard_logging_options(parser) - - parser.print_usage = parser.print_help - - parser.add_argument( - "--neon-sites", - help="4-letter neon site code.", - action="store", - required=False, - choices=valid_neon_sites + ["all"], - dest="neon_sites", - default=["OSBS"], - nargs="+", - ) - - parser.add_argument( - "--base-case", - help=""" - Root Directory of base case build - [default: %(default)s] - """, - action="store", - dest="base_case_root", - type=str, - required=False, - default=None, - ) - - parser.add_argument( - "--output-root", - help=""" - Root output directory of cases - [default: %(default)s] - """, - action="store", - dest="output_root", - type=str, - required=False, - default="CIME_OUTPUT_ROOT as defined in cime", - ) - - parser.add_argument( - "--overwrite", - help=""" - overwrite existing case directories - [default: %(default)s] - """, - action="store_true", - dest="overwrite", - required=False, - default=False, - ) - - parser.add_argument( - "--setup-only", - help=""" - Only setup the requested cases, do not build or run - [default: %(default)s] - """, - action="store_true", - dest="setup_only", - required=False, - default=False, - ) - - parser.add_argument( - "--rerun", - help=""" - If the case exists but does not appear to be complete, restart it. - [default: %(default)s] - """, - action="store_true", - dest="rerun", - required=False, - default=False, - ) - - parser.add_argument( - "--no-batch", - help=""" - Run locally, do not use batch queueing system (if defined for Machine) - [default: %(default)s] - """, - action="store_true", - dest="no_batch", - required=False, - default=False, - ) - - parser.add_argument( - "--run-type", - help=""" - Type of run to do - [default: %(default)s] - """, - choices=["ad", "postad", "transient", "sasu"], - default="transient", - ) - - parser.add_argument( - "--prism", - help=""" - Uses the PRISM reanaylsis precipitation data for the site instead of the NEON data - (only available over Continental US) - """, - action="store_true", - dest="prism", - required=False, - default=False, - ) - - parser.add_argument( - "--experiment", - help=""" - Appends the case name with string for model experiment - """, - action="store", - dest="experiment", - type=str, - required=False, - default=None, - ) - - parser.add_argument( - "--run-length", - help=""" - How long to run (modified ISO 8601 duration) - [default: %(default)s] - """, - required=False, - type=str, - default="0Y", - ) - - parser.add_argument( - "--start-date", - help=""" - Start date for running CTSM simulation in ISO format. 
- [default: %(default)s] - (currently non-functional) - """, - action="store", - dest="start_date", - required=False, - type=datetime.date.fromisoformat, - default=datetime.datetime.strptime("2018-01-01", "%Y-%m-%d"), - ) - - parser.add_argument( - "--end-date", - help=""" - End date for running CTSM simulation in ISO format. - [default: %(default)s] - """, - action="store", - dest="end_date", - required=False, - type=datetime.date.fromisoformat, - default=datetime.datetime.strptime("2021-01-01", "%Y-%m-%d"), - ) - - parser.add_argument( - "--run-from-postad", - help=""" - For transient runs only - should we start from the postad spinup or finidat? - By default start from finidat, if this flag is used the postad run must be available. - """, - action="store_true", - required=False, - default=False, - ) - parser.add_argument( - "--neon-version", - help=""" - Neon data version to use for this simulation. - [default: use the latest data available] - """, - action="store", - dest="user_version", - required=False, - type=str, - choices=["v1", "v2", "v3"], - ) - - args = CIME.utils.parse_args_and_handle_standard_logging_options(args, parser) - - if "all" in args.neon_sites: - neon_sites = valid_neon_sites - else: - neon_sites = args.neon_sites - for site in neon_sites: - if site not in valid_neon_sites: - raise ValueError("Invalid site name {}".format(site)) - - if "CIME_OUTPUT_ROOT" in args.output_root: - args.output_root = None - - if args.run_length == "0Y": - if args.run_type == "ad": - run_length = "100Y" - elif args.run_type == "postad": - run_length = "100Y" - else: - # The transient run length is set by cdeps atm buildnml to - # the last date of the available tower data - # this value is not used - run_length = "4Y" - else: - run_length = args.run_length - - run_length = parse_isoduration(run_length) - base_case_root = None - if args.base_case_root: - base_case_root = os.path.abspath(args.base_case_root) - - # Reduce output level for this script unless --debug or - # --verbose is provided on the command line - if not args.debug and not args.verbose: - root_logger = logging.getLogger() - root_logger.setLevel(logging.WARN) - - return ( - neon_sites, - args.output_root, - args.run_type, - args.experiment, - args.prism, - args.overwrite, - run_length, - base_case_root, - args.run_from_postad, - args.setup_only, - args.no_batch, - args.rerun, - args.user_version, - ) - - -class NeonSite: - """ - A class for encapsulating neon sites. - - ... - - Attributes - ---------- - - Methods - ------- - """ - - def __init__(self, name, start_year, end_year, start_month, end_month, finidat): - self.name = name - self.start_year = int(start_year) - self.end_year = int(end_year) - self.start_month = int(start_month) - self.end_month = int(end_month) - self.cesmroot = path_to_ctsm_root() - self.finidat = finidat - - def __str__(self): - return str(self.__class__) + "\n" + "\n".join((str(item) + " = " for item in self.__dict__)) - - def build_base_case( - self, cesmroot, output_root, res, compset, overwrite=False, setup_only=False - ): - """ - Function for building a base_case to clone. 
- To spend less time on building ctsm for the neon cases, - all the other cases are cloned from this case - - Args: - self: - The NeonSite object - base_root (str): - root of the base_case CIME - res (str): - base_case resolution or gridname - compset (str): - base case compset - overwrite (bool) : - Flag to overwrite the case if exists - """ - print("---- building a base case -------") - self.base_case_root = output_root - user_mods_dirs = [os.path.join(cesmroot, "cime_config", "usermods_dirs", "NEON", self.name)] - if not output_root: - output_root = os.getcwd() - case_path = os.path.join(output_root, self.name) - - logger.info("base_case_name : %s", self.name) - logger.info("user_mods_dir : %s", user_mods_dirs[0]) - - if overwrite and os.path.isdir(case_path): - print("Removing the existing case at: {}".format(case_path)) - shutil.rmtree(case_path) - - with Case(case_path, read_only=False) as case: - if not os.path.isdir(case_path): - print("---- creating a base case -------") - - case.create( - case_path, - cesmroot, - compset, - res, - run_unsupported=True, - answer="r", - output_root=output_root, - user_mods_dirs=user_mods_dirs, - driver="nuopc", - ) - - print("---- base case created ------") - - # --change any config for base_case: - # case.set_value("RUN_TYPE","startup") - print("---- base case setup ------") - case.case_setup() - else: - # For existing case check that the compset name is correct - existingcompname = case.get_value("COMPSET") - match = re.search("^HIST", existingcompname, flags=re.IGNORECASE) - if re.search("^HIST", compset, flags=re.IGNORECASE) is None: - expect( - match is None, - "Existing base case is a historical type and should " - + "not be -- rerun with the --overwrite option", - ) - else: - expect( - match is not None, - "Existing base case should be a historical type and " - + "is not -- rerun with the --overwrite option", - ) - # reset the case - case.case_setup(reset=True) - case_path = case.get_value("CASEROOT") - - if setup_only: - return case_path - - print("---- base case build ------") - print("--- This may take a while and you may see WARNING messages ---") - # always walk through the build process to make sure it's up to date. - t_0 = time.time() - build.case_build(case_path, case=case) - t_1 = time.time() - total = t_1 - t_0 - print("Time required to building the base case: {} s.".format(total)) - # update case_path to be the full path to the base case - return case_path - - def diff_month(self): - """ - Determine difference between two dates in months - """ - d_1 = datetime.datetime(self.end_year, self.end_month, 1) - d_2 = datetime.datetime(self.start_year, self.start_month, 1) - return (d_1.year - d_2.year) * 12 + d_1.month - d_2.month - - def run_case( - self, - base_case_root, - run_type, - prism, - run_length, - user_version, - overwrite=False, - setup_only=False, - no_batch=False, - rerun=False, - experiment=False, - ): - """ - Run case. 
- - Args: - self - base_case_root: str, opt - file path of base case - run_type: str, opt - transient, post_ad, or ad case, default transient - prism: bool, opt - if True, use PRISM precipitation, default False - run_length: str, opt - length of run, default '4Y' - user_version: str, opt - default 'latest' - overwrite: bool, opt - default False - setup_only: bool, opt - default False; if True, set up but do not run case - no_batch: bool, opt - default False - rerun: bool, opt - default False - experiment: str, opt - name of experiment, default False - """ - user_mods_dirs = [ - os.path.join(self.cesmroot, "cime_config", "usermods_dirs", "NEON", self.name) - ] - expect( - os.path.isdir(base_case_root), - "Error base case does not exist in {}".format(base_case_root), - ) - # -- if user gives a version: - if user_version: - version = user_version - else: - version = "latest" - - print("using this version:", version) - - if experiment is not None: - self.name = self.name + "." + experiment - case_root = os.path.abspath(os.path.join(base_case_root, "..", self.name + "." + run_type)) - - rundir = None - if os.path.isdir(case_root): - if overwrite: - print("---- removing the existing case -------") - shutil.rmtree(case_root) - elif rerun: - with Case(case_root, read_only=False) as case: - rundir = case.get_value("RUNDIR") - # For existing case check that the compset name is correct - existingcompname = case.get_value("COMPSET") - match = re.search("^HIST", existingcompname, flags=re.IGNORECASE) - if re.search("^HIST", compset, flags=re.IGNORECASE) is None: - expect( - match is None, - "Existing base case is a historical type and " - + "should not be -- rerun with the --overwrite option", - ) - else: - expect( - match is not None, - "Existing base case should be a historical type " - + "and is not -- rerun with the --overwrite option", - ) - if os.path.isfile(os.path.join(rundir, "ESMF_Profile.summary")): - print("Case {} appears to be complete, not rerunning.".format(case_root)) - elif not setup_only: - print("Resubmitting case {}".format(case_root)) - case.submit(no_batch=no_batch) - print("-----------------------------------") - print("Successfully submitted case!") - batch_query = self.get_batch_query(case) - if batch_query != "none": - print(f"Use {batch_query} to check its run status") - return - else: - logger.warning("Case already exists in %s, not overwritting.", case_root) - return - - if run_type == "postad": - adcase_root = case_root.replace(".postad", ".ad") - if not os.path.isdir(adcase_root): - logger.warning("postad requested but no ad case found in %s", adcase_root) - return - - if not os.path.isdir(case_root): - # read_only = False should not be required here - with Case(base_case_root, read_only=False) as basecase: - print("---- cloning the base case in {}".format(case_root)) - # - # EBK: 11/05/2022 -- Note keeping the user_mods_dirs argument is important. Although - # it causes some of the user_nl_* files to have duplicated inputs. It also ensures - # that the shell_commands file is copied, as well as taking care of the DATM inputs. - # See https://github.com/ESCOMP/CTSM/pull/1872#pullrequestreview-1169407493 - # - basecase.create_clone(case_root, keepexe=True, user_mods_dirs=user_mods_dirs) - - with Case(case_root, read_only=False) as case: - if run_type != "transient": - # in order to avoid the complication of leap years, - # we always set the run_length in units of days. 
- case.set_value("STOP_OPTION", "ndays") - case.set_value("REST_OPTION", "end") - case.set_value("CONTINUE_RUN", False) - case.set_value("NEONVERSION", version) - if prism: - case.set_value("CLM_USRDAT_NAME", "NEON.PRISM") - - if run_type == "ad": - case.set_value("CLM_FORCE_COLDSTART", "on") - case.set_value("CLM_ACCELERATED_SPINUP", "on") - case.set_value("RUN_REFDATE", "0018-01-01") - case.set_value("RUN_STARTDATE", "0018-01-01") - case.set_value("RESUBMIT", 1) - case.set_value("STOP_N", run_length) - - else: - case.set_value("CLM_FORCE_COLDSTART", "off") - case.set_value("CLM_ACCELERATED_SPINUP", "off") - case.set_value("RUN_TYPE", "hybrid") - - if run_type == "postad": - self.set_ref_case(case) - case.set_value("STOP_N", run_length) - - # For transient cases STOP will be set in the user_mod_directory - if run_type == "transient": - if self.finidat: - case.set_value("RUN_TYPE", "startup") - else: - if not self.set_ref_case(case): - return - case.set_value("CALENDAR", "GREGORIAN") - case.set_value("RESUBMIT", 0) - case.set_value("STOP_OPTION", "nmonths") - - if not rundir: - rundir = case.get_value("RUNDIR") - - self.modify_user_nl(case_root, run_type, rundir) - - case.create_namelists() - # explicitly run check_input_data - case.check_all_input_data() - if not setup_only: - case.submit(no_batch=no_batch) - print("-----------------------------------") - print("Successfully submitted case!") - batch_query = self.get_batch_query(case) - if batch_query != "none": - print(f"Use {batch_query} to check its run status") - - def set_ref_case(self, case): - """ - Set an existing case as the reference case, eg for use with spinup. - """ - rundir = case.get_value("RUNDIR") - case_root = case.get_value("CASEROOT") - if case_root.endswith(".postad"): - ref_case_root = case_root.replace(".postad", ".ad") - root = ".ad" - else: - ref_case_root = case_root.replace(".transient", ".postad") - root = ".postad" - if not os.path.isdir(ref_case_root): - logger.warning( - "ERROR: spinup must be completed first, could not find directory %s", ref_case_root - ) - return False - - with Case(ref_case_root) as refcase: - refrundir = refcase.get_value("RUNDIR") - case.set_value("RUN_REFDIR", refrundir) - case.set_value("RUN_REFCASE", os.path.basename(ref_case_root)) - refdate = None - for reffile in glob.iglob(refrundir + "/{}{}.clm2.r.*.nc".format(self.name, root)): - mon = re.search(r"(\d\d\d\d-\d\d-\d\d)-\d\d\d\d\d.nc", reffile) - if mon: - refdate = mon.group(1) - symlink_force(reffile, os.path.join(rundir, os.path.basename(reffile))) - logger.info("Found refdate of %s", refdate) - if not refdate: - logger.warning("Could not find refcase for %s", case_root) - return False - - for rpfile in glob.iglob(refrundir + "/rpointer*"): - safe_copy(rpfile, rundir) - if not os.path.isdir(os.path.join(rundir, "inputdata")) and os.path.isdir( - os.path.join(refrundir, "inputdata") - ): - symlink_force(os.path.join(refrundir, "inputdata"), os.path.join(rundir, "inputdata")) - - case.set_value("RUN_REFDATE", refdate) - if case_root.endswith(".postad"): - case.set_value("RUN_STARTDATE", refdate) - # NOTE: if start options are set, RUN_STARTDATE should be modified here - return True - - def modify_user_nl(self, case_root, run_type, rundir): - """ - Modify user namelist. If transient, include finidat in user_nl; - Otherwise, adjust user_nl to include different mfilt, nhtfrq, and variables in hist_fincl1. 
- """ - user_nl_fname = os.path.join(case_root, "user_nl_clm") - user_nl_lines = None - if run_type == "transient": - if self.finidat: - user_nl_lines = [ - "finidat = '{}/inputdata/lnd/ctsm/initdata/{}'".format(rundir, self.finidat) - ] - else: - user_nl_lines = [ - "hist_fincl2 = ''", - "hist_mfilt = 20", - "hist_nhtfrq = -8760", - "hist_empty_htapes = .true.", - "hist_fincl1 = 'TOTECOSYSC', 'TOTECOSYSN', 'TOTSOMC', " - + "'TOTSOMN', 'TOTVEGC', 'TOTVEGN', 'TLAI', " - + "'GPP', 'CPOOL', 'NPP', 'TWS', 'H2OSNO'", - ] - - if user_nl_lines: - with open(user_nl_fname, "a") as f_d: - for line in user_nl_lines: - f_d.write("{}\n".format(line)) - - -def get_batch_query(case): - """ - Function for querying the batch queue query command for a case, depending on the - user's batch system. - - Args: - case: - case object - """ - - if case.get_value("BATCH_SYSTEM") == "none": - return "none" - return case.get_value("batch_query") - - def check_neon_listing(valid_neon_sites): """ A function to download and parse neon listing file. @@ -742,19 +108,19 @@ def parse_neon_listing(listing_file, valid_neon_sites): available_list = [] - d_f = pd.read_csv(listing_file) + listing_df = pd.read_csv(listing_file) # check for finidat files for transient run - finidatlist = d_f[d_f["object"].str.contains("lnd/ctsm")] + finidatlist = listing_df[listing_df["object"].str.contains("lnd/ctsm")] # -- filter lines with atm/cdep - d_f = d_f[d_f["object"].str.contains("atm/cdeps/")] + listing_df = listing_df[listing_df["object"].str.contains("atm/cdeps/")] # -- split the object str to extract site name - d_f = d_f["object"].str.split("/", expand=True) + listing_df = listing_df["object"].str.split("/", expand=True) # -- groupby site name - grouped_df = d_f.groupby(8) + grouped_df = listing_df.groupby(8) for key, _ in grouped_df: # -- check if it is a valid neon site if any(key in x for x in valid_neon_sites): @@ -787,7 +153,7 @@ def parse_neon_listing(listing_file, valid_neon_sites): start_month = tmp_df2[1].iloc[0] end_month = tmp_df2[1].iloc[-1] - logger.debug("Valid neon site found: %s", site_name) + logger.debug("Valid neon site %s found!", site_name) logger.debug("File version %s", latest_version) logger.debug("start_year=%s", start_year) logger.debug("end_year=%s", end_year) diff --git a/python/ctsm/test/test_unit_NeonSite.py b/python/ctsm/test/test_unit_NeonSite.py new file mode 100755 index 0000000000..2e6c2650e5 --- /dev/null +++ b/python/ctsm/test/test_unit_NeonSite.py @@ -0,0 +1,64 @@ +#!/usr/bin/env python3 +""" +Unit tests for NeonSite + +You can run this by: + python -m unittest test_unit_NeonSite.py +""" + +import unittest +import tempfile +import shutil +import os +import sys + +# -- add python/ctsm to path (needed if we want to run the test stand-alone) +_CTSM_PYTHON = os.path.join(os.path.dirname(os.path.realpath(__file__)), os.pardir, os.pardir) +sys.path.insert(1, _CTSM_PYTHON) + +# pylint: disable=wrong-import-position +from ctsm import unit_testing +from ctsm.site_and_regional.NeonSite import FUNCTION_NAME + +# pylint: disable=invalid-name + + +class TestNeonSite(unittest.TestCase): + """ + Basic class for testing NeonSite.py. + """ + + def setUp(self): + """ + Make /_tempdir for use by these tests. + """ + self._tempdir = tempfile.mkdtemp() + + def tearDown(self): + """ + Remove temporary directory + """ + shutil.rmtree(self._tempdir, ignore_errors=True) + + def test_function(self): + """ + Test that NeonSite class is working properly... 
+ """ + #valid_neon_sites = ["ABBY", "BART"] + #previous_dir = os.getcwd() + #os.chdir(self._tempdir) # cd to tempdir + #available_list = check_neon_listing(valid_neon_sites) + #self.assertEqual( + # available_list[0].name, "ABBY", "available list of actual sites not as expected" + #) + #self.assertEqual( + # available_list[1].name, "BART", "available list of actual sites not as expected" + #) + # change to previous dir once listing.csv file is created in tempdir and test complete + #os.chdir(previous_dir) + continue + + +if __name__ == "__main__": + unit_testing.setup_for_tests() + unittest.main() diff --git a/python/ctsm/test/test_unit_arg_parse.py b/python/ctsm/test/test_unit_arg_parse.py new file mode 100755 index 0000000000..ab4fc2adb8 --- /dev/null +++ b/python/ctsm/test/test_unit_arg_parse.py @@ -0,0 +1,64 @@ +#!/usr/bin/env python3 +""" +Unit tests for arg_parse + +You can run this by: + python -m unittest test_unit_arg_parse.py +""" + +import unittest +import tempfile +import shutil +import os +import sys + +# -- add python/ctsm to path (needed if we want to run the test stand-alone) +_CTSM_PYTHON = os.path.join(os.path.dirname(os.path.realpath(__file__)), os.pardir, os.pardir) +sys.path.insert(1, _CTSM_PYTHON) + +# pylint: disable=wrong-import-position +from ctsm import unit_testing +from ctsm.site_and_regional.arg_parse import FUNCTION_NAME + +# pylint: disable=invalid-name + + +class Testarg_parse(unittest.TestCase): + """ + Basic class for testing arg_parse.py. + """ + + def setUp(self): + """ + Make /_tempdir for use by these tests. + """ + self._tempdir = tempfile.mkdtemp() + + def tearDown(self): + """ + Remove temporary directory + """ + shutil.rmtree(self._tempdir, ignore_errors=True) + + def test_function(self): + """ + Test that arg_parse is working properly... 
+ """ + #valid_neon_sites = ["ABBY", "BART"] + #previous_dir = os.getcwd() + #os.chdir(self._tempdir) # cd to tempdir + #available_list = check_neon_listing(valid_neon_sites) + #self.assertEqual( + # available_list[0].name, "ABBY", "available list of actual sites not as expected" + #) + #self.assertEqual( + # available_list[1].name, "BART", "available list of actual sites not as expected" + #) + # change to previous dir once listing.csv file is created in tempdir and test complete + #os.chdir(previous_dir) + continue + + +if __name__ == "__main__": + unit_testing.setup_for_tests() + unittest.main() diff --git a/tools/site_and_regional/run_neon b/tools/site_and_regional/run_neon index ad930f50e3..d2bdfbe786 100755 --- a/tools/site_and_regional/run_neon +++ b/tools/site_and_regional/run_neon @@ -34,6 +34,7 @@ conda activate ctsm_pylib import os import sys +import argparse # -- add python/ctsm to path _CTSM_PYTHON = os.path.join( @@ -41,6 +42,9 @@ _CTSM_PYTHON = os.path.join( ) sys.path.insert(1, _CTSM_PYTHON) +from ctsm.site_and_regional.neon_site import NeonSite +from ctsm.site_and_regional.arg_parse import get_parser + from ctsm.site_and_regional.run_neon import main if __name__ == "__main__": From 4a8c4995c7578c82a680b52f54fc785d4138212d Mon Sep 17 00:00:00 2001 From: Teagan King Date: Wed, 10 Jan 2024 14:10:33 -0700 Subject: [PATCH 08/85] a few more refactoring updates, formatting & testing --- python/ctsm/site_and_regional/arg_parse.py | 3 +- python/ctsm/site_and_regional/neon_site.py | 3 +- python/ctsm/test/test_unit_NeonSite.py | 16 +++++++++-- python/ctsm/test/test_unit_arg_parse.py | 33 +++++++++++++--------- tools/site_and_regional/run_neon | 3 -- 5 files changed, 35 insertions(+), 23 deletions(-) diff --git a/python/ctsm/site_and_regional/arg_parse.py b/python/ctsm/site_and_regional/arg_parse.py index 9025f5dde8..f61a262911 100644 --- a/python/ctsm/site_and_regional/arg_parse.py +++ b/python/ctsm/site_and_regional/arg_parse.py @@ -13,13 +13,12 @@ sys.path.insert(1, _CTSM_PYTHON) from ctsm import add_cime_to_path +from ctsm.utils import parse_isoduration from CIME import build from CIME.case import Case from CIME.utils import safe_copy, expect, parse_args_and_handle_standard_logging_options, setup_standard_logging_options, symlink_force -from ctsm.utils import parse_isoduration - def get_parser(args, description, valid_neon_sites): """ diff --git a/python/ctsm/site_and_regional/neon_site.py b/python/ctsm/site_and_regional/neon_site.py index 873d02fec7..c3fe6e2c68 100755 --- a/python/ctsm/site_and_regional/neon_site.py +++ b/python/ctsm/site_and_regional/neon_site.py @@ -17,13 +17,12 @@ sys.path.insert(1, _CTSM_PYTHON) from ctsm import add_cime_to_path +from ctsm.path_utils import path_to_ctsm_root from CIME import build from CIME.case import Case from CIME.utils import safe_copy, expect, symlink_force -from ctsm.path_utils import path_to_ctsm_root - logger = logging.getLogger(__name__) diff --git a/python/ctsm/test/test_unit_NeonSite.py b/python/ctsm/test/test_unit_NeonSite.py index 2e6c2650e5..3832bf8537 100755 --- a/python/ctsm/test/test_unit_NeonSite.py +++ b/python/ctsm/test/test_unit_NeonSite.py @@ -18,7 +18,7 @@ # pylint: disable=wrong-import-position from ctsm import unit_testing -from ctsm.site_and_regional.NeonSite import FUNCTION_NAME +from ctsm.site_and_regional.neon_site import NeonSite # pylint: disable=invalid-name @@ -40,7 +40,7 @@ def tearDown(self): """ shutil.rmtree(self._tempdir, ignore_errors=True) - def test_function(self): + def test_build_base_case(self): """ 
Test that NeonSite class is working properly... """ @@ -57,6 +57,18 @@ def test_function(self): # change to previous dir once listing.csv file is created in tempdir and test complete #os.chdir(previous_dir) continue + def test_get_batch_query(self): + """ + """ + continue + def test_run_case(self): + """ + """ + continue + def test_modify_user_nl(self): + """ + """ + continue if __name__ == "__main__": diff --git a/python/ctsm/test/test_unit_arg_parse.py b/python/ctsm/test/test_unit_arg_parse.py index ab4fc2adb8..f7cf7a97ff 100755 --- a/python/ctsm/test/test_unit_arg_parse.py +++ b/python/ctsm/test/test_unit_arg_parse.py @@ -11,6 +11,7 @@ import shutil import os import sys +import glob # -- add python/ctsm to path (needed if we want to run the test stand-alone) _CTSM_PYTHON = os.path.join(os.path.dirname(os.path.realpath(__file__)), os.pardir, os.pardir) @@ -18,7 +19,8 @@ # pylint: disable=wrong-import-position from ctsm import unit_testing -from ctsm.site_and_regional.arg_parse import FUNCTION_NAME +from ctsm.site_and_regional.arg_parse import get_parser +from ctsm.path_utils import path_to_ctsm_root # pylint: disable=invalid-name @@ -44,19 +46,22 @@ def test_function(self): """ Test that arg_parse is working properly... """ - #valid_neon_sites = ["ABBY", "BART"] - #previous_dir = os.getcwd() - #os.chdir(self._tempdir) # cd to tempdir - #available_list = check_neon_listing(valid_neon_sites) - #self.assertEqual( - # available_list[0].name, "ABBY", "available list of actual sites not as expected" - #) - #self.assertEqual( - # available_list[1].name, "BART", "available list of actual sites not as expected" - #) - # change to previous dir once listing.csv file is created in tempdir and test complete - #os.chdir(previous_dir) - continue + sys.argv = ["--neon-sites ['ABBY']"] + #arguments= ["--neon-sites", "ABBY"] #, "--experiment 'test'", '--overwrite False', '--setup-only True', '--rerun False', '--run-type ad', '--experiment test'] + description='' + cesmroot = path_to_ctsm_root() + valid_neon_sites = glob.glob( + os.path.join(cesmroot, "cime_config", "usermods_dirs", "NEON", "[!d]*") + ) + valid_neon_sites = sorted([v.split("/")[-1] for v in valid_neon_sites]) + parsed_arguments = get_parser(sys.argv, description, valid_neon_sites) + + print(parsed_arguments) + self.assertEqual( + parsed_arguments[0], "ABBY", "arguments not processed as expected" + ) + # TODO: Still need to figure out correct formatting to get argument recognized properly! + # TODO: Also it might be useful to add in a number of fake arguments to check that they all work... 
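+        # NOTE: the parser treats the first element of sys.argv as the program name
+        # and skips it, and each option and its value must be separate list elements,
+        # e.g. sys.argv = ["arg_parse", "--neon-sites", "ABBY"]; a later patch in
+        # this series ("mostly testing updates") switches the test to that form.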
if __name__ == "__main__": diff --git a/tools/site_and_regional/run_neon b/tools/site_and_regional/run_neon index d2bdfbe786..e20189a374 100755 --- a/tools/site_and_regional/run_neon +++ b/tools/site_and_regional/run_neon @@ -42,9 +42,6 @@ _CTSM_PYTHON = os.path.join( ) sys.path.insert(1, _CTSM_PYTHON) -from ctsm.site_and_regional.neon_site import NeonSite -from ctsm.site_and_regional.arg_parse import get_parser - from ctsm.site_and_regional.run_neon import main if __name__ == "__main__": From d3bffbe824c05ff919f80b1ece6d3e641696ff12 Mon Sep 17 00:00:00 2001 From: Teagan King Date: Thu, 11 Jan 2024 11:12:56 -0700 Subject: [PATCH 09/85] included new tests for NeonSite class --- python/ctsm/site_and_regional/neon_site.py | 1 + python/ctsm/test/test_unit_NeonSite.py | 95 ++++++++++++++++++---- tools/site_and_regional/run_neon | 1 - 3 files changed, 78 insertions(+), 19 deletions(-) diff --git a/python/ctsm/site_and_regional/neon_site.py b/python/ctsm/site_and_regional/neon_site.py index c3fe6e2c68..2e92e7f04b 100755 --- a/python/ctsm/site_and_regional/neon_site.py +++ b/python/ctsm/site_and_regional/neon_site.py @@ -390,3 +390,4 @@ def modify_user_nl(self, case_root, run_type, rundir): with open(user_nl_fname, "a") as nl_file: for line in user_nl_lines: nl_file.write("{}\n".format(line)) + diff --git a/python/ctsm/test/test_unit_NeonSite.py b/python/ctsm/test/test_unit_NeonSite.py index 3832bf8537..1520a58426 100755 --- a/python/ctsm/test/test_unit_NeonSite.py +++ b/python/ctsm/test/test_unit_NeonSite.py @@ -10,6 +10,7 @@ import tempfile import shutil import os +import glob import sys # -- add python/ctsm to path (needed if we want to run the test stand-alone) @@ -42,33 +43,91 @@ def tearDown(self): def test_build_base_case(self): """ - Test that NeonSite class is working properly... + Test that NeonSite class' build_base_case is working properly... """ - #valid_neon_sites = ["ABBY", "BART"] - #previous_dir = os.getcwd() - #os.chdir(self._tempdir) # cd to tempdir - #available_list = check_neon_listing(valid_neon_sites) - #self.assertEqual( - # available_list[0].name, "ABBY", "available list of actual sites not as expected" - #) - #self.assertEqual( - # available_list[1].name, "BART", "available list of actual sites not as expected" - #) - # change to previous dir once listing.csv file is created in tempdir and test complete - #os.chdir(previous_dir) - continue + #neonsite = NeonSite(ADD SOME PARAMETERS) + #neonsite.build_base_case(ARGUMENTS) # NOT SURE WE ACTUALLY WANT TO DO THIS DUE TO TIME CONSTRAINTS? + #CHECK IF ACTS AS EXPECTED + #continue + def test_get_batch_query(self): """ + Test that NeonSite class' get_batch_query is working properly... """ - continue + #neonsite = NeonSite(ADD SOME PARAMETERS) + #neonsite.get_batch_query(ARGUMENTS) + #CHECK IF ACTS AS EXPECTED + #continue + # ALSO DOESN'T SEEM THE MOST REASONABLE TO TEST + def test_run_case(self): """ + Test that NeonSite class' run_case is working properly... """ - continue - def test_modify_user_nl(self): + #neonsite = NeonSite(ADD SOME PARAMETERS) + #neonsite.run_case(ARGUMENTS) # NOT SURE WE ACTUALLY WANT TO DO THIS DUE TO TIME CONSTRAINTS? 
+ #CHECK IF ACTS AS EXPECTED + #continue + + def test_modify_user_nl_transient(self): """ + Test that NeonSite class' modify_user_nl is correctly adding lines to namelist for transient cases """ - continue + # NeonSite parameters: + name = 'ABBY' + start_year = 2020 + end_year = 2021 + start_month = 1 + end_month = 12 + #finidat = None + finidat = 'dummy_finidat' + + # modify_user_nl parameters: + case_root = self._tempdir + run_type = 'transient' + rundir = '' + + # update namelist + neonsite = NeonSite(name, start_year, end_year, start_month, end_month, finidat) + modified_neonsite = neonsite.modify_user_nl(case_root, run_type, rundir) + + # gather file contents for test + new_nl_file = open(glob.glob(case_root+'/*')[0], "r") + lines_read = new_nl_file.readlines()[0] + new_nl_file.close() + + # assertion + self.assertEqual(lines_read, "finidat = '/inputdata/lnd/ctsm/initdata/dummy_finidat'\n", 'transient case has unexpected nl') + + def test_modify_user_nl_ad(self): + """ + Test that NeonSite class' modify_user_nl is correctly adding lines to namelist for ad cases + """ + # NeonSite parameters: + name = 'ABBY' + start_year = 2020 + end_year = 2021 + start_month = 1 + end_month = 12 + #finidat = None + finidat = 'dummy_finidat' + + # modify_user_nl parameters: + case_root = self._tempdir + run_type = 'ad' + rundir = '' + + # update namelist + neonsite = NeonSite(name, start_year, end_year, start_month, end_month, finidat) + modified_neonsite = neonsite.modify_user_nl(case_root, run_type, rundir) + + # gather file contents for test + new_nl_file = open(glob.glob(case_root+'/*')[0], "r") + lines_read = new_nl_file.readlines()[1] + new_nl_file.close() + + # assertion + self.assertEqual(lines_read, "hist_mfilt = 20\n", 'ad case has unexpected nl') if __name__ == "__main__": diff --git a/tools/site_and_regional/run_neon b/tools/site_and_regional/run_neon index e20189a374..ad930f50e3 100755 --- a/tools/site_and_regional/run_neon +++ b/tools/site_and_regional/run_neon @@ -34,7 +34,6 @@ conda activate ctsm_pylib import os import sys -import argparse # -- add python/ctsm to path _CTSM_PYTHON = os.path.join( From 88372f2b9fc1a3947f0b743268f93f51f6aff4d4 Mon Sep 17 00:00:00 2001 From: Teagan King Date: Thu, 11 Jan 2024 13:39:06 -0700 Subject: [PATCH 10/85] black/pylint --- python/ctsm/site_and_regional/arg_parse.py | 38 +-------- python/ctsm/site_and_regional/neon_site.py | 3 +- python/ctsm/site_and_regional/run_neon.py | 10 +-- python/ctsm/test/test_unit_arg_parse.py | 11 ++- ...nit_NeonSite.py => test_unit_neon_site.py} | 80 ++++++++++--------- tools/site_and_regional/run_neon | 1 + 6 files changed, 59 insertions(+), 84 deletions(-) rename python/ctsm/test/{test_unit_NeonSite.py => test_unit_neon_site.py} (54%) diff --git a/python/ctsm/site_and_regional/arg_parse.py b/python/ctsm/site_and_regional/arg_parse.py index f61a262911..f45fef041c 100644 --- a/python/ctsm/site_and_regional/arg_parse.py +++ b/python/ctsm/site_and_regional/arg_parse.py @@ -3,7 +3,6 @@ """ import argparse -import datetime import logging import os import sys @@ -12,13 +11,11 @@ _CTSM_PYTHON = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..", "python")) sys.path.insert(1, _CTSM_PYTHON) +#pylint: disable=wrong-import-position, import-error, unused-import from ctsm import add_cime_to_path from ctsm.utils import parse_isoduration - -from CIME import build -from CIME.case import Case -from CIME.utils import safe_copy, expect, parse_args_and_handle_standard_logging_options, setup_standard_logging_options, 
symlink_force - +from CIME.utils import parse_args_and_handle_standard_logging_options +from CIME.utils import setup_standard_logging_options def get_parser(args, description, valid_neon_sites): """ @@ -123,7 +120,7 @@ def get_parser(args, description, valid_neon_sites): Type of run to do [default: %(default)s] """, - choices=["ad", "postad", "transient"], #, "sasu"], + choices=["ad", "postad", "transient"], # , "sasu"], default="transient", ) @@ -162,33 +159,6 @@ def get_parser(args, description, valid_neon_sites): default="0Y", ) - #parser.add_argument( - # "--start-date", - # help=""" - # Start date for running CTSM simulation in ISO format. - # [default: %(default)s] - # (currently non-functional) - # """, - # action="store", - # dest="start_date", - # required=False, - # type=datetime.date.fromisoformat, - # default=datetime.datetime.strptime("2018-01-01", "%Y-%m-%d"), - #) - - #parser.add_argument( - # "--end-date", - # help=""" - # End date for running CTSM simulation in ISO format. - # [default: %(default)s] - # """, - # action="store", - # dest="end_date", - # required=False, - # type=datetime.date.fromisoformat, - # default=datetime.datetime.strptime("2021-01-01", "%Y-%m-%d"), - #) - parser.add_argument( "--run-from-postad", help=""" diff --git a/python/ctsm/site_and_regional/neon_site.py b/python/ctsm/site_and_regional/neon_site.py index 2e92e7f04b..8881ef5867 100755 --- a/python/ctsm/site_and_regional/neon_site.py +++ b/python/ctsm/site_and_regional/neon_site.py @@ -3,7 +3,6 @@ """ # Import libraries -import datetime import glob import logging import os @@ -16,6 +15,7 @@ _CTSM_PYTHON = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..", "python")) sys.path.insert(1, _CTSM_PYTHON) +#pylint: disable=wrong-import-position, import-error, unused-import from ctsm import add_cime_to_path from ctsm.path_utils import path_to_ctsm_root @@ -390,4 +390,3 @@ def modify_user_nl(self, case_root, run_type, rundir): with open(user_nl_fname, "a") as nl_file: for line in user_nl_lines: nl_file.write("{}\n".format(line)) - diff --git a/python/ctsm/site_and_regional/run_neon.py b/python/ctsm/site_and_regional/run_neon.py index f7ed477cf1..3802133a64 100755 --- a/python/ctsm/site_and_regional/run_neon.py +++ b/python/ctsm/site_and_regional/run_neon.py @@ -61,18 +61,16 @@ _CTSM_PYTHON = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..", "python")) sys.path.insert(1, _CTSM_PYTHON) +# pylint: disable=wrong-import-position from ctsm.path_utils import path_to_ctsm_root from ctsm.download_utils import download_file from ctsm.site_and_regional.arg_parse import get_parser from ctsm.site_and_regional.neon_site import NeonSite -from ctsm import add_cime_to_path - -from CIME import build -from CIME.case import Case -from CIME.utils import safe_copy, expect, symlink_force - +# pylint: disable=import-error from standard_script_setup import * +# the above import is need to set CIMEROOT environment variable +# TODO: figure out what specifically needs to be imported (not '*') logger = logging.getLogger(__name__) diff --git a/python/ctsm/test/test_unit_arg_parse.py b/python/ctsm/test/test_unit_arg_parse.py index f7cf7a97ff..1da6b63f77 100755 --- a/python/ctsm/test/test_unit_arg_parse.py +++ b/python/ctsm/test/test_unit_arg_parse.py @@ -47,8 +47,9 @@ def test_function(self): Test that arg_parse is working properly... 
""" sys.argv = ["--neon-sites ['ABBY']"] - #arguments= ["--neon-sites", "ABBY"] #, "--experiment 'test'", '--overwrite False', '--setup-only True', '--rerun False', '--run-type ad', '--experiment test'] - description='' + # arguments= ["--neon-sites", "ABBY"] #, "--experiment 'test'", '--overwrite False', + # '--setup-only True', '--rerun False', '--run-type ad', '--experiment test'] + description = "" cesmroot = path_to_ctsm_root() valid_neon_sites = glob.glob( os.path.join(cesmroot, "cime_config", "usermods_dirs", "NEON", "[!d]*") @@ -57,11 +58,9 @@ def test_function(self): parsed_arguments = get_parser(sys.argv, description, valid_neon_sites) print(parsed_arguments) - self.assertEqual( - parsed_arguments[0], "ABBY", "arguments not processed as expected" - ) + self.assertEqual(parsed_arguments[0], "ABBY", "arguments not processed as expected") # TODO: Still need to figure out correct formatting to get argument recognized properly! - # TODO: Also it might be useful to add in a number of fake arguments to check that they all work... + # TODO: It might be useful to add a number of arguments to check that they all work... if __name__ == "__main__": diff --git a/python/ctsm/test/test_unit_NeonSite.py b/python/ctsm/test/test_unit_neon_site.py similarity index 54% rename from python/ctsm/test/test_unit_NeonSite.py rename to python/ctsm/test/test_unit_neon_site.py index 1520a58426..a75326e185 100755 --- a/python/ctsm/test/test_unit_NeonSite.py +++ b/python/ctsm/test/test_unit_neon_site.py @@ -3,7 +3,7 @@ Unit tests for NeonSite You can run this by: - python -m unittest test_unit_NeonSite.py + python -m unittest test_unit_neon_site.py """ import unittest @@ -45,89 +45,97 @@ def test_build_base_case(self): """ Test that NeonSite class' build_base_case is working properly... """ - #neonsite = NeonSite(ADD SOME PARAMETERS) - #neonsite.build_base_case(ARGUMENTS) # NOT SURE WE ACTUALLY WANT TO DO THIS DUE TO TIME CONSTRAINTS? - #CHECK IF ACTS AS EXPECTED - #continue + # neonsite = NeonSite(ADD SOME PARAMETERS) + # neonsite.build_base_case(ARGUMENTS) + # NOT SURE WE ACTUALLY WANT TO DO THIS DUE TO TIME CONSTRAINTS? + # CHECK IF ACTS AS EXPECTED + # continue def test_get_batch_query(self): """ Test that NeonSite class' get_batch_query is working properly... """ - #neonsite = NeonSite(ADD SOME PARAMETERS) - #neonsite.get_batch_query(ARGUMENTS) - #CHECK IF ACTS AS EXPECTED - #continue + # neonsite = NeonSite(ADD SOME PARAMETERS) + # neonsite.get_batch_query(ARGUMENTS) + # CHECK IF ACTS AS EXPECTED + # continue # ALSO DOESN'T SEEM THE MOST REASONABLE TO TEST def test_run_case(self): """ Test that NeonSite class' run_case is working properly... """ - #neonsite = NeonSite(ADD SOME PARAMETERS) - #neonsite.run_case(ARGUMENTS) # NOT SURE WE ACTUALLY WANT TO DO THIS DUE TO TIME CONSTRAINTS? - #CHECK IF ACTS AS EXPECTED - #continue + # neonsite = NeonSite(ADD SOME PARAMETERS) + # neonsite.run_case(ARGUMENTS) + # NOT SURE WE ACTUALLY WANT TO DO THIS DUE TO TIME CONSTRAINTS? 
+ # CHECK IF ACTS AS EXPECTED + # continue def test_modify_user_nl_transient(self): """ - Test that NeonSite class' modify_user_nl is correctly adding lines to namelist for transient cases + Test that modify_user_nl is correctly adding lines to namelist for transient cases """ # NeonSite parameters: - name = 'ABBY' + name = "ABBY" start_year = 2020 end_year = 2021 start_month = 1 end_month = 12 - #finidat = None - finidat = 'dummy_finidat' + # finidat = None + finidat = "dummy_finidat" # modify_user_nl parameters: case_root = self._tempdir - run_type = 'transient' - rundir = '' + run_type = "transient" + rundir = "" - # update namelist - neonsite = NeonSite(name, start_year, end_year, start_month, end_month, finidat) - modified_neonsite = neonsite.modify_user_nl(case_root, run_type, rundir) + # create NeonSite object and update namelist + NeonSite(name, start_year, end_year, start_month, end_month, finidat).modify_user_nl( + case_root, run_type, rundir + ) # gather file contents for test - new_nl_file = open(glob.glob(case_root+'/*')[0], "r") + new_nl_file = open(glob.glob(case_root + "/*")[0], "r") lines_read = new_nl_file.readlines()[0] new_nl_file.close() - + # assertion - self.assertEqual(lines_read, "finidat = '/inputdata/lnd/ctsm/initdata/dummy_finidat'\n", 'transient case has unexpected nl') + self.assertEqual( + lines_read, + "finidat = '/inputdata/lnd/ctsm/initdata/dummy_finidat'\n", + "transient case has unexpected nl", + ) def test_modify_user_nl_ad(self): """ - Test that NeonSite class' modify_user_nl is correctly adding lines to namelist for ad cases + Test that modify_user_nl is correctly adding lines to namelist for ad cases """ # NeonSite parameters: - name = 'ABBY' + name = "ABBY" start_year = 2020 end_year = 2021 start_month = 1 end_month = 12 - #finidat = None - finidat = 'dummy_finidat' + # finidat = None + finidat = "dummy_finidat" # modify_user_nl parameters: case_root = self._tempdir - run_type = 'ad' - rundir = '' + run_type = "ad" + rundir = "" - # update namelist - neonsite = NeonSite(name, start_year, end_year, start_month, end_month, finidat) - modified_neonsite = neonsite.modify_user_nl(case_root, run_type, rundir) + # create NeonSite object and update namelist + NeonSite(name, start_year, end_year, start_month, end_month, finidat).modify_user_nl( + case_root, run_type, rundir + ) # gather file contents for test - new_nl_file = open(glob.glob(case_root+'/*')[0], "r") + new_nl_file = open(glob.glob(case_root + "/*")[0], "r") lines_read = new_nl_file.readlines()[1] new_nl_file.close() # assertion - self.assertEqual(lines_read, "hist_mfilt = 20\n", 'ad case has unexpected nl') + self.assertEqual(lines_read, "hist_mfilt = 20\n", "ad case has unexpected nl") if __name__ == "__main__": diff --git a/tools/site_and_regional/run_neon b/tools/site_and_regional/run_neon index ad930f50e3..2a3091f6a6 100755 --- a/tools/site_and_regional/run_neon +++ b/tools/site_and_regional/run_neon @@ -41,6 +41,7 @@ _CTSM_PYTHON = os.path.join( ) sys.path.insert(1, _CTSM_PYTHON) +#pylint: disable=import-error, wrong-import-position from ctsm.site_and_regional.run_neon import main if __name__ == "__main__": From 1c12818024c2d9dfe79481364775e87984646bf0 Mon Sep 17 00:00:00 2001 From: Teagan King Date: Fri, 12 Jan 2024 13:35:49 -0700 Subject: [PATCH 11/85] mostly testing updates --- python/ctsm/site_and_regional/arg_parse.py | 6 ++++- python/ctsm/site_and_regional/neon_site.py | 14 +++++----- python/ctsm/site_and_regional/run_neon.py | 4 +-- python/ctsm/test/test_unit_arg_parse.py | 12 
++++----- python/ctsm/test/test_unit_neon_site.py | 30 ---------------------- tools/site_and_regional/run_neon | 2 +- 6 files changed, 20 insertions(+), 48 deletions(-) diff --git a/python/ctsm/site_and_regional/arg_parse.py b/python/ctsm/site_and_regional/arg_parse.py index f45fef041c..99f184dd62 100644 --- a/python/ctsm/site_and_regional/arg_parse.py +++ b/python/ctsm/site_and_regional/arg_parse.py @@ -11,12 +11,13 @@ _CTSM_PYTHON = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..", "python")) sys.path.insert(1, _CTSM_PYTHON) -#pylint: disable=wrong-import-position, import-error, unused-import +# pylint: disable=wrong-import-position, import-error, unused-import, wrong-import-order from ctsm import add_cime_to_path from ctsm.utils import parse_isoduration from CIME.utils import parse_args_and_handle_standard_logging_options from CIME.utils import setup_standard_logging_options + def get_parser(args, description, valid_neon_sites): """ Get parser object for this script. @@ -209,9 +210,12 @@ def get_parser(args, description, valid_neon_sites): run_length = args.run_length run_length = parse_isoduration(run_length) + base_case_root = None if args.base_case_root: base_case_root = os.path.abspath(args.base_case_root) + if not os.path.exists(base_case_root): + raise ValueError("Base case root does not exist: {}".format(base_case_root)) # Reduce output level for this script unless --debug or # --verbose is provided on the command line diff --git a/python/ctsm/site_and_regional/neon_site.py b/python/ctsm/site_and_regional/neon_site.py index 8881ef5867..31ae78f5ad 100755 --- a/python/ctsm/site_and_regional/neon_site.py +++ b/python/ctsm/site_and_regional/neon_site.py @@ -15,7 +15,7 @@ _CTSM_PYTHON = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..", "python")) sys.path.insert(1, _CTSM_PYTHON) -#pylint: disable=wrong-import-position, import-error, unused-import +# pylint: disable=wrong-import-position, import-error, unused-import, wrong-import-order from ctsm import add_cime_to_path from ctsm.path_utils import path_to_ctsm_root @@ -26,6 +26,7 @@ logger = logging.getLogger(__name__) +# pylint: disable=too-many-instance-attributes class NeonSite: """ A class for encapsulating neon sites. 
@@ -40,11 +41,6 @@ def __init__(self, name, start_year, end_year, start_month, end_month, finidat): self.cesmroot = path_to_ctsm_root() self.finidat = finidat - def __str__(self): - return ( - str(self.__class__) + "\n" + "\n".join((str(item) + " = " for item in (self.__dict__))) - ) - def build_base_case( self, cesmroot, output_root, res, compset, overwrite=False, setup_only=False ): @@ -66,7 +62,9 @@ def build_base_case( Flag to overwrite the case if exists """ print("---- building a base case -------") + # pylint: disable=attribute-defined-outside-init self.base_case_root = output_root + # pylint: enable=attribute-defined-outside-init user_mods_dirs = [os.path.join(cesmroot, "cime_config", "usermods_dirs", "NEON", self.name)] if not output_root: output_root = os.getcwd() @@ -135,6 +133,7 @@ def build_base_case( # update case_path to be the full path to the base case return case_path + # pylint: disable=no-self-use def get_batch_query(self, case): """ Function for querying the batch queue query command for a case, depending on the @@ -149,6 +148,7 @@ def get_batch_query(self, case): return "none" return case.get_value("batch_query") + # pylint: disable=too-many-statements def run_case( self, base_case_root, @@ -218,12 +218,14 @@ def run_case( # For existing case check that the compset name is correct existingcompname = case.get_value("COMPSET") match = re.search("^HIST", existingcompname, flags=re.IGNORECASE) + # pylint: disable=undefined-variable if re.search("^HIST", compset, flags=re.IGNORECASE) is None: expect( match is None, """Existing base case is a historical type and should not be --rerun with the --overwrite option""", ) + # pylint: enable=undefined-variable else: expect( match is not None, diff --git a/python/ctsm/site_and_regional/run_neon.py b/python/ctsm/site_and_regional/run_neon.py index 3802133a64..31c5bbafb4 100755 --- a/python/ctsm/site_and_regional/run_neon.py +++ b/python/ctsm/site_and_regional/run_neon.py @@ -67,10 +67,8 @@ from ctsm.site_and_regional.arg_parse import get_parser from ctsm.site_and_regional.neon_site import NeonSite -# pylint: disable=import-error +# pylint: disable=import-error, wildcard-import, wrong-import-order from standard_script_setup import * -# the above import is need to set CIMEROOT environment variable -# TODO: figure out what specifically needs to be imported (not '*') logger = logging.getLogger(__name__) diff --git a/python/ctsm/test/test_unit_arg_parse.py b/python/ctsm/test/test_unit_arg_parse.py index 1da6b63f77..71254eee60 100755 --- a/python/ctsm/test/test_unit_arg_parse.py +++ b/python/ctsm/test/test_unit_arg_parse.py @@ -46,9 +46,7 @@ def test_function(self): """ Test that arg_parse is working properly... """ - sys.argv = ["--neon-sites ['ABBY']"] - # arguments= ["--neon-sites", "ABBY"] #, "--experiment 'test'", '--overwrite False', - # '--setup-only True', '--rerun False', '--run-type ad', '--experiment test'] + sys.argv = ["arg_parse", "--neon-sites", "ABBY", "--experiment", "test", "--run-type", "ad"] description = "" cesmroot = path_to_ctsm_root() valid_neon_sites = glob.glob( @@ -57,10 +55,10 @@ def test_function(self): valid_neon_sites = sorted([v.split("/")[-1] for v in valid_neon_sites]) parsed_arguments = get_parser(sys.argv, description, valid_neon_sites) - print(parsed_arguments) - self.assertEqual(parsed_arguments[0], "ABBY", "arguments not processed as expected") - # TODO: Still need to figure out correct formatting to get argument recognized properly! 
- # TODO: It might be useful to add a number of arguments to check that they all work... + self.assertEqual(parsed_arguments[0][0], "ABBY", "arguments not processed as expected") + self.assertEqual(parsed_arguments[3], "test", "arguments not processed as expected") + self.assertEqual(parsed_arguments[4], False, "arguments not processed as expected") + self.assertEqual(parsed_arguments[2], "ad", "arguments not processed as expected") if __name__ == "__main__": diff --git a/python/ctsm/test/test_unit_neon_site.py b/python/ctsm/test/test_unit_neon_site.py index a75326e185..4828718272 100755 --- a/python/ctsm/test/test_unit_neon_site.py +++ b/python/ctsm/test/test_unit_neon_site.py @@ -41,36 +41,6 @@ def tearDown(self): """ shutil.rmtree(self._tempdir, ignore_errors=True) - def test_build_base_case(self): - """ - Test that NeonSite class' build_base_case is working properly... - """ - # neonsite = NeonSite(ADD SOME PARAMETERS) - # neonsite.build_base_case(ARGUMENTS) - # NOT SURE WE ACTUALLY WANT TO DO THIS DUE TO TIME CONSTRAINTS? - # CHECK IF ACTS AS EXPECTED - # continue - - def test_get_batch_query(self): - """ - Test that NeonSite class' get_batch_query is working properly... - """ - # neonsite = NeonSite(ADD SOME PARAMETERS) - # neonsite.get_batch_query(ARGUMENTS) - # CHECK IF ACTS AS EXPECTED - # continue - # ALSO DOESN'T SEEM THE MOST REASONABLE TO TEST - - def test_run_case(self): - """ - Test that NeonSite class' run_case is working properly... - """ - # neonsite = NeonSite(ADD SOME PARAMETERS) - # neonsite.run_case(ARGUMENTS) - # NOT SURE WE ACTUALLY WANT TO DO THIS DUE TO TIME CONSTRAINTS? - # CHECK IF ACTS AS EXPECTED - # continue - def test_modify_user_nl_transient(self): """ Test that modify_user_nl is correctly adding lines to namelist for transient cases diff --git a/tools/site_and_regional/run_neon b/tools/site_and_regional/run_neon index 2a3091f6a6..ffc3be2af7 100755 --- a/tools/site_and_regional/run_neon +++ b/tools/site_and_regional/run_neon @@ -41,7 +41,7 @@ _CTSM_PYTHON = os.path.join( ) sys.path.insert(1, _CTSM_PYTHON) -#pylint: disable=import-error, wrong-import-position +# pylint: disable=import-error, wrong-import-position from ctsm.site_and_regional.run_neon import main if __name__ == "__main__": From 2d2df94cdc536ca80a925c3dde517516bea9d7c0 Mon Sep 17 00:00:00 2001 From: Teagan King Date: Tue, 16 Jan 2024 12:20:31 -0700 Subject: [PATCH 12/85] rename arg_parse --- .../{arg_parse.py => neon_arg_parse.py} | 0 python/ctsm/site_and_regional/run_neon.py | 2 +- ...it_arg_parse.py => test_unit_neon_arg_parse.py} | 14 +++++++------- 3 files changed, 8 insertions(+), 8 deletions(-) rename python/ctsm/site_and_regional/{arg_parse.py => neon_arg_parse.py} (100%) rename python/ctsm/test/{test_unit_arg_parse.py => test_unit_neon_arg_parse.py} (80%) diff --git a/python/ctsm/site_and_regional/arg_parse.py b/python/ctsm/site_and_regional/neon_arg_parse.py similarity index 100% rename from python/ctsm/site_and_regional/arg_parse.py rename to python/ctsm/site_and_regional/neon_arg_parse.py diff --git a/python/ctsm/site_and_regional/run_neon.py b/python/ctsm/site_and_regional/run_neon.py index 31c5bbafb4..72bf3fdfb4 100755 --- a/python/ctsm/site_and_regional/run_neon.py +++ b/python/ctsm/site_and_regional/run_neon.py @@ -64,7 +64,7 @@ # pylint: disable=wrong-import-position from ctsm.path_utils import path_to_ctsm_root from ctsm.download_utils import download_file -from ctsm.site_and_regional.arg_parse import get_parser +from ctsm.site_and_regional.neon_arg_parse import get_parser from 
ctsm.site_and_regional.neon_site import NeonSite # pylint: disable=import-error, wildcard-import, wrong-import-order diff --git a/python/ctsm/test/test_unit_arg_parse.py b/python/ctsm/test/test_unit_neon_arg_parse.py similarity index 80% rename from python/ctsm/test/test_unit_arg_parse.py rename to python/ctsm/test/test_unit_neon_arg_parse.py index 71254eee60..863c7e3e5a 100755 --- a/python/ctsm/test/test_unit_arg_parse.py +++ b/python/ctsm/test/test_unit_neon_arg_parse.py @@ -1,9 +1,9 @@ #!/usr/bin/env python3 """ -Unit tests for arg_parse +Unit tests for neon_arg_parse You can run this by: - python -m unittest test_unit_arg_parse.py + python -m unittest test_unit_neon_arg_parse.py """ import unittest @@ -19,15 +19,15 @@ # pylint: disable=wrong-import-position from ctsm import unit_testing -from ctsm.site_and_regional.arg_parse import get_parser +from ctsm.site_and_regional.neon_arg_parse import get_parser from ctsm.path_utils import path_to_ctsm_root # pylint: disable=invalid-name -class Testarg_parse(unittest.TestCase): +class Test_neon_arg_parse(unittest.TestCase): """ - Basic class for testing arg_parse.py. + Basic class for testing neon_arg_parse.py. """ def setUp(self): @@ -44,9 +44,9 @@ def tearDown(self): def test_function(self): """ - Test that arg_parse is working properly... + Test that neon_arg_parse is properly reading arguments... """ - sys.argv = ["arg_parse", "--neon-sites", "ABBY", "--experiment", "test", "--run-type", "ad"] + sys.argv = ["neon_arg_parse", "--neon-sites", "ABBY", "--experiment", "test", "--run-type", "ad"] description = "" cesmroot = path_to_ctsm_root() valid_neon_sites = glob.glob( From a59c6e12d5d7cb2bd438bda5f536a9e668e022e4 Mon Sep 17 00:00:00 2001 From: Teagan King Date: Tue, 16 Jan 2024 12:32:19 -0700 Subject: [PATCH 13/85] black arg_parse unit test --- python/ctsm/test/test_unit_neon_arg_parse.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/python/ctsm/test/test_unit_neon_arg_parse.py b/python/ctsm/test/test_unit_neon_arg_parse.py index 863c7e3e5a..7bae337709 100755 --- a/python/ctsm/test/test_unit_neon_arg_parse.py +++ b/python/ctsm/test/test_unit_neon_arg_parse.py @@ -46,7 +46,15 @@ def test_function(self): """ Test that neon_arg_parse is properly reading arguments... """ - sys.argv = ["neon_arg_parse", "--neon-sites", "ABBY", "--experiment", "test", "--run-type", "ad"] + sys.argv = [ + "neon_arg_parse", + "--neon-sites", + "ABBY", + "--experiment", + "test", + "--run-type", + "ad", + ] description = "" cesmroot = path_to_ctsm_root() valid_neon_sites = glob.glob( From a5e8382425e98ce888958b562102185d98eed2d2 Mon Sep 17 00:00:00 2001 From: Samuel Levis Date: Wed, 17 Jan 2024 15:27:36 -0700 Subject: [PATCH 14/85] Answer-changing merge tag: turn Meier2022 on and bring in #2212 --- bld/namelist_files/namelist_defaults_ctsm.xml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/bld/namelist_files/namelist_defaults_ctsm.xml b/bld/namelist_files/namelist_defaults_ctsm.xml index 456d99ac2a..fcb9bc2074 100644 --- a/bld/namelist_files/namelist_defaults_ctsm.xml +++ b/bld/namelist_files/namelist_defaults_ctsm.xml @@ -498,7 +498,11 @@ attributes from the config_cache.xml file (with keys converted to upper-case). + +Meier2022 .true. .false. 
From 96466e868cd3a674c81a5fdeb929dc99664155a1 Mon Sep 17 00:00:00 2001 From: Samuel Levis Date: Thu, 18 Jan 2024 11:44:23 -0700 Subject: [PATCH 15/85] Making Meier2022 the default for ctsm5.1 --- bld/namelist_files/namelist_defaults_ctsm.xml | 3 --- 1 file changed, 3 deletions(-) diff --git a/bld/namelist_files/namelist_defaults_ctsm.xml b/bld/namelist_files/namelist_defaults_ctsm.xml index fcb9bc2074..fd5b18f132 100644 --- a/bld/namelist_files/namelist_defaults_ctsm.xml +++ b/bld/namelist_files/namelist_defaults_ctsm.xml @@ -498,11 +498,8 @@ attributes from the config_cache.xml file (with keys converted to upper-case). - -Meier2022 .true. .false. From 1ca0860e63db55f5a75e5089d6da6b3a6ae37fdf Mon Sep 17 00:00:00 2001 From: Samuel Levis Date: Thu, 18 Jan 2024 13:20:28 -0700 Subject: [PATCH 16/85] Add subr. setup_logic_roughness_methods to CLMBuildNamelist.pm --- bld/CLMBuildNamelist.pm | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/bld/CLMBuildNamelist.pm b/bld/CLMBuildNamelist.pm index 17c46f9bef..14a3a2ddb5 100755 --- a/bld/CLMBuildNamelist.pm +++ b/bld/CLMBuildNamelist.pm @@ -1571,6 +1571,7 @@ sub process_namelist_inline_logic { setup_logic_irrigate($opts, $nl_flags, $definition, $defaults, $nl); setup_logic_start_type($opts, $nl_flags, $nl); setup_logic_decomp_performance($opts, $nl_flags, $definition, $defaults, $nl); + setup_logic_roughness_methods($opts, $nl_flags, $definition, $defaults, $nl, $physv); setup_logic_snicar_methods($opts, $nl_flags, $definition, $defaults, $nl); setup_logic_snow($opts, $nl_flags, $definition, $defaults, $nl); setup_logic_glacier($opts, $nl_flags, $definition, $defaults, $nl, $envxml_ref); @@ -2002,6 +2003,25 @@ sub setup_logic_decomp_performance { #------------------------------------------------------------------------------- +sub setup_logic_roughness_methods { + my ($opts, $nl_flags, $definition, $defaults, $nl, $physv) = @_; + + add_default($opts, $nl_flags->{'inputdata_rootdir'}, $definition, $defaults, $nl, 'z0param_method', + 'phys'=>$nl_flags->{'phys'} ); + + my $var = remove_leading_and_trailing_quotes( $nl->get_value("z0param_method") ); + if ( $var ne "Meier2022" && $var ne "ZengWang2007" ) { + $log->fatal_error("$var is incorrect entry for the namelist variable z0param_method; expected Meier2022 or ZengWang2007"); + } + my $phys = $physv->as_string(); + if ( $phys eq "clm4_5" || $phys eq "clm5_0" ) { + if ( $var eq "Meier2022" ) { + $log->fatal_error("z0param_method = $var and phys = $phys, but this method has been tested only with clm5_1 and later versions; to use with earlier versions, disable this error, and add Meier2022 parameters to the corresponding params file"); + } + } +} +#------------------------------------------------------------------------------- + sub setup_logic_snicar_methods { my ($opts, $nl_flags, $definition, $defaults, $nl) = @_; From b3a05fae812fff71fbab3e23eca5c51a10b5914a Mon Sep 17 00:00:00 2001 From: Samuel Levis Date: Thu, 18 Jan 2024 14:55:40 -0700 Subject: [PATCH 17/85] Add clm51 namelist defaults for crop_residue_removal_frac and tillage_mode --- bld/CLMBuildNamelist.pm | 3 +++ bld/namelist_files/namelist_defaults_ctsm.xml | 3 +++ 2 files changed, 6 insertions(+) diff --git a/bld/CLMBuildNamelist.pm b/bld/CLMBuildNamelist.pm index 14a3a2ddb5..555f0d5f98 100755 --- a/bld/CLMBuildNamelist.pm +++ b/bld/CLMBuildNamelist.pm @@ -2262,6 +2262,7 @@ sub setup_logic_crop_inparm { 'use_crop'=>$nl->get_value('use_crop') ); my $crop_residue_removal_frac = 
$nl->get_value('crop_residue_removal_frac'); + add_default($opts, $nl_flags->{'inputdata_rootdir'}, $definition, $defaults, $nl, 'crop_residue_removal_frac' ); if ( $crop_residue_removal_frac < 0.0 or $crop_residue_removal_frac > 1.0 ) { $log->fatal_error("crop_residue_removal_frac must be in range [0, 1]"); } @@ -2276,6 +2277,8 @@ sub setup_logic_crop_inparm { sub setup_logic_tillage { my ($opts, $nl_flags, $definition, $defaults, $nl) = @_; + add_default($opts, $nl_flags->{'inputdata_rootdir'}, $definition, $defaults, $nl, 'tillage_mode' ); + my $tillage_mode = remove_leading_and_trailing_quotes( $nl->get_value( "tillage_mode" ) ); if ( $tillage_mode ne "off" && $tillage_mode ne "" && not &value_is_true($nl->get_value('use_crop')) ) { $log->fatal_error( "Tillage only works on crop columns, so use_crop must be true if tillage is enabled." ); diff --git a/bld/namelist_files/namelist_defaults_ctsm.xml b/bld/namelist_files/namelist_defaults_ctsm.xml index fd5b18f132..5ae01ef2ae 100644 --- a/bld/namelist_files/namelist_defaults_ctsm.xml +++ b/bld/namelist_files/namelist_defaults_ctsm.xml @@ -555,6 +555,7 @@ attributes from the config_cache.xml file (with keys converted to upper-case). .true. 0.d+0 +0.5d00 constant @@ -2814,6 +2815,8 @@ use_crop=".true.">lnd/clm2/surfdata_map/ctsm5.1.dev052/landuse.timeseries_mpasa1 off +low + .false. 0.26d00 From 7b14631ef2f7fc5ab02e00669f8580c11f842a86 Mon Sep 17 00:00:00 2001 From: Samuel Levis Date: Fri, 19 Jan 2024 09:32:26 -0700 Subject: [PATCH 18/85] Add new expected failures to ExpectedTestFails.xml --- cime_config/testdefs/ExpectedTestFails.xml | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/cime_config/testdefs/ExpectedTestFails.xml b/cime_config/testdefs/ExpectedTestFails.xml index 16fff0e71e..55d1363e6b 100644 --- a/cime_config/testdefs/ExpectedTestFails.xml +++ b/cime_config/testdefs/ExpectedTestFails.xml @@ -228,4 +228,25 @@ + + + FAIL + #2325 + + + + + + FAIL + #2325 + + + + + + FAIL + #2325 + + + From ed885061bd9b98ebfcb0aa1286cfbea9ade71325 Mon Sep 17 00:00:00 2001 From: Sam Rabin Date: Fri, 19 Jan 2024 09:42:07 -0700 Subject: [PATCH 19/85] run_sys_tests: Check Python environment for FatesColdTwoStream tests. --- python/ctsm/run_sys_tests.py | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/python/ctsm/run_sys_tests.py b/python/ctsm/run_sys_tests.py index e4a0bcf009..959d9e52ae 100644 --- a/python/ctsm/run_sys_tests.py +++ b/python/ctsm/run_sys_tests.py @@ -249,7 +249,7 @@ def run_sys_tests( else: raise RuntimeError("None of suite_name, testfile or testlist were provided") if not running_ctsm_py_tests: - _try_systemtests(testname_list) + _check_py_env(testname_list) _run_create_test( cime_path=cime_path, test_args=test_args, @@ -708,7 +708,7 @@ def _run_test_suite( ) -def _try_systemtests(testname_list): +def _check_py_env(test_attributes): err_msg = " can't be loaded. Do you need to activate the ctsm_pylib conda environment?" # Suppress pylint import-outside-toplevel warning because (a) we only want to import # this when certain tests are requested, and (b) the import needs to be in a try-except @@ -716,11 +716,21 @@ def _try_systemtests(testname_list): # pylint: disable=import-outside-toplevel disable # Suppress pylint unused-import warning because the import itself IS the use. 
# pylint: disable=unused-import disable - if any("FSURDATMODIFYCTSM" in t for t in testname_list): + if any("FSURDATMODIFYCTSM" in t for t in test_attributes): try: import ctsm.modify_input_files.modify_fsurdat except ModuleNotFoundError as err: raise ModuleNotFoundError("modify_fsurdat" + err_msg) from err + if any("FatesColdTwoStream" in t for t in test_attributes): + # This bit is needed because it's outside the top-level python/ directory. + _FATES_DIR = os.path.join( + os.path.dirname(os.path.realpath(__file__)), os.pardir, os.pardir,"src", "fates" + ) + sys.path.insert(1, _FATES_DIR) + try: + import tools.modify_fates_paramfile + except ModuleNotFoundError as err: + raise ModuleNotFoundError("modify_fates_paramfile" + err_msg) from err def _get_compilers_for_suite(suite_name, machine_name, running_ctsm_py_tests): @@ -730,7 +740,8 @@ def _get_compilers_for_suite(suite_name, machine_name, running_ctsm_py_tests): "No tests found for suite {} on machine {}".format(suite_name, machine_name) ) if not running_ctsm_py_tests: - _try_systemtests([t["testname"] for t in test_data]) + _check_py_env([t["testname"] for t in test_data]) + _check_py_env([t["testmods"] for t in test_data]) compilers = sorted({one_test["compiler"] for one_test in test_data}) logger.info("Running with compilers: %s", compilers) return compilers From 4f65f39b2baf856150f8ade9268205da385866b2 Mon Sep 17 00:00:00 2001 From: Sam Rabin Date: Fri, 19 Jan 2024 10:36:33 -0700 Subject: [PATCH 20/85] run_sys_tests: Check exact name of testmods. --- python/ctsm/run_sys_tests.py | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/python/ctsm/run_sys_tests.py b/python/ctsm/run_sys_tests.py index 959d9e52ae..c996f3fdaa 100644 --- a/python/ctsm/run_sys_tests.py +++ b/python/ctsm/run_sys_tests.py @@ -716,12 +716,25 @@ def _check_py_env(test_attributes): # pylint: disable=import-outside-toplevel disable # Suppress pylint unused-import warning because the import itself IS the use. # pylint: disable=unused-import disable + + # Check requirements for FSURDATMODIFYCTSM, if needed if any("FSURDATMODIFYCTSM" in t for t in test_attributes): try: import ctsm.modify_input_files.modify_fsurdat except ModuleNotFoundError as err: raise ModuleNotFoundError("modify_fsurdat" + err_msg) from err - if any("FatesColdTwoStream" in t for t in test_attributes): + + # Isolate testmods, producing a list like ["clm-test1mod1", "clm-test2mod1", "clm-test2mod2", ...] + test_attributes_split = [] + for t in test_attributes: + for x in t.split("."): + y = x.replace("/", "-") + for z in y.split("--"): + test_attributes_split.append(z) + + # Check that list for any testmods that use modify_fates_paramfile.py + testmods_to_check = ["clm-FatesColdTwoStream", "clm-FatesColdTwoStreamNoCompFixedBioGeo"] + if any(t in testmods_to_check for t in test_attributes_split): # This bit is needed because it's outside the top-level python/ directory. _FATES_DIR = os.path.join( os.path.dirname(os.path.realpath(__file__)), os.pardir, os.pardir,"src", "fates" From 33be9f67aab9b0db6fa46014e6aa0297024300c4 Mon Sep 17 00:00:00 2001 From: Sam Rabin Date: Fri, 19 Jan 2024 10:56:09 -0700 Subject: [PATCH 21/85] run_sys_tests: Handle tests without testmods. 
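Not every entry in test_data carries a "testmods" key, so guard the list
comprehension before handing its result to _check_py_env(). A minimal sketch of
the failure mode; the dictionary contents below are illustrative only (real
entries come from the test-suite query in _get_compilers_for_suite()):

    # hypothetical entries, for illustration; the second one lacks "testmods"
    test_data = [
        {"testname": "SMS.f10_f10_mg37.I2000Clm50BgcCrop", "testmods": "clm/default"},
        {"testname": "SMS.f10_f10_mg37.I2000Clm50BgcCrop"},
    ]
    bad = [t["testmods"] for t in test_data]  # raises KeyError
    good = [t["testmods"] for t in test_data if "testmods" in t.keys()]  # safe
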
--- python/ctsm/run_sys_tests.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/ctsm/run_sys_tests.py b/python/ctsm/run_sys_tests.py index c996f3fdaa..7dcafd059f 100644 --- a/python/ctsm/run_sys_tests.py +++ b/python/ctsm/run_sys_tests.py @@ -754,7 +754,7 @@ def _get_compilers_for_suite(suite_name, machine_name, running_ctsm_py_tests): ) if not running_ctsm_py_tests: _check_py_env([t["testname"] for t in test_data]) - _check_py_env([t["testmods"] for t in test_data]) + _check_py_env([t["testmods"] for t in test_data if "testmods" in t.keys()]) compilers = sorted({one_test["compiler"] for one_test in test_data}) logger.info("Running with compilers: %s", compilers) return compilers From c5966a4a77b908f673a0738ab94dd4b4638af410 Mon Sep 17 00:00:00 2001 From: Sam Rabin Date: Fri, 19 Jan 2024 10:57:21 -0700 Subject: [PATCH 22/85] run_sys_tests: Changes to satisfy pylint. --- python/ctsm/run_sys_tests.py | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/python/ctsm/run_sys_tests.py b/python/ctsm/run_sys_tests.py index 7dcafd059f..0d6cf9dcd5 100644 --- a/python/ctsm/run_sys_tests.py +++ b/python/ctsm/run_sys_tests.py @@ -716,6 +716,9 @@ def _check_py_env(test_attributes): # pylint: disable=import-outside-toplevel disable # Suppress pylint unused-import warning because the import itself IS the use. # pylint: disable=unused-import disable + # Suppress pylint import-error warning because the whole point here is to check + # whether import is possible. + # pylint: disable=import-error disable # Check requirements for FSURDATMODIFYCTSM, if needed if any("FSURDATMODIFYCTSM" in t for t in test_attributes): @@ -724,22 +727,23 @@ def _check_py_env(test_attributes): except ModuleNotFoundError as err: raise ModuleNotFoundError("modify_fsurdat" + err_msg) from err - # Isolate testmods, producing a list like ["clm-test1mod1", "clm-test2mod1", "clm-test2mod2", ...] + # Isolate testmods, producing a list like\ + # ["clm-test1mod1", "clm-test2mod1", "clm-test2mod2", ...] test_attributes_split = [] - for t in test_attributes: - for x in t.split("."): - y = x.replace("/", "-") - for z in y.split("--"): - test_attributes_split.append(z) + for test_attribute in test_attributes: + for dot_split in test_attribute.split("."): + slash_replaced = dot_split.replace("/", "-") + for ddash_split in slash_replaced.split("--"): + test_attributes_split.append(ddash_split) # Check that list for any testmods that use modify_fates_paramfile.py testmods_to_check = ["clm-FatesColdTwoStream", "clm-FatesColdTwoStreamNoCompFixedBioGeo"] if any(t in testmods_to_check for t in test_attributes_split): # This bit is needed because it's outside the top-level python/ directory. - _FATES_DIR = os.path.join( + fates_dir = os.path.join( os.path.dirname(os.path.realpath(__file__)), os.pardir, os.pardir,"src", "fates" ) - sys.path.insert(1, _FATES_DIR) + sys.path.insert(1, fates_dir) try: import tools.modify_fates_paramfile except ModuleNotFoundError as err: From 0dcd0a3c1abcaffe5529f8d79a6bc34734b195c7 Mon Sep 17 00:00:00 2001 From: Sam Rabin Date: Fri, 19 Jan 2024 10:57:57 -0700 Subject: [PATCH 23/85] run_sys_tests: Reformatting with black. 
--- python/ctsm/run_sys_tests.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/ctsm/run_sys_tests.py b/python/ctsm/run_sys_tests.py index 0d6cf9dcd5..033edf918d 100644 --- a/python/ctsm/run_sys_tests.py +++ b/python/ctsm/run_sys_tests.py @@ -741,7 +741,7 @@ def _check_py_env(test_attributes): if any(t in testmods_to_check for t in test_attributes_split): # This bit is needed because it's outside the top-level python/ directory. fates_dir = os.path.join( - os.path.dirname(os.path.realpath(__file__)), os.pardir, os.pardir,"src", "fates" + os.path.dirname(os.path.realpath(__file__)), os.pardir, os.pardir, "src", "fates" ) sys.path.insert(1, fates_dir) try: From 41e7db5ba74f0a0a4673c9f81fb0cf94ca75eda1 Mon Sep 17 00:00:00 2001 From: Sam Rabin Date: Fri, 19 Jan 2024 10:58:46 -0700 Subject: [PATCH 24/85] Added previous commit to .git-blame-ignore-revs. --- .git-blame-ignore-revs | 1 + 1 file changed, 1 insertion(+) diff --git a/.git-blame-ignore-revs b/.git-blame-ignore-revs index e63de8e099..8708f8e0c2 100644 --- a/.git-blame-ignore-revs +++ b/.git-blame-ignore-revs @@ -21,6 +21,7 @@ e4d38681df23ccca0ae29581a45f8362574e0630 025d5e7c2e80263717fb029101d65cbbf261c3c4 a9d96219902cf609636886c7073a84407f450d9a d866510188d26d51bcd6d37239283db690af7e82 +0dcd0a3c1abcaffe5529f8d79a6bc34734b195c7 # Ran SystemTests and python/ctsm through black python formatter 5364ad66eaceb55dde2d3d598fe4ce37ac83a93c 8056ae649c1b37f5e10aaaac79005d6e3a8b2380 From 0244d13a6ce2e87f5bd38947dfb573d58e89e4cd Mon Sep 17 00:00:00 2001 From: Samuel Levis Date: Fri, 19 Jan 2024 10:59:39 -0700 Subject: [PATCH 25/85] Making sure tillage stays off when use_crop = .false. --- bld/CLMBuildNamelist.pm | 9 +++++---- bld/namelist_files/namelist_defaults_ctsm.xml | 2 +- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/bld/CLMBuildNamelist.pm b/bld/CLMBuildNamelist.pm index 555f0d5f98..dae7b5f7f0 100755 --- a/bld/CLMBuildNamelist.pm +++ b/bld/CLMBuildNamelist.pm @@ -1637,7 +1637,7 @@ sub process_namelist_inline_logic { ############################### # namelist group: tillage # ############################### - setup_logic_tillage($opts, $nl_flags, $definition, $defaults, $nl); + setup_logic_tillage($opts, $nl_flags, $definition, $defaults, $nl, $physv); ############################### # namelist group: ch4par_in # @@ -2275,12 +2275,13 @@ sub setup_logic_crop_inparm { #------------------------------------------------------------------------------- sub setup_logic_tillage { - my ($opts, $nl_flags, $definition, $defaults, $nl) = @_; + my ($opts, $nl_flags, $definition, $defaults, $nl, $physv) = @_; - add_default($opts, $nl_flags->{'inputdata_rootdir'}, $definition, $defaults, $nl, 'tillage_mode' ); + add_default($opts, $nl_flags->{'inputdata_rootdir'}, $definition, $defaults, $nl, 'tillage_mode', + 'use_crop'=>$nl_flags->{'use_crop'}, 'phys'=>$physv->as_string() ); my $tillage_mode = remove_leading_and_trailing_quotes( $nl->get_value( "tillage_mode" ) ); - if ( $tillage_mode ne "off" && $tillage_mode ne "" && not &value_is_true($nl->get_value('use_crop')) ) { + if ( $tillage_mode ne "off" && $tillage_mode ne "" && not &value_is_true($nl_flags->{'use_crop'}) ) { $log->fatal_error( "Tillage only works on crop columns, so use_crop must be true if tillage is enabled." 
); } } diff --git a/bld/namelist_files/namelist_defaults_ctsm.xml b/bld/namelist_files/namelist_defaults_ctsm.xml index 5ae01ef2ae..d5d8bc08eb 100644 --- a/bld/namelist_files/namelist_defaults_ctsm.xml +++ b/bld/namelist_files/namelist_defaults_ctsm.xml @@ -2815,7 +2815,7 @@ use_crop=".true.">lnd/clm2/surfdata_map/ctsm5.1.dev052/landuse.timeseries_mpasa1 off -low +low .false. 0.26d00 From 9a54faf4b8ebd908bdf21acc6fe73d14bba8bb69 Mon Sep 17 00:00:00 2001 From: Samuel Levis Date: Fri, 19 Jan 2024 18:57:31 -0700 Subject: [PATCH 26/85] Updated ChangeLog/ChangeSum --- doc/ChangeLog | 76 +++++++++++++++++++++++++++++++++++++++++++++++++++ doc/ChangeSum | 3 +- 2 files changed, 78 insertions(+), 1 deletion(-) diff --git a/doc/ChangeLog b/doc/ChangeLog index 5b4a214880..ee6314d1cc 100644 --- a/doc/ChangeLog +++ b/doc/ChangeLog @@ -1,4 +1,80 @@ =============================================================== +Tag name: ctsm5.1.dev165 +Originator(s): slevis (Samuel Levis,UCAR/TSS,303-665-1310), oleson (Keith Oleson), samrabin (Sam Rabin) +Date: Fri 19 Jan 2024 06:40:36 PM MST +One-line Summary: Turn Meier2022, tillage, and residue removal on for ctsm5.1, fix #2212 + +Purpose and description of changes +---------------------------------- + +Answer-changing merge-tag: +- Turn Meier2022 on for ctsm5.1. Had turned off temporarily while fixing a bug. +- Bring in Urban answer fix #2212. +- Turn tillage and residue removal on for ctsm5.1. + +Significant changes to scientifically-supported configurations +-------------------------------------------------------------- + +Does this tag change answers significantly for any of the following physics configurations? +(Details of any changes will be given in the "Answer changes" section below.) + + [Put an [X] in the box for any configuration with significant answer changes.] + +[x] clm5_1 + +[ ] clm5_0 + +[ ] ctsm5_0-nwp + +[ ] clm4_5 + + +Bugs fixed +---------- +CTSM issues fixed (include CTSM Issue #): +Fixes #2212 + +Notes of particular relevance for users +--------------------------------------- +Changes made to namelist defaults (e.g., changed parameter values): +- Making Meier2022 the default for ctsm5.1 again. +- Making tillage low by default for ctsm5.1. +- Making residue removal 0.5 by default for ctsm5.1. + +Testing summary: +---------------- + [PASS means all tests PASS; OK means tests PASS other than expected fails.] + + regular tests (aux_clm: https://github.com/ESCOMP/CTSM/wiki/System-Testing-Guide#pre-merge-system-testing): + + derecho ----- OK + izumi ------- OK + +Answer changes +-------------- + +Changes answers relative to baseline: YES + + [ If a tag changes answers relative to baseline comparison the + following should be filled in (otherwise remove this section). + And always remove these three lines and parts that don't apply. 
]
+
+ Summarize any changes to answers, i.e.,
+ - what code configurations: ALL
+ - what platforms/compilers: ALL
+ - nature of change:
+   clm45 and clm50: larger than roundoff
+   clm51: possibly climate changing
+   Effect of Meier2022 was documented here: https://github.com/NCAR/LMWG_dev/issues/38
+   Effect of tillage and residue removal may require an Answer Changing Tag simulation
+
+Other details
+-------------
+Pull Requests that document the changes (include PR ids):
+  https://github.com/ESCOMP/ctsm/pull/2323
+
+===============================================================
+===============================================================
 Tag name: ctsm5.1.dev164
 Originator(s): rgknox (Ryan Knox)
 Date: Wed 17 Jan 2024 12:38:18 PM MST
 One-line Summary: Compatibility and tests for FATES 2-Stream
diff --git a/doc/ChangeSum b/doc/ChangeSum
index dee25d848c..0d563ec2bb 100644
--- a/doc/ChangeSum
+++ b/doc/ChangeSum
@@ -1,7 +1,8 @@
 Tag Who Date Summary
 ============================================================================================================================
+ ctsm5.1.dev165 slevis 01/19/2024 Turn Meier2022, tillage, residue removal on for ctsm5.1, fix #2212
 ctsm5.1.dev164 rgknox 01/17/2024 Compatibility and tests for FATES 2-Stream
- ctsm5.1.dev163 sam 01/10/2024 Add tillage and residue removal
+ ctsm5.1.dev163 samrabin 01/10/2024 Add tillage and residue removal
 ctsm5.1.dev162 samrabin 01/05/2024 Improvements to processing of crop calendar files
 ctsm5.1.dev161 samrabin 01/04/2024 Refactor 20-year running means of crop GDD accumulation
 ctsm5.1.dev160 glemieux 12/30/2023 FATES landuse version 1
From 975e628b10ce49990cc60846079f2f735508caaf Mon Sep 17 00:00:00 2001
From: Sam Rabin
Date: Mon, 22 Jan 2024 10:07:07 -0700
Subject: [PATCH 27/85] Fix typo in comment.

---
 python/ctsm/run_sys_tests.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/ctsm/run_sys_tests.py b/python/ctsm/run_sys_tests.py
index 033edf918d..33ff158f27 100644
--- a/python/ctsm/run_sys_tests.py
+++ b/python/ctsm/run_sys_tests.py
@@ -727,7 +727,7 @@ def _check_py_env(test_attributes):
         except ModuleNotFoundError as err:
             raise ModuleNotFoundError("modify_fsurdat" + err_msg) from err

-    # Isolate testmods, producing a list like\
+    # Isolate testmods, producing a list like
     # ["clm-test1mod1", "clm-test2mod1", "clm-test2mod2", ...]
     test_attributes_split = []
     for test_attribute in test_attributes:
From 473a581de75ceead7c690938b5cfb8fafc9a2001 Mon Sep 17 00:00:00 2001
From: Sam Rabin
Date: Mon, 22 Jan 2024 11:06:41 -0700
Subject: [PATCH 28/85] run_sys_tests: Functionize _get_testmod_list().

---
 python/ctsm/run_sys_tests.py | 26 ++++++++++++++++----------
 1 file changed, 16 insertions(+), 10 deletions(-)

diff --git a/python/ctsm/run_sys_tests.py b/python/ctsm/run_sys_tests.py
index 33ff158f27..a386598766 100644
--- a/python/ctsm/run_sys_tests.py
+++ b/python/ctsm/run_sys_tests.py
@@ -708,6 +708,20 @@ def _run_test_suite(
     )


+def _get_testmod_list(test_attributes):
+    # Isolate testmods, producing a list like
+    # ["clm-test1mod1", "clm-test2mod1", "clm-test2mod2", ...]
+    # Handles test attributes passed in from run_sys_tests calls using -t, -f, or -s
+
+    testmods = []
+    for test_attribute in test_attributes:
+        for dot_split in test_attribute.split("."):
+            slash_replaced = dot_split.replace("/", "-")
+            for ddash_split in slash_replaced.split("--"):
+                testmods.append(ddash_split)
+    return testmods
+
+
 def _check_py_env(test_attributes):
     err_msg = " can't be loaded. Do you need to activate the ctsm_pylib conda environment?"
# Suppress pylint import-outside-toplevel warning because (a) we only want to import @@ -727,18 +741,10 @@ def _check_py_env(test_attributes): except ModuleNotFoundError as err: raise ModuleNotFoundError("modify_fsurdat" + err_msg) from err - # Isolate testmods, producing a list like - # ["clm-test1mod1", "clm-test2mod1", "clm-test2mod2", ...] - test_attributes_split = [] - for test_attribute in test_attributes: - for dot_split in test_attribute.split("."): - slash_replaced = dot_split.replace("/", "-") - for ddash_split in slash_replaced.split("--"): - test_attributes_split.append(ddash_split) - # Check that list for any testmods that use modify_fates_paramfile.py testmods_to_check = ["clm-FatesColdTwoStream", "clm-FatesColdTwoStreamNoCompFixedBioGeo"] - if any(t in testmods_to_check for t in test_attributes_split): + testmods = _get_testmod_list(test_attributes) + if any(t in testmods_to_check for t in testmods): # This bit is needed because it's outside the top-level python/ directory. fates_dir = os.path.join( os.path.dirname(os.path.realpath(__file__)), os.pardir, os.pardir, "src", "fates" From 039a243f52d6a1f565ebed513fe90084e14fbd8b Mon Sep 17 00:00:00 2001 From: Sam Rabin Date: Mon, 22 Jan 2024 11:12:34 -0700 Subject: [PATCH 29/85] Add 'unique' option to _get_testmod_list(), default True. --- python/ctsm/run_sys_tests.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/python/ctsm/run_sys_tests.py b/python/ctsm/run_sys_tests.py index a386598766..7e232d8efe 100644 --- a/python/ctsm/run_sys_tests.py +++ b/python/ctsm/run_sys_tests.py @@ -708,7 +708,7 @@ def _run_test_suite( ) -def _get_testmod_list(test_attributes): +def _get_testmod_list(test_attributes, unique=True): # Isolate testmods, producing a list like # ["clm-test1mod1", "clm-test2mod1", "clm-test2mod2", ...] # Handles test attributes passed in from run_sys_tests calls using -t, -f, or -s @@ -718,7 +718,9 @@ def _get_testmod_list(test_attributes): for dot_split in test_attribute.split("."): slash_replaced = dot_split.replace("/", "-") for ddash_split in slash_replaced.split("--"): - testmods.append(ddash_split) + if ddash_split not in testmods or not unique: + testmods.append(ddash_split) + return testmods From a7dbc378cf88f2e0befdd34e83bca5902995fa89 Mon Sep 17 00:00:00 2001 From: Sam Rabin Date: Mon, 22 Jan 2024 11:45:03 -0700 Subject: [PATCH 30/85] _get_testmod_list(): Only include strings with 'clm-'. --- python/ctsm/run_sys_tests.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/ctsm/run_sys_tests.py b/python/ctsm/run_sys_tests.py index 7e232d8efe..de93081504 100644 --- a/python/ctsm/run_sys_tests.py +++ b/python/ctsm/run_sys_tests.py @@ -708,7 +708,7 @@ def _run_test_suite( ) -def _get_testmod_list(test_attributes, unique=True): +def _get_testmod_list(test_attributes, unique=False): # Isolate testmods, producing a list like # ["clm-test1mod1", "clm-test2mod1", "clm-test2mod2", ...] 
# Handles test attributes passed in from run_sys_tests calls using -t, -f, or -s @@ -718,7 +718,7 @@ def _get_testmod_list(test_attributes, unique=True): for dot_split in test_attribute.split("."): slash_replaced = dot_split.replace("/", "-") for ddash_split in slash_replaced.split("--"): - if ddash_split not in testmods or not unique: + if "clm-" in ddash_split and (ddash_split not in testmods or not unique): testmods.append(ddash_split) return testmods From 8d7d88a245b7f0e7fc4d10a060a5fee567ada2de Mon Sep 17 00:00:00 2001 From: Sam Rabin Date: Mon, 22 Jan 2024 11:45:42 -0700 Subject: [PATCH 31/85] Add unit tests for _get_testmod_list(). --- python/ctsm/test/test_unit_run_sys_tests.py | 53 ++++++++++++++++++++- 1 file changed, 52 insertions(+), 1 deletion(-) diff --git a/python/ctsm/test/test_unit_run_sys_tests.py b/python/ctsm/test/test_unit_run_sys_tests.py index ee5197d76f..65ec1df5a5 100755 --- a/python/ctsm/test/test_unit_run_sys_tests.py +++ b/python/ctsm/test/test_unit_run_sys_tests.py @@ -16,7 +16,7 @@ from ctsm import add_cime_to_path # pylint: disable=unused-import from ctsm import unit_testing -from ctsm.run_sys_tests import run_sys_tests +from ctsm.run_sys_tests import run_sys_tests, _get_testmod_list from ctsm.machine_defaults import MACHINE_DEFAULTS from ctsm.machine import create_machine from ctsm.joblauncher.job_launcher_factory import JOB_LAUNCHER_FAKE @@ -269,6 +269,57 @@ def test_withDryRun_nothingDone(self): self.assertEqual(os.listdir(self._scratch), []) self.assertEqual(machine.job_launcher.get_commands(), []) + def test_getTestmodList_suite(self): + """Ensure that _get_testmod_list() works correctly with suite-style input""" + input = [ + "clm/default", + "clm/default", + "clm/crop", + "clm/cropMonthlyOutput", + ] + target = [ + "clm-default", + "clm-default", + "clm-crop", + "clm-cropMonthlyOutput", + ] + output = _get_testmod_list(input, unique=False) + self.assertEqual(output, target) + + def test_getTestmodList_suite_unique(self): + """Ensure that _get_testmod_list() works correctly with unique=True""" + input = [ + "clm/default", + "clm/default", + "clm/crop", + "clm/cropMonthlyOutput", + ] + target = [ + "clm-default", + "clm-crop", + "clm-cropMonthlyOutput", + ] + + output = _get_testmod_list(input, unique=True) + self.assertEqual(output, target) + + def test_getTestmodList_testname(self): + """Ensure that _get_testmod_list() works correctly with full test name(s) specified""" + input = [ + "ERS_D_Ld15.f45_f45_mg37.I2000Clm50FatesRs.izumi_nag.clm-crop", + "ERS_D_Ld15.f45_f45_mg37.I2000Clm50FatesRs.izumi_nag.clm-default", + ] + target = ["clm-crop", "clm-default"] + output = _get_testmod_list(input) + self.assertEqual(output, target) + + def test_getTestmodList_twomods(self): + """Ensure that _get_testmod_list() works correctly with full test name(s) specified and two mods in one test""" + input = ["ERS_D_Ld15.f45_f45_mg37.I2000Clm50FatesRs.izumi_nag.clm-default--clm-crop"] + target = ["clm-default", "clm-crop"] + output = _get_testmod_list(input) + self.assertEqual(output, target) + if __name__ == "__main__": unit_testing.setup_for_tests() From 4ae91064cf476e1b6ce76d98f7ec668befedaa1d Mon Sep 17 00:00:00 2001 From: Sam Rabin Date: Mon, 22 Jan 2024 12:58:51 -0700 Subject: [PATCH 32/85] Add Izumi version of the aux_clm unit testing. 
--- cime_config/testdefs/testlist_clm.xml | 1 + 1 file changed, 1 insertion(+) diff --git a/cime_config/testdefs/testlist_clm.xml b/cime_config/testdefs/testlist_clm.xml index c915552748..4761a2111f 100644 --- a/cime_config/testdefs/testlist_clm.xml +++ b/cime_config/testdefs/testlist_clm.xml @@ -3433,6 +3433,7 @@ + From 02950989606c1c772303fe21ab3a1fc3dca6c076 Mon Sep 17 00:00:00 2001 From: Samuel Levis Date: Tue, 23 Jan 2024 18:09:59 -0700 Subject: [PATCH 33/85] First drafts of ChangeLog/ChangeSum --- doc/ChangeLog | 81 +++++++++++++++++++++++++++++++++++++++++++++++++++ doc/ChangeSum | 1 + 2 files changed, 82 insertions(+) diff --git a/doc/ChangeLog b/doc/ChangeLog index ee6314d1cc..34af74d42d 100644 --- a/doc/ChangeLog +++ b/doc/ChangeLog @@ -1,4 +1,85 @@ =============================================================== +Tag name: ctsm5.1.dev166 +Originator(s): slevis (Samuel Levis,UCAR/TSS,303-665-1310), tking (Teagan King), afoster (Adrianna Foster), samrabin (Sam Rabin) +Date: Tue 23 Jan 2024 05:48:34 PM MST +One-line Summary: BFB merge tag + +Purpose and description of changes +---------------------------------- + + #2315 @TeaganKing Refactoring run_neon for PLUMBER2 part1 + #2213 @samsrabin Automatically assign high priority items to project 25 + #2330 @samsrabin Add Izumi version of the aux_clm unit testing + #2326 @samsrabin run_sys_tests: Check Python environment for FatesColdTwoStream tests +KEEP? #2124 @adrifoster Create helper function for interpMontlyVeg to improve readability + +Significant changes to scientifically-supported configurations +-------------------------------------------------------------- + +Does this tag change answers significantly for any of the following physics configurations? +(Details of any changes will be given in the "Answer changes" section below.) + + [Put an [X] in the box for any configuration with significant answer changes.] + +[ ] clm5_1 + +[ ] clm5_0 + +[ ] ctsm5_0-nwp + +[ ] clm4_5 + + +Bugs fixed +---------- + +CTSM issues fixed (include CTSM Issue #): + Fixes #2315 + Fixes #2213 + Fixes #2330 + Fixes #2326 +KEEP? Fixes #2124 + +Known bugs introduced in this tag (include issue #): + New feature coming in with #2213 where user will receive email from + github when pushing to their remote: + "Run failed: .github/workflows/assign-to-project.yml" + +Notes of particular relevance for developers: +--------------------------------------------- +Changes to tests or testing: + #2315 New unit tests for arg_parse and NeonSite + #2330 New test in aux_clm that does unit testing on izumi because unit + testing does not work on derecho, yet + +Testing summary: +---------------- + + [PASS means all tests PASS; OK means tests PASS other than expected fails.] 
+ + python testing (if python code has changed; see instructions in python/README.md; document testing done): + + derecho - OK, pylint gives long list of warnings (expected) + + regular tests (aux_clm: https://github.com/ESCOMP/CTSM/wiki/System-Testing-Guide#pre-merge-system-testing): + + derecho ----- + izumi ------- + + +Answer changes +-------------- + +Changes answers relative to baseline: No + +Other details +------------- + +Pull Requests that document the changes (include PR ids): + https://github.com/ESCOMP/ctsm/pull/2334 + +=============================================================== +=============================================================== Tag name: ctsm5.1.dev165 Originator(s): slevis (Samuel Levis,UCAR/TSS,303-665-1310), oleson (Keith Oleson), samrabin (Sam Rabin) Date: Fri 19 Jan 2024 06:40:36 PM MST diff --git a/doc/ChangeSum b/doc/ChangeSum index 0d563ec2bb..a796d1b09f 100644 --- a/doc/ChangeSum +++ b/doc/ChangeSum @@ -1,5 +1,6 @@ Tag Who Date Summary ============================================================================================================================ + ctsm5.1.dev166 multiple 01/23/2024 BFB merge tag ctsm5.1.dev165 slevis 01/19/2024 Turn Meier2022, tillage, residue removal on for ctsm5.1, fix #2212 ctsm5.1.dev164 rgknox 01/17/2024 Compatibility and tests for FATES 2-Stream ctsm5.1.dev163 samrabin 01/10/2024 Add tillage and residue removal From 8b262bdc0ec4972ec786ea8659ea8d300e2322d2 Mon Sep 17 00:00:00 2001 From: Samuel Levis Date: Wed, 24 Jan 2024 11:40:54 -0700 Subject: [PATCH 34/85] Revert "Merge remote-tracking branch 'adrifoster/interpMonthlyVeg_helper_function' into bfb_merge_tag_2315_et_al" This reverts commit d327ec5cfa4ce4eab53a66fc71cc1c3c8395fe61, reversing changes made to 8189178f49574c46990694024444dac7153e175e. --- src/biogeochem/SatellitePhenologyMod.F90 | 50 ------------------------ src/main/clm_driver.F90 | 37 +++++++++++++++++- 2 files changed, 35 insertions(+), 52 deletions(-) diff --git a/src/biogeochem/SatellitePhenologyMod.F90 b/src/biogeochem/SatellitePhenologyMod.F90 index 7714af02eb..3e9341f430 100644 --- a/src/biogeochem/SatellitePhenologyMod.F90 +++ b/src/biogeochem/SatellitePhenologyMod.F90 @@ -28,7 +28,6 @@ module SatellitePhenologyMod public :: SatellitePhenologyInit ! Dynamically allocate memory public :: interpMonthlyVeg ! interpolate monthly vegetation data public :: readAnnualVegetation ! Read in annual vegetation (needed for Dry-deposition) - public :: do_interpMonthlyVeg ! whether or not to call interpMonthlyVeg ! ! !PRIVATE MEMBER FUNCTIONS: private :: readMonthlyVegetation ! read monthly vegetation data for two months @@ -258,55 +257,6 @@ subroutine interpMonthlyVeg (bounds, canopystate_inst) end subroutine interpMonthlyVeg !============================================================================== - - logical function do_interpMonthlyVeg(use_cn, use_fates, use_fates_sp, doalb, n_drydep) - ! - ! !DESCRIPTION: - ! returns whether or not to conduct interpMonthlyVeg subroutine - ! - ! !ARGUMENTS: - logical, intent(in) :: use_cn ! are we using the big-leaf, BGC version of model? - logical, intent(in) :: use_fates ! are we running FATES? - logical, intent(in) :: use_fates_sp ! are we running FATES-SP? - logical, intent(in) :: doalb ! true if time for surface albedo calc - integer, intent(in) :: n_drydep ! number in drypdep list - - if (use_cn .and. n_drydep > 0) then - - ! For dry-deposition need to call CLMSP so that mlaidiff is obtained - ! 
NOTE: This is also true of FATES below - do_interpMonthlyVeg = .true. - - else if (use_fates .and. use_fates_sp) then - - ! For FATES-Specified phenology mode interpolate the weights for - ! time-interpolation of monthly vegetation data (as in SP mode below) - ! Also for FATES with dry-deposition as above need to call CLMSP so that mlaidiff is obtained - !if ( use_fates_sp .or. (n_drydep > 0 ) ) then ! Replace with this when we have dry-deposition working - ! For now don't allow for dry-deposition because of issues in #1044 EBK Jun/17/2022 - do_interpMonthlyVeg = .true. - - else if (doalb .or. n_drydep > 0) then - - ! Determine weights for time interpolation of monthly vegetation data. - ! This also determines whether it is time to read new monthly vegetation and - ! obtain updated leaf area index [mlai1,mlai2], stem area index [msai1,msai2], - ! vegetation top [mhvt1,mhvt2] and vegetation bottom [mhvb1,mhvb2]. The - ! weights obtained here are used in subroutine SatellitePhenology to obtain time - ! interpolated values. - ! This is also done for FATES-SP mode above - do_interpMonthlyVeg = .true. - - else - - do_interpMonthlyVeg = .false. - - end if - - end function do_interpMonthlyVeg - - !============================================================================== - subroutine readAnnualVegetation (bounds, canopystate_inst) ! ! !DESCRIPTION: diff --git a/src/main/clm_driver.F90 b/src/main/clm_driver.F90 index 47dcf37cf0..33e9412ba9 100644 --- a/src/main/clm_driver.F90 +++ b/src/main/clm_driver.F90 @@ -58,7 +58,7 @@ module clm_driver use UrbanRadiationMod , only : UrbanRadiation ! use SoilBiogeochemVerticalProfileMod , only : SoilBiogeochemVerticalProfile - use SatellitePhenologyMod , only : SatellitePhenology, interpMonthlyVeg, do_interpMonthlyVeg + use SatellitePhenologyMod , only : SatellitePhenology, interpMonthlyVeg use ndepStreamMod , only : ndep_interp use cropcalStreamMod , only : cropcal_advance, cropcal_interp use ch4Mod , only : ch4, ch4_init_gridcell_balance_check, ch4_init_column_balance_check @@ -227,11 +227,44 @@ subroutine clm_drv(doalb, nextsw_cday, declinp1, declin, rstwr, nlend, rdate, ro ! Specified phenology ! Done in SP mode, FATES-SP mode and also when dry-deposition is active ! ============================================================================ + + if (use_cn) then + ! For dry-deposition need to call CLMSP so that mlaidiff is obtained + ! NOTE: This is also true of FATES below + if ( n_drydep > 0 ) then + call t_startf('interpMonthlyVeg') + call interpMonthlyVeg(bounds_proc, canopystate_inst) + call t_stopf('interpMonthlyVeg') + endif - if (do_interpMonthlyVeg(use_cn, use_fates, use_fates_sp, doalb, n_drydep)) then + elseif(use_fates) then + + ! For FATES-Specified phenology mode interpolate the weights for + ! time-interpolation of monthly vegetation data (as in SP mode below) + ! Also for FATES with dry-deposition as above need to call CLMSP so that mlaidiff is obtained + !if ( use_fates_sp .or. (n_drydep > 0 ) ) then ! Replace with this when we have dry-deposition working + ! For now don't allow for dry-deposition because of issues in #1044 EBK Jun/17/2022 + if ( use_fates_sp ) then call t_startf('interpMonthlyVeg') call interpMonthlyVeg(bounds_proc, canopystate_inst) call t_stopf('interpMonthlyVeg') + end if + + else + + ! Determine weights for time interpolation of monthly vegetation data. + ! This also determines whether it is time to read new monthly vegetation and + ! 
obtain updated leaf area index [mlai1,mlai2], stem area index [msai1,msai2], + ! vegetation top [mhvt1,mhvt2] and vegetation bottom [mhvb1,mhvb2]. The + ! weights obtained here are used in subroutine SatellitePhenology to obtain time + ! interpolated values. + ! This is also done for FATES-SP mode above + if ( doalb .or. ( n_drydep > 0 ) )then + call t_startf('interpMonthlyVeg') + call interpMonthlyVeg(bounds_proc, canopystate_inst) + call t_stopf('interpMonthlyVeg') + end if + end if ! ================================================================================== From 6a27557ff72c615359f457f2459a668a762a4c12 Mon Sep 17 00:00:00 2001 From: Samuel Levis Date: Wed, 24 Jan 2024 17:41:02 -0700 Subject: [PATCH 35/85] Updated ChangeLog/ChangeSum --- doc/ChangeLog | 19 ++++++++++--------- doc/ChangeSum | 2 +- 2 files changed, 11 insertions(+), 10 deletions(-) diff --git a/doc/ChangeLog b/doc/ChangeLog index 34af74d42d..dbb9b05c84 100644 --- a/doc/ChangeLog +++ b/doc/ChangeLog @@ -1,7 +1,7 @@ =============================================================== Tag name: ctsm5.1.dev166 -Originator(s): slevis (Samuel Levis,UCAR/TSS,303-665-1310), tking (Teagan King), afoster (Adrianna Foster), samrabin (Sam Rabin) -Date: Tue 23 Jan 2024 05:48:34 PM MST +Originator(s): slevis (Samuel Levis,UCAR/TSS,303-665-1310), tking (Teagan King), samrabin (Sam Rabin) +Date: Wed 24 Jan 2024 05:39:41 PM MST One-line Summary: BFB merge tag Purpose and description of changes @@ -11,7 +11,6 @@ Purpose and description of changes #2213 @samsrabin Automatically assign high priority items to project 25 #2330 @samsrabin Add Izumi version of the aux_clm unit testing #2326 @samsrabin run_sys_tests: Check Python environment for FatesColdTwoStream tests -KEEP? #2124 @adrifoster Create helper function for interpMontlyVeg to improve readability Significant changes to scientifically-supported configurations -------------------------------------------------------------- @@ -38,12 +37,16 @@ CTSM issues fixed (include CTSM Issue #): Fixes #2213 Fixes #2330 Fixes #2326 -KEEP? Fixes #2124 Known bugs introduced in this tag (include issue #): - New feature coming in with #2213 where user will receive email from + - New feature coming in with #2213 where user will receive email from github when pushing to their remote: "Run failed: .github/workflows/assign-to-project.yml" + - New feature that also affects older tags: The izumi FatesColdTwoStream + test submitted from ./run_sys_tests will fail at CREATE_NEWCASE unless users + introduce "module load lang/python/3.7.0" in their .bash_profile. + Longterm solution discussed in #2335. The test also works when submitted + manually with ./create_test. 
Notes of particular relevance for developers: --------------------------------------------- @@ -63,18 +66,16 @@ Testing summary: regular tests (aux_clm: https://github.com/ESCOMP/CTSM/wiki/System-Testing-Guide#pre-merge-system-testing): - derecho ----- - izumi ------- + derecho ----- OK + izumi ------- OK Answer changes -------------- - Changes answers relative to baseline: No Other details ------------- - Pull Requests that document the changes (include PR ids): https://github.com/ESCOMP/ctsm/pull/2334 diff --git a/doc/ChangeSum b/doc/ChangeSum index a796d1b09f..bfc8b86174 100644 --- a/doc/ChangeSum +++ b/doc/ChangeSum @@ -1,6 +1,6 @@ Tag Who Date Summary ============================================================================================================================ - ctsm5.1.dev166 multiple 01/23/2024 BFB merge tag + ctsm5.1.dev166 multiple 01/24/2024 BFB merge tag ctsm5.1.dev165 slevis 01/19/2024 Turn Meier2022, tillage, residue removal on for ctsm5.1, fix #2212 ctsm5.1.dev164 rgknox 01/17/2024 Compatibility and tests for FATES 2-Stream ctsm5.1.dev163 samrabin 01/10/2024 Add tillage and residue removal From ee7e6f693634f377bf770f9506cbe783727a11c6 Mon Sep 17 00:00:00 2001 From: Sam Rabin Date: Mon, 29 Jan 2024 15:54:24 -0700 Subject: [PATCH 36/85] Do not exit 'make all' if pylint fails. Resolves #2316. --- python/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/Makefile b/python/Makefile index 271e977046..440e2e0de8 100644 --- a/python/Makefile +++ b/python/Makefile @@ -19,7 +19,7 @@ ifneq ($(verbose), not-set) endif PYLINT=pylint -PYLINT_ARGS=-j 4 --rcfile=ctsm/.pylintrc +PYLINT_ARGS=-j 4 --rcfile=ctsm/.pylintrc --fail-under=0 PYLINT_SRC = \ ctsm # NOTE: These don't pass pylint checking and should be added when we put into effort to get them to pass From 25f7d216e74a096a2c95a989293c0e6d42e0c41b Mon Sep 17 00:00:00 2001 From: Sam Rabin Date: Mon, 29 Jan 2024 16:14:07 -0700 Subject: [PATCH 37/85] Fix pylint for ctsm_pylib_dependent_utils.py (except missing-module-docstring). --- python/ctsm/ctsm_pylib_dependent_utils.py | 63 +++++++++++++---------- 1 file changed, 35 insertions(+), 28 deletions(-) diff --git a/python/ctsm/ctsm_pylib_dependent_utils.py b/python/ctsm/ctsm_pylib_dependent_utils.py index 13ccf7a969..4f149c53a9 100644 --- a/python/ctsm/ctsm_pylib_dependent_utils.py +++ b/python/ctsm/ctsm_pylib_dependent_utils.py @@ -1,49 +1,56 @@ -from ctsm.utils import abort import numpy as np +from ctsm.utils import abort -def import_coord_1d(ds, coordName): +def import_coord_1d(data_set, coord_name): """Import 1-d coordinate variable Args: - ds (xarray Dataset): Dataset whose coordinate you want to import. - coordName (str): Name of coordinate to import + data_set (xarray Dataset): Dataset whose coordinate you want to import. + coord_name (str): Name of coordinate to import Returns: xarray DataArray: DataArray corresponding to the requested coordinate. 
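        int: Length of that variable

    Example (illustrative sketch only; the coordinate name is an assumption):
        lon_da, n_lon = import_coord_1d(data_set, "lon")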
""" - da = ds[coordName] - if len(da.dims) != 1: - abort(f"Expected 1 dimension for {coordName}; found {len(da.dims)}: {da.dims}") - return da, len(da) + data_array = data_set[coord_name] + if len(data_array.dims) != 1: + abort(f"Expected 1 dimension for {coord_name}; " + + f"found {len(data_array.dims)}: {data_array.dims}") + return data_array, len(data_array) -def import_coord_2d(ds, coordName, varName): - """Import 2-d latitude or longitude variable from a CESM history file (e.g., name LATIXY or LONGXY) and return it as a 1-d DataArray that can be used as a coordinate for writing CESM input files +def import_coord_2d(data_set, coord_name, var_name): + """ + Import 2-d latitude or longitude variable from a CESM history file (e.g., name LATIXY + or LONGXY and return it as a 1-d DataArray that can be used as a coordinate for writing + CESM input files Args: - ds (xarray Dataset): Dataset whose coordinate you want to import. - coordName (str): Name of coordinate to import - varName (str): Name of variable with dimension coordName + data_set (xarray Dataset): Dataset whose coordinate you want to import. + coord_name (str): Name of coordinate to import + var_name (str): Name of variable with dimension coord_name Returns: xarray DataArray: 1-d variable that can be used as a coordinate for writing CESM input files int: Length of that variable """ - da = ds[varName] - thisDim = [x for x in da.dims if coordName in x] - if len(thisDim) != 1: - abort(f"Expected 1 dimension name containing {coordName}; found {len(thisDim)}: {thisDim}") - thisDim = thisDim[0] - otherDim = [x for x in da.dims if coordName not in x] - if len(otherDim) != 1: + data_array = data_set[var_name] + this_dim = [x for x in data_array.dims if coord_name in x] + if len(this_dim) != 1: + abort(f"Expected 1 dimension name containing {coord_name}; " + + f"found {len(this_dim)}: {this_dim}") + this_dim = this_dim[0] + other_dim = [x for x in data_array.dims if coord_name not in x] + if len(other_dim) != 1: abort( - f"Expected 1 dimension name not containing {coordName}; found {len(otherDim)}: {otherDim}" + f"Expected 1 dimension name not containing {coord_name}; " + + f"found {len(other_dim)}: {other_dim}" ) - otherDim = otherDim[0] - da = da.astype(np.float32) - da = da.isel({otherDim: [0]}).squeeze().rename({thisDim: coordName}).rename(coordName) - da = da.assign_coords({coordName: da.values}) - da.attrs["long_name"] = "coordinate " + da.attrs["long_name"] - da.attrs["units"] = da.attrs["units"].replace(" ", "_") - return da, len(da) + other_dim = other_dim[0] + data_array = data_array.astype(np.float32) + data_array = data_array.isel({other_dim: [0]}).squeeze() + data_array = data_array.rename({this_dim: coord_name}).rename(coord_name) + data_array = data_array.assign_coords({coord_name: data_array.values}) + data_array.attrs["long_name"] = "coordinate " + data_array.attrs["long_name"] + data_array.attrs["units"] = data_array.attrs["units"].replace(" ", "_") + return data_array, len(data_array) From 714033708b6bfd0617dcc4b11e6ef40a6da9c45a Mon Sep 17 00:00:00 2001 From: Sam Rabin Date: Mon, 29 Jan 2024 21:31:46 -0700 Subject: [PATCH 38/85] Fix pylint for process_ggcmi_sdates.py. 
--- .../crop_calendars/process_ggcmi_shdates.py | 391 +++++++++++------- 1 file changed, 253 insertions(+), 138 deletions(-) diff --git a/python/ctsm/crop_calendars/process_ggcmi_shdates.py b/python/ctsm/crop_calendars/process_ggcmi_shdates.py index 835f91cb22..cada2b421b 100644 --- a/python/ctsm/crop_calendars/process_ggcmi_shdates.py +++ b/python/ctsm/crop_calendars/process_ggcmi_shdates.py @@ -1,16 +1,21 @@ -import numpy as np -import xarray as xr -import os -import datetime as dt -import cftime +""" +Convert GGCMI crop calendar files for use in CTSM +""" + import sys import argparse import logging +import os +import datetime as dt +import numpy as np +import xarray as xr +import cftime # -- add python/ctsm to path (needed if we want to run process_ggcmi_shdates stand-alone) _CTSM_PYTHON = os.path.join(os.path.dirname(os.path.realpath(__file__)), os.pardir, os.pardir) sys.path.insert(1, _CTSM_PYTHON) +# pylint: disable=wrong-import-position from ctsm import ctsm_logging import ctsm.crop_calendars.cropcal_utils as utils import ctsm.crop_calendars.regrid_ggcmi_shdates as regrid @@ -18,19 +23,28 @@ logger = logging.getLogger(__name__) -def get_cft(y): - return cftime.DatetimeNoLeap(y, 1, 1, 0, 0, 0, 0, has_year_zero=True) +def get_cft(year): + """ + Given a year, return the cftime.DatetimeNoLeap of Jan. 1 at 00:00. + """ + return cftime.DatetimeNoLeap(year, 1, 1, 0, 0, 0, 0, has_year_zero=True) -def get_dayssince_jan1y1(y1, y): - cft_y1 = get_cft(y1) - cft_y = get_cft(y) +def get_dayssince_jan1y1(year1, year): + """ + Get the number of days since Jan. 1 of year1 + """ + cft_y1 = get_cft(year1) + cft_y = get_cft(year) time_delta = cft_y - cft_y1 time_delta_secs = time_delta.total_seconds() return time_delta_secs / (60 * 60 * 24) def main(): + """ + main() function for calling process_ggcmi_shdates.py from command line. + """ ctsm_logging.setup_logging_pre_config() args = process_ggcmi_shdates_args() process_ggcmi_shdates( @@ -40,7 +54,6 @@ def main(): args.file_specifier, args.first_year, args.last_year, - args.verbose, args.ggcmi_author, args.regrid_resolution, args.regrid_template_file, @@ -50,8 +63,14 @@ def main(): def process_ggcmi_shdates_args(): + """ + Set up and parse input arguments for working with GGCMI crop calendar files + """ parser = argparse.ArgumentParser( - description="Converts raw sowing and harvest date files provided by GGCMI into a format that CLM can read, optionally at a target resolution." + description=( + "Converts raw sowing and harvest date files provided by GGCMI into " + + "a format that CLM can read, optionally at a target resolution." + ) ) # Required @@ -72,7 +91,10 @@ def process_ggcmi_shdates_args(): parser.add_argument( "-a", "--author", - help="String to be saved in author_thisfile attribute of output files. E.g., 'Author Name (authorname@ucar.edu)'", + help=( + "String to be saved in author_thisfile attribute of output files. " + + "E.g., 'Author Name (authorname@ucar.edu)'" + ), type=str, required=True, ) @@ -80,21 +102,30 @@ def process_ggcmi_shdates_args(): # Optional parser.add_argument( "--file-specifier", - help="String following CROP_IRR_ in input filenames. E.g., mai_ir_FILESPECIFIER.nc4. Will also be saved to output filenames.", + help=( + "String following CROP_IRR_ in input filenames. E.g., mai_ir_FILESPECIFIER.nc4. " + + "Will also be saved to output filenames." + ), type=str, default="ggcmi_crop_calendar_phase3_v1.01", ) parser.add_argument( "-y1", "--first-year", - help="First year in output files. 
Must be present in template file, unless it's the same as the last year.", + help=( + "First year in output files. Must be present in template file, " + + "unless it's the same as the last year." + ), type=int, default=2000, ) parser.add_argument( "-yN", "--last-year", - help="Last year in output files. Must be present in template file, unless it's the same as the first year.", + help=( + "Last year in output files. Must be present in template file, " + + "unless it's the same as the first year." + ), type=int, default=2000, ) @@ -117,53 +148,19 @@ def process_ggcmi_shdates_args(): return args -def process_ggcmi_shdates( - input_directory, - output_directory, - author, - file_specifier, - first_year, - last_year, - verbose, - ggcmi_author, - regrid_resolution, - regrid_template_file, - regrid_extension, - crop_list, -): - - input_directory = os.path.realpath(input_directory) - output_directory = os.path.realpath(output_directory) - - ############################################################ - ### Regrid original GGCMI files to target CLM resolution ### - ############################################################ - - regridded_ggcmi_files_dir = os.path.join( - output_directory, f"regridded_ggcmi_files-{regrid_resolution}" - ) +def setup_crop_dict(): + """ + Associate CLM crop names with (1) their integer counterpart and (2) their GGCMI counterpart. - regrid.regrid_ggcmi_shdates( - regrid_resolution, - regrid_template_file, - input_directory, - regridded_ggcmi_files_dir, - regrid_extension, - crop_list, - ) + Some notes: + - As "CLMname: {clm_num, thiscrop_ggcmi}" + - CLM names and numbers taken from commit 3dcbc7499a57904750a994672fc36b4221b9def5 + - Using one global GGCMI value for both temperate and tropical versions of corn and soybean. + - There is no GGCMI equivalent of CLM's winter barley and rye. Using winter wheat instead. + - Using GGCMI "pea" for CLM pulses, as suggested by GGCMI phase 3 protocol. + - Only using GGCMI "ri1" for rice; ignoring "ri2". + """ - ########################### - ### Define dictionaries ### - ########################### - - # First, we associate CLM crop names with (1) their integer counterpart and (2) their GGCMI counterpart. - # Some notes: - # - As "CLMname: {clm_num, thiscrop_ggcmi}" - # - CLM names and numbers taken from commit `3dcbc7499a57904750a994672fc36b4221b9def5` - # - Using one global GGCMI value for both temperate and tropical versions of corn and soybean. - # - There is no GGCMI equivalent of CLM's winter barley and rye. Using winter wheat instead. - # - Using GGCMI `pea` for CLM pulses, as suggested by GGCMI phase 3 protocol. - # - Only using GGCMI `ri1` for rice; ignoring `ri2`. def set_crop_dict(thisnum, thisname): return {"clm_num": thisnum, "thiscrop_ggcmi": thisname} @@ -234,8 +231,16 @@ def set_crop_dict(thisnum, thisname): "c3_irrigated": set_crop_dict(16, None), } - # Next, we associate CLM variable names with their GGCMI counterparts. We also save a placeholder for output file paths associated with each variable. - # As CLMname: {GGCMIname, output_file} + return crop_dict + + +def setup_var_dict(): + """ + Associate CLM variable names with their GGCMI counterparts. + - We also save a placeholder for output file paths associated with each variable. 
+    - As CLMname: {GGCMIname, output_file}
+    """
+
     def set_var_dict(name_ggcmi, outfile):
         return {"name_ggcmi": name_ggcmi, "outfile": outfile}
 
     variable_dict = {
         "sdate": set_var_dict("planting_day", ""),
         "hdate": set_var_dict("maturity_day", ""),
     }
+    return variable_dict
+
+
+def set_var_attrs(thisvar_da, thiscrop_clm, thiscrop_ggcmi, varname_ggcmi, new_fillvalue):
+    """
+    Set output variable attributes
+    """
+
+    longname = thisvar_da.attrs["long_name"]
+    longname = longname.replace("rainfed", thiscrop_clm).replace("irrigated", thiscrop_clm)
+    thisvar_da.attrs["long_name"] = longname
+
+    if thiscrop_ggcmi is None:
+        thisvar_da.attrs["crop_name_clm"] = "none"
+        thisvar_da.attrs["crop_name_ggcmi"] = "none"
+    else:
+        thisvar_da.attrs["crop_name_clm"] = thiscrop_clm
+        thisvar_da.attrs["crop_name_ggcmi"] = thiscrop_ggcmi
+
+    thisvar_da.attrs["short_name_ggcmi"] = varname_ggcmi
+    thisvar_da.attrs["units"] = "day of year"
+    thisvar_da.encoding["_FillValue"] = new_fillvalue
+
+    # scale_factor and add_offset are required by I/O library for short data
+    # From https://www.unidata.ucar.edu/software/netcdf/workshops/2010/bestpractices/Packing.html:
+    #    unpacked_value = packed_value * scale_factor + add_offset
+    thisvar_da.attrs["scale_factor"] = np.int16(1)
+    thisvar_da.attrs["add_offset"] = np.int16(0)
+    return thisvar_da
+
+
+def fill_convert_int(thisvar_ds, thiscrop_ggcmi, varname_ggcmi, new_fillvalue):
+    """
+    Ensure fill value and real data are correct format
+    """
+    dummyvalue = -1
+    thisvar_ds.variables[varname_ggcmi].encoding["_FillValue"] = new_fillvalue
+    if thiscrop_ggcmi is None:
+        thisvar_ds.variables[varname_ggcmi].values.fill(dummyvalue)
+    else:
+        thisvar_ds.variables[varname_ggcmi].values[
+            np.isnan(thisvar_ds.variables[varname_ggcmi].values)
+        ] = new_fillvalue
+        thisvar_ds.variables[varname_ggcmi].values = thisvar_ds.variables[
+            varname_ggcmi
+        ].values.astype("int16")
+
+    return thisvar_ds
+
+
+def add_time_dim(thisvar_ds, template_ds, varname_ggcmi, varname_clm):
+    """
+    Add time dimension (https://stackoverflow.com/a/62862440)
+    - Repeats original map for every timestep
+    - Probably not necessary to use this method, since I only end up extracting thisvar_ds.values
+      anyway---I could probably use some numpy method instead.
+    """
+
+    thisvar_ds = thisvar_ds.expand_dims(time=template_ds.time)
+    thisvar_da_tmp = thisvar_ds[varname_ggcmi]
+    thisvar_da = xr.DataArray(
+        data=thisvar_da_tmp.values.astype("int16"),
+        attrs=thisvar_da_tmp.attrs,
+        coords=thisvar_da_tmp.coords,
+        name=varname_clm,
+    )
+
+    return thisvar_da
+
+
+def create_output_files(
+    regrid_resolution,
+    variable_dict,
+    output_directory,
+    file_specifier,
+    first_year,
+    last_year,
+    template_ds,
+):
+    """
+    Create output files, one for each variable
+    """
+    datetime_string = dt.datetime.now().strftime("%Y%m%d_%H%M%S")
+    nninterp_suffix = "nninterp-" + regrid_resolution
+    for var in variable_dict:
+        basename = (
+            f"{var}s_{file_specifier}_{nninterp_suffix}."
+ + f"{first_year}-{last_year}.{datetime_string}.nc" + ) + outfile = os.path.join(output_directory, basename) + variable_dict[var]["outfile"] = outfile + template_ds.to_netcdf( + path=variable_dict[var]["outfile"], + format="NETCDF3_CLASSIC", + ) + + return nninterp_suffix + + +def strip_dataset(cropcal_ds, varname_ggcmi): + """ + Remove all variables except one from Dataset + """ + droplist = [] + for i in list(cropcal_ds.keys()): + if i != varname_ggcmi: + droplist.append(i) + thisvar_ds = cropcal_ds.drop(droplist) + return thisvar_ds + + +def process_ggcmi_shdates( + input_directory, + output_directory, + author, + file_specifier, + first_year, + last_year, + ggcmi_author, + regrid_resolution, + regrid_template_file, + regrid_extension, + crop_list, +): + """ + Convert GGCMI crop calendar files for use in CTSM + """ + + input_directory = os.path.realpath(input_directory) + output_directory = os.path.realpath(output_directory) + + ############################################################ + ### Regrid original GGCMI files to target CLM resolution ### + ############################################################ + + regridded_ggcmi_files_dir = os.path.join( + output_directory, f"regridded_ggcmi_files-{regrid_resolution}" + ) + + regrid.regrid_ggcmi_shdates( + regrid_resolution, + regrid_template_file, + input_directory, + regridded_ggcmi_files_dir, + regrid_extension, + crop_list, + ) + + # Set up dictionaries used in remapping crops and variables between GGCMI and CLM + crop_dict = setup_crop_dict() + variable_dict = setup_var_dict() ################################ ### Instantiate output files ### ################################ # Global attributes for output files + comment = ( + "Day of year is 1-indexed (i.e., Jan. 1 = 1). " + + "Filled using cdo -remapnn,$original -setmisstonn" + ) out_attrs = { "title": "GGCMI crop calendar for Phase 3, v1.01", "author_thisfile": author, "author_original": ggcmi_author, - "comment": "Day of year is 1-indexed (i.e., Jan. 1 = 1). Filled using cdo -remapnn,$original -setmisstonn", + "comment": comment, "created": dt.datetime.now().replace(microsecond=0).astimezone().isoformat(), } # Create template dataset time_array = np.array( - [get_dayssince_jan1y1(first_year, y) for y in np.arange(first_year, last_year + 1)] + [get_dayssince_jan1y1(first_year, year) for year in np.arange(first_year, last_year + 1)] ) time_coord = xr.IndexVariable( "time", @@ -273,18 +433,15 @@ def set_var_dict(name_ggcmi, outfile): template_ds = xr.Dataset(coords={"time": time_coord}, attrs=out_attrs) # Create output files - datetime_string = dt.datetime.now().strftime("%Y%m%d_%H%M%S") - nninterp_suffix = "nninterp-" + regrid_resolution - for v in variable_dict: - outfile = os.path.join( - output_directory, - f"{v}s_{file_specifier}_{nninterp_suffix}.{first_year}-{last_year}.{datetime_string}.nc", - ) - variable_dict[v]["outfile"] = outfile - template_ds.to_netcdf( - path=variable_dict[v]["outfile"], - format="NETCDF3_CLASSIC", - ) + nninterp_suffix = create_output_files( + regrid_resolution, + variable_dict, + output_directory, + file_specifier, + first_year, + last_year, + template_ds, + ) ######################### ### Process all crops ### @@ -293,7 +450,7 @@ def set_var_dict(name_ggcmi, outfile): for thiscrop_clm in crop_dict: # Which crop are we on? 
-        c = list(crop_dict.keys()).index(thiscrop_clm) + 1
+        crop_int = list(crop_dict.keys()).index(thiscrop_clm) + 1
 
         # Get information about this crop
         this_dict = crop_dict[thiscrop_clm]
@@ -306,18 +463,24 @@ def set_var_dict(name_ggcmi, outfile):
 
         # If no corresponding GGCMI crop, skip opening dataset.
         # Will use previous cropcal_ds as a template.
-        if thiscrop_ggcmi == None:
-            if c == 1:
+        if thiscrop_ggcmi is None:
+            if crop_int == 1:
                 raise ValueError(f"First crop ({thiscrop_clm}) must have a GGCMI type")
             logger.info(
-                "Filling %s with dummy data (%d of %d)..." % (str(thiscrop_clm), c, len(crop_dict))
+                "Filling %s with dummy data (%d of %d)...",
+                str(thiscrop_clm),
+                crop_int,
+                len(crop_dict),
             )
 
         # Otherwise, import crop calendar file
        else:
             logger.info(
-                "Importing %s -> %s (%d of %d)..."
-                % (str(thiscrop_ggcmi), str(thiscrop_clm), c, len(crop_dict))
+                "Importing %s -> %s (%d of %d)...",
+                str(thiscrop_ggcmi),
+                str(thiscrop_clm),
+                crop_int,
+                len(crop_dict),
             )
 
             file_ggcmi = os.path.join(
@@ -326,7 +489,7 @@ def set_var_dict(name_ggcmi, outfile):
             )
             if not os.path.exists(file_ggcmi):
                 logger.warning(
-                    f"Skipping {thiscrop_ggcmi} because input file not found: {file_ggcmi}"
+                    "Skipping %s because input file not found: %s", thiscrop_ggcmi, file_ggcmi
                 )
                 continue
             cropcal_ds = xr.open_dataset(file_ggcmi)
@@ -338,7 +501,7 @@ def set_var_dict(name_ggcmi, outfile):
         for thisvar_clm in variable_dict:
             # Get GGCMI netCDF info
             varname_ggcmi = variable_dict[thisvar_clm]["name_ggcmi"]
-            logger.info("    Processing %s..." % varname_ggcmi)
+            logger.info("    Processing %s...", varname_ggcmi)
 
             # Get CLM netCDF info
             varname_clm = thisvar_clm + "1_" + str(thiscrop_int)
@@ -347,69 +510,21 @@ def set_var_dict(name_ggcmi, outfile):
                 raise Exception("Output file not found: " + file_clm)
 
             # Strip dataset to just this variable
-            droplist = []
-            for i in list(cropcal_ds.keys()):
-                if i != varname_ggcmi:
-                    droplist.append(i)
-            thisvar_ds = cropcal_ds.drop(droplist)
-            thisvar_ds = thisvar_ds.load()
+            thisvar_ds = strip_dataset(cropcal_ds, varname_ggcmi)
 
             # Convert to integer
             new_fillvalue = -1
-            dummyvalue = -1
-            thisvar_ds.variables[varname_ggcmi].encoding["_FillValue"] = new_fillvalue
-            if thiscrop_ggcmi == None:
-                thisvar_ds.variables[varname_ggcmi].values.fill(dummyvalue)
-            else:
-                thisvar_ds.variables[varname_ggcmi].values[
-                    np.isnan(thisvar_ds.variables[varname_ggcmi].values)
-                ] = new_fillvalue
-                thisvar_ds.variables[varname_ggcmi].values = thisvar_ds.variables[
-                    varname_ggcmi
-                ].values.astype("int16")
+            thisvar_ds = fill_convert_int(thisvar_ds, thiscrop_ggcmi, varname_ggcmi, new_fillvalue)
 
             # Add time dimension (https://stackoverflow.com/a/62862440)
-            # (Repeats original map for every timestep)
-            # Probably not necessary to use this method, since I only end up extracting thisvar_ds.values anyway---I could probably use some numpy method instead.
-            thisvar_ds = thisvar_ds.expand_dims(time=template_ds.time)
-            thisvar_da_tmp = thisvar_ds[varname_ggcmi]
-            thisvar_da = xr.DataArray(
-                data=thisvar_da_tmp.values.astype("int16"),
-                attrs=thisvar_da_tmp.attrs,
-                coords=thisvar_da_tmp.coords,
-                name=varname_clm,
-            )
-
-            # Edit/add variable attributes etc.
- longname = thisvar_da.attrs["long_name"] - longname = longname.replace("rainfed", thiscrop_clm).replace("irrigated", thiscrop_clm) - - def set_var_attrs( - thisvar_da, longname, thiscrop_clm, thiscrop_ggcmi, varname_ggcmi, new_fillvalue - ): - thisvar_da.attrs["long_name"] = longname - if thiscrop_ggcmi == None: - thisvar_da.attrs["crop_name_clm"] = "none" - thisvar_da.attrs["crop_name_ggcmi"] = "none" - else: - thisvar_da.attrs["crop_name_clm"] = thiscrop_clm - thisvar_da.attrs["crop_name_ggcmi"] = thiscrop_ggcmi - thisvar_da.attrs["short_name_ggcmi"] = varname_ggcmi - thisvar_da.attrs["units"] = "day of year" - thisvar_da.encoding["_FillValue"] = new_fillvalue - # scale_factor and add_offset are required by I/O library for short data - # From https://www.unidata.ucar.edu/software/netcdf/workshops/2010/bestpractices/Packing.html: - # unpacked_value = packed_value * scale_factor + add_offset - thisvar_da.attrs["scale_factor"] = np.int16(1) - thisvar_da.attrs["add_offset"] = np.int16(0) - return thisvar_da + thisvar_da = add_time_dim(thisvar_ds, template_ds, varname_ggcmi, varname_clm) thisvar_da = set_var_attrs( - thisvar_da, longname, thiscrop_clm, thiscrop_ggcmi, varname_ggcmi, new_fillvalue + thisvar_da, thiscrop_clm, thiscrop_ggcmi, varname_ggcmi, new_fillvalue ) # Save - logger.info(" Saving %s..." % varname_ggcmi) + logger.info(" Saving %s...", varname_ggcmi) thisvar_da.to_netcdf(file_clm, mode="a", format="NETCDF3_CLASSIC") cropcal_ds.close() From 8347afc5dc85aae8738d466bd2c18d7f2d3651a4 Mon Sep 17 00:00:00 2001 From: Sam Rabin Date: Tue, 30 Jan 2024 15:16:11 -0700 Subject: [PATCH 39/85] Satisfy pylint for generate_gdds_functions.py. --- .../crop_calendars/generate_gdds_functions.py | 724 ++++++++++-------- 1 file changed, 417 insertions(+), 307 deletions(-) diff --git a/python/ctsm/crop_calendars/generate_gdds_functions.py b/python/ctsm/crop_calendars/generate_gdds_functions.py index cb05f1920d..74e8fd57f4 100644 --- a/python/ctsm/crop_calendars/generate_gdds_functions.py +++ b/python/ctsm/crop_calendars/generate_gdds_functions.py @@ -1,55 +1,77 @@ -import numpy as np -import xarray as xr +""" +Functions to support generate_gdds.py +""" +# pylint: disable=too-many-lines,too-many-statements import warnings import os +import sys import glob import datetime as dt from importlib import util as importlib_util +import numpy as np +import xarray as xr # Import the CTSM Python utilities. -# sys.path.insert() is necessary for RXCROPMATURITY to work. The fact that it's calling this script in the RUN phase seems to require the python/ directory to be manually added to path. +# sys.path.insert() is necessary for RXCROPMATURITY to work. The fact that it's calling this script +# in the RUN phase seems to require the python/ directory to be manually added to path. 
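# (For illustration: with this file at python/ctsm/crop_calendars/, the join below
# resolves _CTSM_PYTHON to the repository's top-level python/ directory.)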
_CTSM_PYTHON = os.path.join( os.path.dirname(os.path.realpath(__file__)), os.pardir, os.pardir, os.pardir, "python" ) -import sys - sys.path.insert(1, _CTSM_PYTHON) -import ctsm.crop_calendars.cropcal_utils as utils -import ctsm.crop_calendars.cropcal_module as cc +import ctsm.crop_calendars.cropcal_utils as utils # pylint: disable=wrong-import-position +import ctsm.crop_calendars.cropcal_module as cc # pylint: disable=wrong-import-position -can_plot = True +CAN_PLOT = True try: + # pylint: disable=wildcard-import,unused-wildcard-import + # pylint: disable=import-error from ctsm.crop_calendars.cropcal_figs_module import * from matplotlib.transforms import Bbox warnings.filterwarnings( "ignore", - message="__len__ for multi-part geometries is deprecated and will be removed in Shapely 2.0. Check the length of the `geoms` property instead to get the number of parts of a multi-part geometry.", + message=( + "__len__ for multi-part geometries is deprecated and will be removed in Shapely " + + "2.0. Check the length of the `geoms` property instead to get the number of " + + "parts of a multi-part geometry." + ), ) warnings.filterwarnings( "ignore", - message="Iteration over multi-part geometries is deprecated and will be removed in Shapely 2.0. Use the `geoms` property to access the constituent parts of a multi-part geometry.", + message=( + "Iteration over multi-part geometries is deprecated and will be removed in Shapely " + + "2.0. Use the `geoms` property to access the constituent parts of a multi-part " + + "geometry." + ), ) print("Will (attempt to) produce harvest requirement map figure files.") -except: +except ModuleNotFoundError: print("Will NOT produce harvest requirement map figure files.") - can_plot = False + CAN_PLOT = False -# Functions to simultaneously print to console and to log file def log(logger, string): + """ + Simultaneously print INFO messages to console and to log file + """ print(string) logger.info(string) def error(logger, string): + """ + Simultaneously print ERROR messages to console and to log file + """ logger.error(string) raise RuntimeError(string) def check_sdates(dates_ds, sdates_rx, logger, verbose=False): + """ + Checking that input and output sdates match + """ log(logger, " Checking that input and output sdates match...") sdates_grid = utils.grid_one_variable(dates_ds, "SDATES") @@ -58,28 +80,28 @@ def check_sdates(dates_ds, sdates_rx, logger, verbose=False): any_found = False vegtypes_skipped = [] vegtypes_included = [] - for i, vt_str in enumerate(dates_ds.vegtype_str.values): + for i, vegtype_str in enumerate(dates_ds.vegtype_str.values): # Input - vt = dates_ds.ivt.values[i] - thisVar = f"gs1_{vt}" - if thisVar not in sdates_rx: - vegtypes_skipped = vegtypes_skipped + [vt_str] + vegtype_int = dates_ds.ivt.values[i] + this_var = f"gs1_{vegtype_int}" + if this_var not in sdates_rx: + vegtypes_skipped = vegtypes_skipped + [vegtype_str] # log(logger, f" {vt_str} ({vt}) SKIPPED...") continue - vegtypes_included = vegtypes_included + [vt_str] + vegtypes_included = vegtypes_included + [vegtype_str] any_found = True if verbose: - log(logger, f" {vt_str} ({vt})...") - in_map = sdates_rx[thisVar].squeeze(drop=True) + log(logger, f" {vegtype_str} ({vegtype_int})...") + in_map = sdates_rx[this_var].squeeze(drop=True) # Output - out_map = sdates_grid.sel(ivt_str=vt_str).squeeze(drop=True) + out_map = sdates_grid.sel(ivt_str=vegtype_str).squeeze(drop=True) # Check for differences diff_map = out_map - in_map diff_map_notnan = 
diff_map.values[np.invert(np.isnan(diff_map.values))]
        if np.any(diff_map_notnan):
-            log(logger, f"Difference(s) found in {vt_str}")
+            log(logger, f"Difference(s) found in {vegtype_str}")
             here = np.where(diff_map_notnan)
             log(logger, "in:")
             in_map_notnan = in_map.values[np.invert(np.isnan(diff_map.values))]
@@ -91,7 +113,7 @@ def check_sdates(dates_ds, sdates_rx, logger, verbose=False):
             log(logger, diff_map_notnan[here][0:4])
             all_ok = False
 
-    if not (any_found):
+    if not any_found:
         error(logger, "No matching variables found in sdates_rx!")
 
     # Sanity checks for included vegetation types
@@ -102,7 +124,8 @@ def check_sdates(dates_ds, sdates_rx, logger, verbose=False):
     elif vegtypes_skipped_weird:
         log(
             logger,
-            f"\nWarning: Some crop types had output rainfed patches but no irrigated patches: {vegtypes_skipped_weird}",
+            "\nWarning: Some crop types had output rainfed patches but no irrigated patches: "
+            + f"{vegtypes_skipped_weird}",
         )
 
     if all_ok:
@@ -111,34 +134,42 @@ def check_sdates(dates_ds, sdates_rx, logger, verbose=False):
         error(logger, " ❌ Input and output sdates differ.")
 
 
-def import_rx_dates(s_or_h, date_inFile, incl_patches1d_itype_veg, mxsowings, logger):
-    if isinstance(date_inFile, xr.Dataset):
-        return date_inFile
-    elif not isinstance(date_inFile, str):
+def import_rx_dates(s_or_h, date_infile, incl_patches1d_itype_veg, mxsowings, logger):
+    """
+    Import prescribed sowing or harvest dates
+    """
+    if isinstance(date_infile, xr.Dataset):
+        return date_infile
+    if not isinstance(date_infile, str):
         error(
             logger,
-            f"Importing {s_or_h}dates_rx: Expected date_inFile to be str or DataArray, not {type(date_inFile)}",
+            f"Importing {s_or_h}dates_rx: Expected date_infile to be str or DataArray, "
+            + f"not {type(date_infile)}",
        )
 
    # Which vegetation types were simulated?
- itype_veg_toImport = np.unique(incl_patches1d_itype_veg) + itype_veg_to_import = np.unique(incl_patches1d_itype_veg) - date_varList = [] - for i in itype_veg_toImport: - for g in np.arange(mxsowings): - thisVar = f"{s_or_h}date{g+1}_{i}" - date_varList = date_varList + [thisVar] + date_var_list = [] + for i in itype_veg_to_import: + for n_sowing in np.arange(mxsowings): + this_var = f"{s_or_h}date{n_sowing+1}_{i}" + date_var_list = date_var_list + [this_var] - ds = utils.import_ds(date_inFile, myVars=date_varList) + this_ds = utils.import_ds(date_infile, myVars=date_var_list) - for v in ds: - ds = ds.rename({v: v.replace(f"{s_or_h}date", "gs")}) + for var in this_ds: + this_ds = this_ds.rename({var: var.replace(f"{s_or_h}date", "gs")}) - return ds + return this_ds -def thisCrop_map_to_patches(lon_points, lat_points, map_ds, vegtype_int): - # xarray pointwise indexing; see https://xarray.pydata.org/en/stable/user-guide/indexing.html#more-advanced-indexing +def this_crop_map_to_patches(lon_points, lat_points, map_ds, vegtype_int): + """ + Given a map, get a vector of patches + """ + # xarray pointwise indexing; + # see https://xarray.pydata.org/en/stable/user-guide/indexing.html#more-advanced-indexing return ( map_ds[f"gs1_{vegtype_int}"] .sel(lon=xr.DataArray(lon_points, dims="patch"), lat=xr.DataArray(lat_points, dims="patch")) @@ -146,8 +177,10 @@ def thisCrop_map_to_patches(lon_points, lat_points, map_ds, vegtype_int): ) -# Get and grid mean GDDs in GGCMI growing season def yp_list_to_ds(yp_list, daily_ds, incl_vegtypes_str, dates_rx, longname_prefix, logger): + """ + Get and grid mean GDDs in GGCMI growing season + """ # Get means warnings.filterwarnings( "ignore", message="Mean of empty slice" @@ -160,44 +193,45 @@ def yp_list_to_ds(yp_list, daily_ds, incl_vegtypes_str, dates_rx, longname_prefi # Grid ds_out = xr.Dataset() - for c, ra in enumerate(p_list): - if isinstance(ra, type(None)): + for this_crop_int, data in enumerate(p_list): + if isinstance(data, type(None)): continue - thisCrop_str = incl_vegtypes_str[c] - log(logger, f" {thisCrop_str}...") - newVar = f"gdd1_{utils.ivt_str2int(thisCrop_str)}" - ds = daily_ds.isel( - patch=np.where(daily_ds.patches1d_itype_veg_str.values == thisCrop_str)[0] + this_crop_str = incl_vegtypes_str[this_crop_int] + log(logger, f" {this_crop_str}...") + new_var = f"gdd1_{utils.ivt_str2int(this_crop_str)}" + this_ds = daily_ds.isel( + patch=np.where(daily_ds.patches1d_itype_veg_str.values == this_crop_str)[0] ) - template_da = ds.patches1d_itype_veg_str - da = xr.DataArray( - data=ra, + template_da = this_ds.patches1d_itype_veg_str + this_da = xr.DataArray( + data=data, coords=template_da.coords, - attrs={"units": "GDD", "long_name": f"{longname_prefix}{thisCrop_str}"}, + attrs={"units": "GDD", "long_name": f"{longname_prefix}{this_crop_str}"}, ) # Grid this crop - ds["tmp"] = da - da_gridded = utils.grid_one_variable(ds, "tmp", vegtype=thisCrop_str).squeeze(drop=True) + this_ds["tmp"] = this_da + da_gridded = utils.grid_one_variable(this_ds, "tmp", vegtype=this_crop_str) + da_gridded = da_gridded.squeeze(drop=True) # Add singleton time dimension and save to output Dataset da_gridded = da_gridded.expand_dims(time=dates_rx.time) - ds_out[newVar] = da_gridded + ds_out[new_var] = da_gridded return ds_out def import_and_process_1yr( - y1, - yN, - y, - thisYear, + year_1, + year_n, + year_index, + this_year, sdates_rx, hdates_rx, gddaccum_yp_list, gddharv_yp_list, - skip_patches_for_isel_nan_lastyear, - lastYear_active_patch_indices_list, + 
skip_patches_for_isel_nan_last_year, + last_year_active_patch_indices_list, incorrectly_daily, indir, incl_vegtypes_str_in, @@ -207,8 +241,11 @@ def import_and_process_1yr( skip_crops, logger, ): + """ + Import one year of CLM output data for GDD generation + """ save_figs = True - log(logger, f"netCDF year {thisYear}...") + log(logger, f"netCDF year {this_year}...") log(logger, dt.datetime.now().strftime("%Y-%m-%d %H:%M:%S")) # Without dask, this can take a LONG time at resolutions finer than 2-deg @@ -237,7 +274,7 @@ def import_and_process_1yr( h1_filelist, myVars=["SDATES", "HDATES"], myVegtypes=crops_to_read, - timeSlice=slice(f"{thisYear}-01-01", f"{thisYear}-12-31"), + timeSlice=slice(f"{this_year}-01-01", f"{this_year}-12-31"), chunks=chunks, ) @@ -261,8 +298,8 @@ def import_and_process_1yr( np.sum(~np.isnan(dates_ds.HDATES.values), axis=dates_ds.HDATES.dims.index("mxharvests")) == 0 ) - N_unmatched_nans = np.sum(sdates_all_nan != hdates_all_nan) - if N_unmatched_nans > 0: + n_unmatched_nans = np.sum(sdates_all_nan != hdates_all_nan) + if n_unmatched_nans > 0: error(logger, "Output SDATE and HDATE NaN masks do not match.") if np.sum(~np.isnan(dates_ds.SDATES.values)) == 0: error(logger, "All SDATES are NaN!") @@ -270,15 +307,15 @@ def import_and_process_1yr( # Just work with non-NaN patches for now skip_patches_for_isel_nan = np.where(sdates_all_nan)[0] incl_patches_for_isel_nan = np.where(~sdates_all_nan)[0] - different_nan_mask = y > 0 and not np.array_equal( - skip_patches_for_isel_nan_lastyear, skip_patches_for_isel_nan + different_nan_mask = year_index > 0 and not np.array_equal( + skip_patches_for_isel_nan_last_year, skip_patches_for_isel_nan ) if different_nan_mask: log(logger, " Different NaN mask than last year") incl_thisyr_but_nan_lastyr = [ dates_ds.patch.values[p] for p in incl_patches_for_isel_nan - if p in skip_patches_for_isel_nan_lastyear + if p in skip_patches_for_isel_nan_last_year ] else: incl_thisyr_but_nan_lastyr = [] @@ -286,14 +323,15 @@ def import_and_process_1yr( if skipping_patches_for_isel_nan: log( logger, - f" Ignoring {len(skip_patches_for_isel_nan)} patches with all-NaN sowing and harvest dates.", + f" Ignoring {len(skip_patches_for_isel_nan)} patches with all-NaN sowing and " + + "harvest dates.", ) dates_incl_ds = dates_ds.isel(patch=incl_patches_for_isel_nan) else: dates_incl_ds = dates_ds incl_patches1d_itype_veg = dates_incl_ds.patches1d_itype_veg - if y == 0: + if year_index == 0: incl_vegtypes_str = [c for c in dates_incl_ds.vegtype_str.values if c not in skip_crops] else: incl_vegtypes_str = incl_vegtypes_str_in @@ -304,13 +342,15 @@ def import_and_process_1yr( if incl_vegtypes_str != list(dates_incl_ds.vegtype_str.values): error( logger, - f"Included veg types differ. Previously {incl_vegtypes_str}, now {dates_incl_ds.vegtype_str.values}", + f"Included veg types differ. Previously {incl_vegtypes_str}, " + + f"now {dates_incl_ds.vegtype_str.values}", ) if np.sum(~np.isnan(dates_incl_ds.SDATES.values)) == 0: error(logger, "All SDATES are NaN after ignoring those patches!") - # Some patches can have -1 sowing date?? Hopefully just an artifact of me incorrectly saving SDATES/HDATES daily. + # Some patches can have -1 sowing date?? Hopefully just an artifact of me incorrectly saving + # SDATES/HDATES daily. 
mxsowings = dates_ds.dims["mxsowings"] mxsowings_dim = dates_ds.SDATES.dims.index("mxsowings") skip_patches_for_isel_sdatelt1 = np.where(dates_incl_ds.SDATES.values < 1)[1] @@ -322,7 +362,8 @@ def import_and_process_1yr( if incorrectly_daily and list(unique_hdates) == [364]: log( logger, - f" ❗ {len(skip_patches_for_isel_sdatelt1)} patches have SDATE < 1, but this might have just been because of incorrectly daily outputs. Setting them to 365.", + f" ❗ {len(skip_patches_for_isel_sdatelt1)} patches have SDATE < 1, but this" + + "might have just been because of incorrectly daily outputs. Setting them to 365.", ) new_sdates_ar = dates_incl_ds.SDATES.values if mxsowings_dim != 0: @@ -336,13 +377,16 @@ def import_and_process_1yr( else: error( logger, - f"{len(skip_patches_for_isel_sdatelt1)} patches have SDATE < 1. Unique affected hdates: {unique_hdates}", + f"{len(skip_patches_for_isel_sdatelt1)} patches have SDATE < 1. " + + f"Unique affected hdates: {unique_hdates}", ) - # Some patches can have -1 harvest date?? Hopefully just an artifact of me incorrectly saving SDATES/HDATES daily. Can also happen if patch wasn't active last year + # Some patches can have -1 harvest date?? Hopefully just an artifact of me incorrectly saving + # SDATES/HDATES daily. Can also happen if patch wasn't active last year mxharvests = dates_ds.dims["mxharvests"] mxharvests_dim = dates_ds.HDATES.dims.index("mxharvests") - # If a patch was inactive last year but was either (a) harvested the last time it was active or (b) was never active, it will have -1 as its harvest date this year. Such instances are okay. + # If a patch was inactive last year but was either (a) harvested the last time it was active or + # (b) was never active, it will have -1 as its harvest date this year. Such instances are okay. hdates_thisyr = dates_incl_ds.HDATES.isel(mxharvests=0) skip_patches_for_isel_hdatelt1 = np.where(hdates_thisyr.values < 1)[0] skipping_patches_for_isel_hdatelt1 = len(skip_patches_for_isel_hdatelt1) > 0 @@ -352,7 +396,6 @@ def import_and_process_1yr( patch=incl_thisyr_but_nan_lastyr ) if np.any(hdates_thisyr_where_nan_lastyr < 1): - # patches_to_fix = hdates_thisyr_where_nan_lastyr.isel(patch=np.where(hdates_thisyr_where_nan_lastyr < 1)[0]).patch.values new_hdates = dates_incl_ds.HDATES.values if mxharvests_dim != 0: error(logger, "Code this up") @@ -360,7 +403,10 @@ def import_and_process_1yr( here = [patch_list.index(x) for x in incl_thisyr_but_nan_lastyr] log( logger, - f" ❗ {len(here)} patches have harvest date -1 because they weren't active last year (and were either never active or were harvested when last active). Ignoring, but you should have done a run with patches always active if they are ever active in the real LU timeseries.", + f" ❗ {len(here)} patches have harvest date -1 because they weren't active last" + + "year (and were either never active or were harvested when last active). " + + "Ignoring, but you should have done a run with patches always active if they are " + + "ever active in the real LU timeseries.", ) new_hdates[0, here] = sdates_thisyr_where_nan_lastyr.values - 1 dates_incl_ds["HDATES"] = xr.DataArray( @@ -382,7 +428,9 @@ def import_and_process_1yr( if incorrectly_daily and list(unique_sdates) == [1]: log( logger, - f" ❗ {len(skip_patches_for_isel_hdatelt1)} patches have HDATE < 1??? Seems like this might have just been because of incorrectly daily outputs; setting them to 365.", + f" ❗ {len(skip_patches_for_isel_hdatelt1)} patches have HDATE < 1??? 
Seems like " + + "this might have just been because of incorrectly daily outputs; setting them to " + + "365.", ) new_hdates_ar = dates_incl_ds.HDATES.values if mxharvests_dim != 0: @@ -396,18 +444,21 @@ def import_and_process_1yr( else: error( logger, - f"{len(skip_patches_for_isel_hdatelt1)} patches have HDATE < 1. Possible causes:\n * Not using constant crop areas (e.g., flanduse_timeseries from make_lu_for_gddgen.py)\n * Not skipping the first 2 years of output\nUnique affected sdates: {unique_sdates}", + f"{len(skip_patches_for_isel_hdatelt1)} patches have HDATE < 1. Possible causes:\n" + + "* Not using constant crop areas (e.g., flanduse_timeseries from " + + "make_lu_for_gddgen.py)\n * Not skipping the first 2 years of output\n" + + f"Unique affected sdates: {unique_sdates}", ) # Make sure there was only one harvest per year - N_extra_harv = np.sum( + n_extra_harv = np.sum( np.nanmax( dates_incl_ds.HDATES.isel(mxharvests=slice(1, mxharvests)).values, axis=mxharvests_dim ) >= 1 ) - if N_extra_harv > 0: - error(logger, f"{N_extra_harv} patches have >1 harvest.") + if n_extra_harv > 0: + error(logger, f"{n_extra_harv} patches have >1 harvest.") # Make sure harvest happened the day before sowing sdates_clm = dates_incl_ds.SDATES.values.squeeze() @@ -432,13 +483,13 @@ def import_and_process_1yr( if mxmats and (imported_sdates or imported_hdates): print(" Limiting growing season length...") hdates_rx = hdates_rx_orig.copy() - for v in hdates_rx_orig: - if v == "time_bounds": + for var in hdates_rx_orig: + if var == "time_bounds": continue # Get max growing season length vegtype_int = int( - v.split("_")[1] + var.split("_")[1] ) # netCDF variable name v should be something like gs1_17 vegtype_str = utils.ivt_int2str(vegtype_int) if vegtype_str == "soybean": @@ -452,41 +503,45 @@ def import_and_process_1yr( continue # Get "prescribed" growing season length - gs_len_rx_da = get_gs_len_da(hdates_rx_orig[v] - sdates_rx[v]) + gs_len_rx_da = get_gs_len_da(hdates_rx_orig[var] - sdates_rx[var]) not_ok = gs_len_rx_da.values > mxmat if not np.any(not_ok): print(f" Not limiting {vegtype_str}: No rx season > {mxmat} days") continue - hdates_limited = hdates_rx_orig[v].copy().values - hdates_limited[np.where(not_ok)] = sdates_rx[v].values[np.where(not_ok)] + mxmat + hdates_limited = hdates_rx_orig[var].copy().values + hdates_limited[np.where(not_ok)] = sdates_rx[var].values[np.where(not_ok)] + mxmat hdates_limited[np.where(hdates_limited > 365)] -= 365 if np.any(hdates_limited < 1): raise RuntimeError("Limited hdates < 1") - elif np.any(hdates_limited > 365): + if np.any(hdates_limited > 365): raise RuntimeError("Limited hdates > 365") - hdates_rx[v] = xr.DataArray( - data=hdates_limited, coords=hdates_rx_orig[v].coords, attrs=hdates_rx_orig[v].attrs + hdates_rx[var] = xr.DataArray( + data=hdates_limited, + coords=hdates_rx_orig[var].coords, + attrs=hdates_rx_orig[var].attrs, ) print( - f" Limited {vegtype_str} growing season length to {mxmat}. Longest was {int(np.max(gs_len_rx_da.values))}, now {int(np.max(get_gs_len_da(hdates_rx[v] - sdates_rx[v]).values))}." + f" Limited {vegtype_str} growing season length to {mxmat}. Longest was " + + f"{int(np.max(gs_len_rx_da.values))}, now " + + f"{int(np.max(get_gs_len_da(hdates_rx[var] - sdates_rx[var]).values))}." 
) else: hdates_rx = hdates_rx_orig - log(logger, f" Importing accumulated GDDs...") + log(logger, " Importing accumulated GDDs...") clm_gdd_var = "GDDACCUM" - myVars = [clm_gdd_var, "GDDHARV"] - pattern = os.path.join(indir, f"*h2.{thisYear-1}-01-01*.nc") + my_vars = [clm_gdd_var, "GDDHARV"] + pattern = os.path.join(indir, f"*h2.{this_year-1}-01-01*.nc") h2_files = glob.glob(pattern) if not h2_files: - pattern = os.path.join(indir, f"*h2.{thisYear-1}-01-01*.nc.base") + pattern = os.path.join(indir, f"*h2.{this_year-1}-01-01*.nc.base") h2_files = glob.glob(pattern) if not h2_files: - error(logger, f"No files found matching pattern '*h2.{thisYear-1}-01-01*.nc(.base)'") + error(logger, f"No files found matching pattern '*h2.{this_year-1}-01-01*.nc(.base)'") h2_ds = utils.import_ds( h2_files, - myVars=myVars, + myVars=my_vars, myVegtypes=crops_to_read, chunks=chunks, ) @@ -503,181 +558,209 @@ def import_and_process_1yr( error(logger, f"All {clm_gdd_var} values are zero!") # Get standard datetime axis for outputs - Nyears = yN - y1 + 1 + n_years = year_n - year_1 + 1 if len(gddaccum_yp_list) == 0: - lastYear_active_patch_indices_list = [None for vegtype_str in incl_vegtypes_str] + last_year_active_patch_indices_list = [None for vegtype_str in incl_vegtypes_str] gddaccum_yp_list = [None for vegtype_str in incl_vegtypes_str] if save_figs: gddharv_yp_list = [None for vegtype_str in incl_vegtypes_str] incl_vegtype_indices = [] - for v, vegtype_str in enumerate(incl_vegtypes_str): + for var, vegtype_str in enumerate(incl_vegtypes_str): if vegtype_str in skip_crops: log(logger, f" SKIPPING {vegtype_str}") continue vegtype_int = utils.vegtype_str2int(vegtype_str)[0] - thisCrop_full_patchlist = list(utils.xr_flexsel(h2_ds, vegtype=vegtype_str).patch.values) + this_crop_full_patchlist = list(utils.xr_flexsel(h2_ds, vegtype=vegtype_str).patch.values) # Get time series for each patch of this type - thisCrop_ds = utils.xr_flexsel(h2_incl_ds, vegtype=vegtype_str) - thisCrop_gddaccum_da = thisCrop_ds[clm_gdd_var] + this_crop_ds = utils.xr_flexsel(h2_incl_ds, vegtype=vegtype_str) + this_crop_gddaccum_da = this_crop_ds[clm_gdd_var] if save_figs: - thisCrop_gddharv_da = thisCrop_ds["GDDHARV"] - if not thisCrop_gddaccum_da.size: + this_crop_gddharv_da = this_crop_ds["GDDHARV"] + if not this_crop_gddaccum_da.size: continue log(logger, f" {vegtype_str}...") - incl_vegtype_indices = incl_vegtype_indices + [v] + incl_vegtype_indices = incl_vegtype_indices + [var] # Get prescribed harvest dates for these patches - lon_points = thisCrop_ds.patches1d_lon.values - lat_points = thisCrop_ds.patches1d_lat.values - thisCrop_hdates_rx = thisCrop_map_to_patches(lon_points, lat_points, hdates_rx, vegtype_int) + lon_points = this_crop_ds.patches1d_lon.values + lat_points = this_crop_ds.patches1d_lat.values + this_crop_hdates_rx = this_crop_map_to_patches( + lon_points, lat_points, hdates_rx, vegtype_int + ) - if isinstance(gddaccum_yp_list[v], type(None)): - gddaccum_yp_list[v] = np.full((Nyears + 1, len(thisCrop_full_patchlist)), np.nan) + if isinstance(gddaccum_yp_list[var], type(None)): + gddaccum_yp_list[var] = np.full((n_years + 1, len(this_crop_full_patchlist)), np.nan) if save_figs: - gddharv_yp_list[v] = np.full((Nyears + 1, len(thisCrop_full_patchlist)), np.nan) + gddharv_yp_list[var] = np.full((n_years + 1, len(this_crop_full_patchlist)), np.nan) # Get the accumulated GDDs at each prescribed harvest date - gddaccum_atharv_p = np.full(thisCrop_hdates_rx.shape, np.nan) + gddaccum_atharv_p = 
np.full(this_crop_hdates_rx.shape, np.nan) if save_figs: - gddharv_atharv_p = np.full(thisCrop_hdates_rx.shape, np.nan) - unique_rx_hdates = np.unique(thisCrop_hdates_rx.values) + gddharv_atharv_p = np.full(this_crop_hdates_rx.shape, np.nan) + unique_rx_hdates = np.unique(this_crop_hdates_rx.values) # Build an indexing tuple patches = [] i_patches = [] i_times = [] - for i, hdate in enumerate(unique_rx_hdates): - here = np.where(thisCrop_hdates_rx.values == hdate)[0] - patches += list(thisCrop_gddaccum_da.patch.values[here]) + for hdate in unique_rx_hdates: + here = np.where(this_crop_hdates_rx.values == hdate)[0] + patches += list(this_crop_gddaccum_da.patch.values[here]) i_patches += list(here) i_times += list(np.full((len(here),), int(hdate - 1))) # Sort back to correct order if not np.all( - thisCrop_gddaccum_da.patch.values[:-1] <= thisCrop_gddaccum_da.patch.values[1:] + this_crop_gddaccum_da.patch.values[:-1] <= this_crop_gddaccum_da.patch.values[1:] ): error(logger, "This code depends on DataArray patch list being sorted.") sortorder = np.argsort(patches) i_patches = list(np.array(i_patches)[np.array(sortorder)]) i_times = list(np.array(i_times)[np.array(sortorder)]) # Select using the indexing tuple - gddaccum_atharv_p = thisCrop_gddaccum_da.values[(i_times, i_patches)] + gddaccum_atharv_p = this_crop_gddaccum_da.values[(i_times, i_patches)] if save_figs: - gddharv_atharv_p = thisCrop_gddharv_da.values[(i_times, i_patches)] + gddharv_atharv_p = this_crop_gddharv_da.values[(i_times, i_patches)] if np.any(np.isnan(gddaccum_atharv_p)): log( logger, - f" ❗ {np.sum(np.isnan(gddaccum_atharv_p))}/{len(gddaccum_atharv_p)} NaN after extracting GDDs accumulated at harvest", + f" ❗ {np.sum(np.isnan(gddaccum_atharv_p))}/{len(gddaccum_atharv_p)} " + + "NaN after extracting GDDs accumulated at harvest", ) if save_figs and np.any(np.isnan(gddharv_atharv_p)): log( logger, - f" ❗ {np.sum(np.isnan(gddharv_atharv_p))}/{len(gddharv_atharv_p)} NaN after extracting GDDHARV", + f" ❗ {np.sum(np.isnan(gddharv_atharv_p))}/{len(gddharv_atharv_p)} " + + "NaN after extracting GDDHARV", ) # Assign these to growing seasons based on whether gs crossed new year - thisYear_active_patch_indices = [ - thisCrop_full_patchlist.index(x) for x in thisCrop_ds.patch.values + this_year_active_patch_indices = [ + this_crop_full_patchlist.index(x) for x in this_crop_ds.patch.values ] - thisCrop_sdates_rx = thisCrop_map_to_patches(lon_points, lat_points, sdates_rx, vegtype_int) - where_gs_thisyr = np.where(thisCrop_sdates_rx < thisCrop_hdates_rx)[0] - tmp_gddaccum = np.full(thisCrop_sdates_rx.shape, np.nan) + this_crop_sdates_rx = this_crop_map_to_patches( + lon_points, lat_points, sdates_rx, vegtype_int + ) + where_gs_thisyr = np.where(this_crop_sdates_rx < this_crop_hdates_rx)[0] + tmp_gddaccum = np.full(this_crop_sdates_rx.shape, np.nan) tmp_gddaccum[where_gs_thisyr] = gddaccum_atharv_p[where_gs_thisyr] if save_figs: tmp_gddharv = np.full(tmp_gddaccum.shape, np.nan) tmp_gddharv[where_gs_thisyr] = gddharv_atharv_p[where_gs_thisyr] - if y > 0: - lastYear_active_patch_indices = lastYear_active_patch_indices_list[v] - where_gs_lastyr = np.where(thisCrop_sdates_rx > thisCrop_hdates_rx)[0] - active_thisYear_where_gs_lastyr_indices = [ - thisYear_active_patch_indices[x] for x in where_gs_lastyr + if year_index > 0: + last_year_active_patch_indices = last_year_active_patch_indices_list[var] + where_gs_lastyr = np.where(this_crop_sdates_rx > this_crop_hdates_rx)[0] + active_this_year_where_gs_lastyr_indices = [ + 
this_year_active_patch_indices[x] for x in where_gs_lastyr ] - if not np.array_equal(lastYear_active_patch_indices, thisYear_active_patch_indices): + if not np.array_equal(last_year_active_patch_indices, this_year_active_patch_indices): if incorrectly_daily: log( logger, - " ❗ This year's active patch indices differ from last year's. Allowing because this might just be an artifact of incorrectly daily outputs, BUT RESULTS MUST NOT BE TRUSTED.", + " ❗ This year's active patch indices differ from last year's. " + + "Allowing because this might just be an artifact of incorrectly daily " + + "outputs, BUT RESULTS MUST NOT BE TRUSTED.", ) else: error(logger, "This year's active patch indices differ from last year's.") # Make sure we're not about to overwrite any existing values. if np.any( - ~np.isnan(gddaccum_yp_list[v][y - 1, active_thisYear_where_gs_lastyr_indices]) + ~np.isnan( + gddaccum_yp_list[var][year_index - 1, active_this_year_where_gs_lastyr_indices] + ) ): if incorrectly_daily: log( logger, - " ❗ Unexpected non-NaN for last season's GDD accumulation. Allowing because this might just be an artifact of incorrectly daily outputs, BUT RESULTS MUST NOT BE TRUSTED.", + " ❗ Unexpected non-NaN for last season's GDD accumulation. " + + "Allowing because this might just be an artifact of incorrectly daily " + + "outputs, BUT RESULTS MUST NOT BE TRUSTED.", ) else: error(logger, "Unexpected non-NaN for last season's GDD accumulation") if save_figs and np.any( - ~np.isnan(gddharv_yp_list[v][y - 1, active_thisYear_where_gs_lastyr_indices]) + ~np.isnan( + gddharv_yp_list[var][year_index - 1, active_this_year_where_gs_lastyr_indices] + ) ): if incorrectly_daily: log( logger, - " ❗ Unexpected non-NaN for last season's GDDHARV. Allowing because this might just be an artifact of incorrectly daily outputs, BUT RESULTS MUST NOT BE TRUSTED.", + " ❗ Unexpected non-NaN for last season's GDDHARV. Allowing " + + "because this might just be an artifact of incorrectly daily outputs, " + + "BUT RESULTS MUST NOT BE TRUSTED.", ) else: error(logger, "Unexpected non-NaN for last season's GDDHARV") # Fill. - gddaccum_yp_list[v][y - 1, active_thisYear_where_gs_lastyr_indices] = gddaccum_atharv_p[ - where_gs_lastyr - ] + gddaccum_yp_list[var][ + year_index - 1, active_this_year_where_gs_lastyr_indices + ] = gddaccum_atharv_p[where_gs_lastyr] if save_figs: - gddharv_yp_list[v][ - y - 1, active_thisYear_where_gs_lastyr_indices + gddharv_yp_list[var][ + year_index - 1, active_this_year_where_gs_lastyr_indices ] = gddharv_atharv_p[where_gs_lastyr] # Last year's season should be filled out now; make sure. if np.any( - np.isnan(gddaccum_yp_list[v][y - 1, active_thisYear_where_gs_lastyr_indices]) + np.isnan( + gddaccum_yp_list[var][year_index - 1, active_this_year_where_gs_lastyr_indices] + ) ): if incorrectly_daily: log( logger, - " ❗ Unexpected NaN for last season's GDD accumulation. Allowing because this might just be an artifact of incorrectly daily outputs, BUT RESULTS MUST NOT BE TRUSTED.", + " ❗ Unexpected NaN for last season's GDD accumulation. 
Allowing " + + "because this might just be an artifact of incorrectly daily outputs, " + + "BUT RESULTS MUST NOT BE TRUSTED.", ) else: error(logger, "Unexpected NaN for last season's GDD accumulation.") if save_figs and np.any( - np.isnan(gddharv_yp_list[v][y - 1, active_thisYear_where_gs_lastyr_indices]) + np.isnan( + gddharv_yp_list[var][year_index - 1, active_this_year_where_gs_lastyr_indices] + ) ): if incorrectly_daily: log( logger, - " ❗ Unexpected NaN for last season's GDDHARV. Allowing because this might just be an artifact of incorrectly daily outputs, BUT RESULTS MUST NOT BE TRUSTED.", + " ❗ Unexpected NaN for last season's GDDHARV. Allowing because " + + "this might just be an artifact of incorrectly daily outputs, BUT " + + "RESULTS MUST NOT BE TRUSTED.", ) else: error(logger, "Unexpected NaN for last season's GDDHARV.") - gddaccum_yp_list[v][y, thisYear_active_patch_indices] = tmp_gddaccum + gddaccum_yp_list[var][year_index, this_year_active_patch_indices] = tmp_gddaccum if save_figs: - gddharv_yp_list[v][y, thisYear_active_patch_indices] = tmp_gddharv + gddharv_yp_list[var][year_index, this_year_active_patch_indices] = tmp_gddharv - # Make sure that NaN masks are the same for this year's sdates and 'filled-out' GDDs from last year - if y > 0: + # Make sure that NaN masks are the same for this year's sdates and 'filled-out' GDDs from + # last year + if year_index > 0: nanmask_output_sdates = np.isnan( dates_ds.SDATES.isel( mxsowings=0, patch=np.where(dates_ds.patches1d_itype_veg_str == vegtype_str)[0] ).values ) - nanmask_output_gdds_lastyr = np.isnan(gddaccum_yp_list[v][y - 1, :]) + nanmask_output_gdds_lastyr = np.isnan(gddaccum_yp_list[var][year_index - 1, :]) if not np.array_equal(nanmask_output_gdds_lastyr, nanmask_output_sdates): if incorrectly_daily: log( logger, - " ❗ NaN masks differ between this year's sdates and 'filled-out' GDDs from last year. Allowing because this might just be an artifact of incorrectly daily outputs, BUT RESULTS MUST NOT BE TRUSTED.", + " ❗ NaN masks differ between this year's sdates and 'filled-out' " + + "GDDs from last year. 
Allowing because this might just be an artifact of " + + "incorrectly daily outputs, BUT RESULTS MUST NOT BE TRUSTED.", ) else: error( logger, - "NaN masks differ between this year's sdates and 'filled-out' GDDs from last year", + "NaN masks differ between this year's sdates and 'filled-out' GDDs from " + + "last year", ) - lastYear_active_patch_indices_list[v] = thisYear_active_patch_indices + last_year_active_patch_indices_list[var] = this_year_active_patch_indices - skip_patches_for_isel_nan_lastyear = skip_patches_for_isel_nan + skip_patches_for_isel_nan_last_year = skip_patches_for_isel_nan # Could save space by only saving variables needed for gridding log(logger, " Saving h2_ds...") @@ -689,8 +772,8 @@ def import_and_process_1yr( hdates_rx, gddaccum_yp_list, gddharv_yp_list, - skip_patches_for_isel_nan_lastyear, - lastYear_active_patch_indices_list, + skip_patches_for_isel_nan_last_year, + last_year_active_patch_indices_list, incorrectly_daily, incl_vegtypes_str, incl_patches1d_itype_veg, @@ -698,35 +781,37 @@ def import_and_process_1yr( ) -def get_multicrop_maps(ds, theseVars, crop_fracs_yx, dummy_fill, gdd_units): +def get_multicrop_maps(this_ds, these_vars, crop_fracs_yx, dummy_fill, gdd_units): + # pylint: disable=missing-function-docstring # Get GDDs for these crops - da_eachCFT = xr.concat((ds[x] for i, x in enumerate(theseVars)), dim="cft") - if "time" in ds.dims: - da_eachCFT = da_eachCFT.isel(time=0, drop=True) - da_eachCFT = da_eachCFT.where(da_eachCFT != dummy_fill) - da_eachCFT.attrs["units"] = gdd_units + da_each_cft = xr.concat((this_ds[x] for i, x in enumerate(these_vars)), dim="cft") + if "time" in this_ds.dims: + da_each_cft = da_each_cft.isel(time=0, drop=True) + da_each_cft = da_each_cft.where(da_each_cft != dummy_fill) + da_each_cft.attrs["units"] = gdd_units # What are the maximum differences seen between different crop types? 
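An aside before the body of get_multicrop_maps continues: the function's core computation masks out area wherever GDD is undefined, renormalizes the remaining crop fractions into per-CFT weights, and takes the weighted mean over the cft dimension. The following is a minimal, self-contained sketch of that pattern with made-up numbers (none of these values come from the patch):

import numpy as np
import xarray as xr

# Two crop types ("cft") on a 2x2 grid: GDD requirements and area fractions
gdd = xr.DataArray(
    [[[1000.0, 1200.0], [1100.0, np.nan]], [[1400.0, 1600.0], [1300.0, 1500.0]]],
    dims=("cft", "lat", "lon"),
)
fracs = xr.DataArray(
    [[[0.25, 0.5], [1.0, 0.0]], [[0.75, 0.5], [0.0, 1.0]]],
    dims=("cft", "lat", "lon"),
)

fracs = fracs.where(~np.isnan(gdd))  # drop area where GDD is undefined
area = fracs.sum(dim="cft")          # total crop area per cell
weights = fracs / area               # per-cft weights that sum to 1
mean_gdd = (gdd * weights).sum(dim="cft").where(area > 0)

# The weighted mean must lie within each cell's per-cft range, mirroring the
# range check the function itself performs further down
assert not ((mean_gdd < gdd.min(dim="cft")) | (mean_gdd > gdd.max(dim="cft"))).any()
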
- if len(theseVars) > 1: - maxDiff = np.nanmax(da_eachCFT.max(dim="cft") - da_eachCFT.min(dim="cft")) - if maxDiff > 0: - print(f" Max difference among crop types: {np.round(maxDiff)}") + if len(these_vars) > 1: + max_diff = np.nanmax(da_each_cft.max(dim="cft") - da_each_cft.min(dim="cft")) + if max_diff > 0: + print(f" Max difference among crop types: {np.round(max_diff)}") if crop_fracs_yx is None: - return da_eachCFT.isel(cft=0, drop=True) + return da_each_cft.isel(cft=0, drop=True) # Warn if GDD is NaN anywhere that there is area - da_eachCFT["cft"] = crop_fracs_yx["cft"] - gddNaN_areaPos = np.isnan(da_eachCFT) & (crop_fracs_yx > 0) - if np.any(gddNaN_areaPos): - total_bad_croparea = np.nansum(crop_fracs_yx.where(gddNaN_areaPos).values) + da_each_cft["cft"] = crop_fracs_yx["cft"] + gdd_nan_area_pos = np.isnan(da_each_cft) & (crop_fracs_yx > 0) + if np.any(gdd_nan_area_pos): + total_bad_croparea = np.nansum(crop_fracs_yx.where(gdd_nan_area_pos).values) total_croparea = np.nansum(crop_fracs_yx.values) print( - f" GDD reqt NaN but area positive ({np.round(total_bad_croparea/total_croparea*100, 1)}% of this crop's area)" + " GDD reqt NaN but area positive " + + f"({np.round(total_bad_croparea/total_croparea*100, 1)}% of this crop's area)" ) # Get areas and weights, masking cell-crops with NaN GDDs - crop_fracs_yx = crop_fracs_yx.where(~np.isnan(da_eachCFT)) + crop_fracs_yx = crop_fracs_yx.where(~np.isnan(da_each_cft)) crop_area_yx = crop_fracs_yx.sum(dim="cft") weights_yx = crop_fracs_yx / crop_area_yx weights_sum_gt0 = weights_yx.sum(dim="cft").where(weights_yx > 0) @@ -734,45 +819,48 @@ def get_multicrop_maps(ds, theseVars, crop_fracs_yx, dummy_fill, gdd_units): assert np.isclose(np.nanmax(weights_sum_gt0.values), 1.0) # Mask GDDs and weights where there is no area - da_eachCFT = da_eachCFT.where(crop_fracs_yx > 0) - if len(theseVars) == 1: - return da_eachCFT.isel(cft=0, drop=True) + da_each_cft = da_each_cft.where(crop_fracs_yx > 0) + if len(these_vars) == 1: + return da_each_cft.isel(cft=0, drop=True) weights_yx = weights_yx.where(crop_fracs_yx > 0) weights_sum = weights_yx.sum(dim="cft").where(crop_area_yx > 0) assert np.isclose(np.nanmin(weights_sum.values), 1.0) assert np.isclose(np.nanmax(weights_sum.values), 1.0) # Ensure grid match between GDDs and weights - if not np.array_equal(da_eachCFT["lon"].values, weights_yx["lon"].values): + if not np.array_equal(da_each_cft["lon"].values, weights_yx["lon"].values): raise RuntimeError("lon mismatch") - if not np.array_equal(da_eachCFT["lat"].values, weights_yx["lat"].values): + if not np.array_equal(da_each_cft["lat"].values, weights_yx["lat"].values): raise RuntimeError("lat mismatch") # Get area-weighted mean GDD requirements for all crops - da = (da_eachCFT * weights_yx).sum(dim="cft") - da.attrs["units"] = gdd_units - da = da.where(crop_area_yx > 0) + this_da = (da_each_cft * weights_yx).sum(dim="cft") + this_da.attrs["units"] = gdd_units + this_da = this_da.where(crop_area_yx > 0) # Ensure that weighted mean is between each cell's min and max - whereBad = (da < da_eachCFT.min(dim="cft")) | (da > da_eachCFT.max(dim="cft")) - if np.any(whereBad): - where_belowMin = da.where(da < da_eachCFT.min(dim="cft")) - worst_belowMin = np.min((da_eachCFT.min(dim="cft") - where_belowMin).values) - where_aboveMax = da.where(da > da_eachCFT.max(dim="cft")) - worst_aboveMax = np.max((where_aboveMax - da_eachCFT.max(dim="cft")).values) - worst = max(worst_belowMin, worst_aboveMax) + where_bad = (this_da < da_each_cft.min(dim="cft")) | (this_da > 
da_each_cft.max(dim="cft")) + if np.any(where_bad): + where_below_min = this_da.where(this_da < da_each_cft.min(dim="cft")) + worst_below_min = np.min((da_each_cft.min(dim="cft") - where_below_min).values) + where_above_max = this_da.where(this_da > da_each_cft.max(dim="cft")) + worst_above_max = np.max((where_above_max - da_each_cft.max(dim="cft")).values) + worst = max(worst_below_min, worst_above_max) tol = 1e-12 if worst > 1e-12: raise RuntimeError( f"Some value is outside expected range by {worst} (exceeds tolerance {tol})" ) - return da + return this_da -if can_plot: +if CAN_PLOT: def get_bounds_ncolors(gdd_spacing, diff_map_yx): + """ + Get information about color bar + """ vmax = np.floor(np.nanmax(diff_map_yx.values) / gdd_spacing) * gdd_spacing vmin = -vmax epsilon = np.nextafter(0, 1) @@ -781,11 +869,11 @@ def get_bounds_ncolors(gdd_spacing, diff_map_yx): bounds.remove(0) bounds[bounds.index(-gdd_spacing)] /= 2 bounds[bounds.index(gdd_spacing)] /= 2 - Ncolors = len(bounds) + 1 - return vmax, bounds, Ncolors + n_colors = len(bounds) + 1 + return vmax, bounds, n_colors - def make_map( - ax, + def make_gengdd_map( + this_axis, this_map, this_title, vmax, @@ -798,11 +886,14 @@ def make_map( cbar_ticks=None, vmin=None, ): + """ + Make maps + """ if bounds: if not cmap: raise RuntimeError("Calling make_map() with bounds requires cmap to be specified") norm = mcolors.BoundaryNorm(bounds, cmap.N, extend=extend) - im1 = ax.pcolormesh( + im1 = this_axis.pcolormesh( this_map.lon.values, this_map.lat.values, this_map, @@ -817,11 +908,11 @@ def make_map( if vmin is not None: raise RuntimeError("Do not specify vmin in this call of make_map()") vmin = -vmax - Ncolors = vmax / gdd_spacing - if Ncolors % 2 == 0: - Ncolors += 1 + n_colors = vmax / gdd_spacing + if n_colors % 2 == 0: + n_colors += 1 if not cmap: - cmap = cm.get_cmap(cropcal_colors["div_other_nonnorm"], Ncolors) + cmap = cm.get_cmap(cropcal_colors["div_other_nonnorm"], n_colors) if np.any(this_map.values > vmax) and np.any(this_map.values < vmin): extend = "both" @@ -838,15 +929,15 @@ def make_map( else: vmin = np.floor(vmin / 500) * 500 vmax = np.floor(vmax / 500) * 500 - Ncolors = int(vmax / 500) + n_colors = int(vmax / 500) if not cmap: - cmap = cm.get_cmap(cropcal_colors["seq_other"], Ncolors + 1) + cmap = cm.get_cmap(cropcal_colors["seq_other"], n_colors + 1) extend = "max" extend_color = cmap.colors[-1] - cmap = mcolors.ListedColormap(cmap.colors[:Ncolors]) + cmap = mcolors.ListedColormap(cmap.colors[:n_colors]) cmap.set_over(extend_color) - im1 = ax.pcolormesh( + im1 = this_axis.pcolormesh( this_map.lon.values, this_map.lat.values, this_map, @@ -856,9 +947,9 @@ def make_map( cmap=cmap, ) - ax.set_extent([-180, 180, -63, 90], crs=ccrs.PlateCarree()) - ax.coastlines(linewidth=0.3) - ax.set_title(this_title, fontsize=fontsize_titles, fontweight="bold", y=0.96) + this_axis.set_extent([-180, 180, -63, 90], crs=ccrs.PlateCarree()) + this_axis.coastlines(linewidth=0.3) + this_axis.set_title(this_title, fontsize=fontsize_titles, fontweight="bold", y=0.96) cbar = plt.colorbar( im1, orientation="horizontal", @@ -876,24 +967,30 @@ def make_map( ticks = np.arange(-60, 91, bin_width) ticklabels = [str(x) for x in ticks] - for i, x in enumerate(ticks): - if x % 2: + for i, tick in enumerate(ticks): + if tick % 2: ticklabels[i] = "" plt.yticks(np.arange(-60, 91, 15), labels=ticklabels, fontsize=fontsize_ticklabels) plt.axis("off") - def get_non_nans(in_da, fillValue): - in_da = in_da.where(in_da != fillValue) + def get_non_nans(in_da, 
fill_value): + """ + Get non-NaN, non-fill values of a DataArray + """ + in_da = in_da.where(in_da != fill_value) return in_da.values[~np.isnan(in_da.values)] - def set_boxplot_props(bp, color, linewidth): + def set_boxplot_props(bpl, color, linewidth): + """ + Set boxplot properties + """ linewidth = 1.5 - plt.setp(bp["boxes"], color=color, linewidth=linewidth) - plt.setp(bp["whiskers"], color=color, linewidth=linewidth) - plt.setp(bp["caps"], color=color, linewidth=linewidth) - plt.setp(bp["medians"], color=color, linewidth=linewidth) + plt.setp(bpl["boxes"], color=color, linewidth=linewidth) + plt.setp(bpl["whiskers"], color=color, linewidth=linewidth) + plt.setp(bpl["caps"], color=color, linewidth=linewidth) + plt.setp(bpl["medians"], color=color, linewidth=linewidth) plt.setp( - bp["fliers"], + bpl["fliers"], markeredgecolor=color, markersize=6, linewidth=linewidth, @@ -901,16 +998,19 @@ def set_boxplot_props(bp, color, linewidth): ) def make_plot(data, offset, linewidth): + """ + Make boxplot + """ offset = 0.4 * offset bpl = plt.boxplot( data, positions=np.array(range(len(data))) * 2.0 + offset, widths=0.6, - boxprops=dict(linewidth=linewidth), - whiskerprops=dict(linewidth=linewidth), - capprops=dict(linewidth=linewidth), - medianprops=dict(linewidth=linewidth), - flierprops=dict(markeredgewidth=0.5), + boxprops={"linewidth": linewidth}, + whiskerprops={"linewidth": linewidth}, + capprops={"linewidth": linewidth}, + medianprops={"linewidth": linewidth}, + flierprops={"markeredgewidth": 0.5}, ) return bpl @@ -921,26 +1021,31 @@ def make_figures( run1_name, run2_name, logger, - thisDir=None, + this_dir=None, gdd_maps_ds=None, gddharv_maps_ds=None, outdir_figs=None, linewidth=1.5, ): + """ + Make map-and-boxplot figures + """ if not gdd_maps_ds: - if not thisDir: + if not this_dir: error( logger, - "If not providing gdd_maps_ds, you must provide thisDir (location of gdd_maps.nc)", + "If not providing gdd_maps_ds, you must provide thisDir (location of " + + "gdd_maps.nc)", ) - gdd_maps_ds = xr.open_dataset(thisDir + "gdd_maps.nc") + gdd_maps_ds = xr.open_dataset(this_dir + "gdd_maps.nc") if not gddharv_maps_ds: - if not thisDir: + if not this_dir: error( logger, - "If not providing gddharv_maps_ds, you must provide thisDir (location of gddharv_maps.nc)", + "If not providing gddharv_maps_ds, you must provide thisDir (location of " + + "gddharv_maps.nc)", ) - gddharv_maps_ds = xr.open_dataset(thisDir + "gdd_maps.nc") + gddharv_maps_ds = xr.open_dataset(this_dir + "gdd_maps.nc") # Get info incl_vegtypes_str = gdd_maps_ds.attrs["incl_vegtypes_str"] @@ -952,19 +1057,19 @@ def make_figures( if not outdir_figs: outdir_figs = gdd_maps_ds.attrs["outdir_figs"] try: - y1 = gdd_maps_ds.attrs["y1"] - yN = gdd_maps_ds.attrs["yN"] + year_1 = gdd_maps_ds.attrs["y1"] + year_n = gdd_maps_ds.attrs["yN"] # Backwards compatibility with a bug (fixed 2023-01-03) - except: - y1 = gdd_maps_ds.attrs["first_season"] - yN = gdd_maps_ds.attrs["last_season"] + except KeyError: + year_1 = gdd_maps_ds.attrs["first_season"] + year_n = gdd_maps_ds.attrs["last_season"] # Import LU data, if doing so if land_use_file: - y1_lu = y1 if first_land_use_year == None else first_land_use_year - yN_lu = yN if last_land_use_year == None else last_land_use_year - lu_ds = cc.open_lu_ds(land_use_file, y1_lu, yN_lu, gdd_maps_ds, ungrid=False) - lu_years_text = f" (masked by {y1_lu}-{yN_lu} area)" - lu_years_file = f"_mask{y1_lu}-{yN_lu}" + year_1_lu = year_1 if first_land_use_year is None else first_land_use_year + year_n_lu = year_n 
if last_land_use_year is None else last_land_use_year + lu_ds = cc.open_lu_ds(land_use_file, year_1_lu, year_n_lu, gdd_maps_ds, ungrid=False) + lu_years_text = f" (masked by {year_1_lu}-{year_n_lu} area)" + lu_years_file = f"_mask{year_1_lu}-{year_n_lu}" else: lu_ds = None lu_years_text = "" @@ -980,11 +1085,11 @@ def make_figures( fontsize_axislabels = 12 fontsize_ticklabels = 12 - Nbins = len(lat_bin_edges) - 1 + n_bins = len(lat_bin_edges) - 1 bin_names = ["All"] - for b in np.arange(Nbins): - lower = lat_bin_edges[b] - upper = lat_bin_edges[b + 1] + for this_bin in np.arange(n_bins): + lower = lat_bin_edges[this_bin] + upper = lat_bin_edges[this_bin + 1] bin_names.append(f"{lower}–{upper}") color_old = cropcal_colors_cases(run1_name) @@ -996,13 +1101,13 @@ def make_figures( gdd_units = "GDD (°C • day)" # Maps - ny = 3 - nx = 1 + nplot_y = 3 + nplot_x = 1 log(logger, "Making before/after maps...") vegtype_list = incl_vegtypes_str if land_use_file: vegtype_list += ["Corn", "Cotton", "Rice", "Soybean", "Sugarcane", "Wheat"] - for v, vegtype_str in enumerate(vegtype_list): + for vegtype_str in vegtype_list: print(f"{vegtype_str}...") # Get component types @@ -1025,12 +1130,12 @@ def make_figures( else: crop_fracs_yx = None - theseVars = [f"gdd1_{x}" for x in vegtypes_int] + these_vars = [f"gdd1_{x}" for x in vegtypes_int] gddharv_map_yx = get_multicrop_maps( - gddharv_maps_ds, theseVars, crop_fracs_yx, dummy_fill, gdd_units + gddharv_maps_ds, these_vars, crop_fracs_yx, dummy_fill, gdd_units ) gdd_map_yx = get_multicrop_maps( - gdd_maps_ds, theseVars, crop_fracs_yx, dummy_fill, gdd_units + gdd_maps_ds, these_vars, crop_fracs_yx, dummy_fill, gdd_units ) # Get figure title @@ -1048,25 +1153,25 @@ def make_figures( # Set up figure and first subplot if layout == "3x1": fig = plt.figure(figsize=(7.5, 14)) - ax = fig.add_subplot(ny, nx, 1, projection=ccrs.PlateCarree()) + this_axis = fig.add_subplot(nplot_y, nplot_x, 1, projection=ccrs.PlateCarree()) elif layout == "2x2": fig = plt.figure(figsize=(12, 6)) spec = fig.add_gridspec(nrows=2, ncols=2, width_ratios=[0.4, 0.6]) - ax = fig.add_subplot(spec[0, 0], projection=ccrs.PlateCarree()) + this_axis = fig.add_subplot(spec[0, 0], projection=ccrs.PlateCarree()) elif layout == "3x2": fig = plt.figure(figsize=(14, 9)) spec = fig.add_gridspec(nrows=3, ncols=2, width_ratios=[0.5, 0.5], wspace=0.2) - ax = fig.add_subplot(spec[0, 0], projection=ccrs.PlateCarree()) + this_axis = fig.add_subplot(spec[0, 0], projection=ccrs.PlateCarree()) else: error(logger, f"layout {layout} not recognized") - thisMin = int(np.round(np.nanmin(gddharv_map_yx))) - thisMax = int(np.round(np.nanmax(gddharv_map_yx))) - thisTitle = f"{run1_name} (range {thisMin}–{thisMax})" - make_map( - ax, + this_min = int(np.round(np.nanmin(gddharv_map_yx))) + this_max = int(np.round(np.nanmax(gddharv_map_yx))) + this_title = f"{run1_name} (range {this_min}–{this_max})" + make_gengdd_map( + this_axis, gddharv_map_yx, - thisTitle, + this_title, vmax, bin_width, fontsize_ticklabels, @@ -1075,18 +1180,18 @@ def make_figures( ) if layout == "3x1": - ax = fig.add_subplot(ny, nx, 2, projection=ccrs.PlateCarree()) + this_axis = fig.add_subplot(nplot_y, nplot_x, 2, projection=ccrs.PlateCarree()) elif layout in ["2x2", "3x2"]: - ax = fig.add_subplot(spec[1, 0], projection=ccrs.PlateCarree()) + this_axis = fig.add_subplot(spec[1, 0], projection=ccrs.PlateCarree()) else: error(logger, f"layout {layout} not recognized") - thisMin = int(np.round(np.nanmin(gdd_map_yx))) - thisMax = 
int(np.round(np.nanmax(gdd_map_yx))) - thisTitle = f"{run2_name} (range {thisMin}–{thisMax})" - make_map( - ax, + this_min = int(np.round(np.nanmin(gdd_map_yx))) + this_max = int(np.round(np.nanmax(gdd_map_yx))) + this_title = f"{run2_name} (range {this_min}–{this_max})" + make_gengdd_map( + this_axis, gdd_map_yx, - thisTitle, + this_title, vmax, bin_width, fontsize_ticklabels, @@ -1096,22 +1201,22 @@ def make_figures( # Difference if layout == "3x2": - ax = fig.add_subplot(spec[2, 0], projection=ccrs.PlateCarree()) - thisMin = int(np.round(np.nanmin(gdd_map_yx))) - thisMax = int(np.round(np.nanmax(gdd_map_yx))) - thisTitle = f"{run2_name} minus {run1_name}" + this_axis = fig.add_subplot(spec[2, 0], projection=ccrs.PlateCarree()) + this_min = int(np.round(np.nanmin(gdd_map_yx))) + this_max = int(np.round(np.nanmax(gdd_map_yx))) + this_title = f"{run2_name} minus {run1_name}" diff_map_yx = gdd_map_yx - gddharv_map_yx diff_map_yx.attrs["units"] = gdd_units gdd_spacing = 500 - vmax, bounds, Ncolors = get_bounds_ncolors(gdd_spacing, diff_map_yx) - if Ncolors < 9: + vmax, bounds, n_colors = get_bounds_ncolors(gdd_spacing, diff_map_yx) + if n_colors < 9: gdd_spacing = 250 - vmax, bounds, Ncolors = get_bounds_ncolors(gdd_spacing, diff_map_yx) + vmax, bounds, n_colors = get_bounds_ncolors(gdd_spacing, diff_map_yx) - cmap = cm.get_cmap(cropcal_colors["div_other_nonnorm"], Ncolors) + cmap = cm.get_cmap(cropcal_colors["div_other_nonnorm"], n_colors) cbar_ticks = [] - include_0bin_ticks = Ncolors <= 13 + include_0bin_ticks = n_colors <= 13 if vmax <= 3000: tick_spacing = gdd_spacing * 2 elif vmax <= 5000: @@ -1119,17 +1224,19 @@ def make_figures( else: tick_spacing = 2000 previous = -np.inf - for x in bounds: - if (not include_0bin_ticks) and (x > 0) and (previous < 0): + for bound in bounds: + if (not include_0bin_ticks) and (previous < 0 < bound): cbar_ticks.append(0) - if x % tick_spacing == 0 or (include_0bin_ticks and abs(x) == gdd_spacing / 2): - cbar_ticks.append(x) - previous = x - - make_map( - ax, + if bound % tick_spacing == 0 or ( + include_0bin_ticks and abs(bound) == gdd_spacing / 2 + ): + cbar_ticks.append(bound) + previous = bound + + make_gengdd_map( + this_axis, diff_map_yx, - thisTitle, + this_title, vmax, bin_width, fontsize_ticklabels, @@ -1148,25 +1255,25 @@ def make_figures( lat_abs = np.abs(gdd_map_yx.lat.values) gdd_bybin_old = [gddharv_vector] gdd_bybin_new = [gdd_vector] - for b in np.arange(Nbins): - lower = lat_bin_edges[b] - upper = lat_bin_edges[b + 1] + for this_bin in np.arange(n_bins): + lower = lat_bin_edges[this_bin] + upper = lat_bin_edges[this_bin + 1] lat_inds = np.where((lat_abs >= lower) & (lat_abs < upper))[0] - gdd_vector_thisBin = get_non_nans(gdd_map_yx[lat_inds, :], dummy_fill) - gddharv_vector_thisBin = get_non_nans(gddharv_map_yx[lat_inds, :], dummy_fill) - gdd_bybin_old.append(gddharv_vector_thisBin) - gdd_bybin_new.append(gdd_vector_thisBin) + this_bin_gdd_vector = get_non_nans(gdd_map_yx[lat_inds, :], dummy_fill) + this_bin_gddharv_vector = get_non_nans(gddharv_map_yx[lat_inds, :], dummy_fill) + gdd_bybin_old.append(this_bin_gddharv_vector) + gdd_bybin_new.append(this_bin_gdd_vector) if layout == "3x1": - ax = fig.add_subplot(ny, nx, 3) + this_axis = fig.add_subplot(nplot_y, nplot_x, 3) elif layout in ["2x2", "3x2"]: - ax = fig.add_subplot(spec[:, 1]) + this_axis = fig.add_subplot(spec[:, 1]) else: error(logger, f"layout {layout} not recognized") # Shift bottom of plot up to make room for legend - ax_pos = ax.get_position() - 
ax.set_position(Bbox.from_extents(ax_pos.x0, 0.19, ax_pos.x1, ax_pos.y1)) + ax_pos = this_axis.get_position() + this_axis.set_position(Bbox.from_extents(ax_pos.x0, 0.19, ax_pos.x1, ax_pos.y1)) # Define legend position legend_bbox_to_anchor = (0, -0.15, 1, 0.2) @@ -1188,13 +1295,13 @@ def make_figures( plt.xticks(range(0, len(bin_names) * 2, 2), bin_names, fontsize=fontsize_ticklabels) plt.yticks(fontsize=fontsize_ticklabels) - ax.spines["right"].set_visible(False) - ax.spines["top"].set_visible(False) + this_axis.spines["right"].set_visible(False) + this_axis.spines["top"].set_visible(False) plt.xlabel("Latitude zone (absolute value)", fontsize=fontsize_axislabels) plt.ylabel(gdd_units, fontsize=fontsize_axislabels) - ax.yaxis.set_label_coords(-0.11, 0.5) - plt.title(f"Zonal changes", fontsize=fontsize_titles, fontweight="bold") + this_axis.yaxis.set_label_coords(-0.11, 0.5) + plt.title("Zonal changes", fontsize=fontsize_titles, fontweight="bold") plt.suptitle( f"Maturity requirements: {vegtype_str_title}" + lu_years_text, @@ -1205,10 +1312,13 @@ def make_figures( if vegtype_str in incl_vegtypes_str: outfile = os.path.join( - outdir_figs, f"{theseVars[0]}_{vegtype_str}_gs{y1}-{yN}{lu_years_file}.png" + outdir_figs, + f"{these_vars[0]}_{vegtype_str}_gs{year_1}-{year_n}{lu_years_file}.png", ) else: - outfile = os.path.join(outdir_figs, f"{vegtype_str}_gs{y1}-{yN}{lu_years_file}.png") + outfile = os.path.join( + outdir_figs, f"{vegtype_str}_gs{year_1}-{year_n}{lu_years_file}.png" + ) plt.savefig(outfile, dpi=300, transparent=False, facecolor="white", bbox_inches="tight") plt.close() From 9c9b23f1fe07e7461fcc23e243e1dd0eacd46e90 Mon Sep 17 00:00:00 2001 From: Sam Rabin Date: Tue, 30 Jan 2024 15:32:39 -0700 Subject: [PATCH 40/85] Satisfy pylint for generate_gdds.py. --- python/ctsm/crop_calendars/generate_gdds.py | 149 ++++++++++---------- 1 file changed, 76 insertions(+), 73 deletions(-) diff --git a/python/ctsm/crop_calendars/generate_gdds.py b/python/ctsm/crop_calendars/generate_gdds.py index 16e3e130da..1af3744b28 100644 --- a/python/ctsm/crop_calendars/generate_gdds.py +++ b/python/ctsm/crop_calendars/generate_gdds.py @@ -1,32 +1,29 @@ -paramfile_dir = "/glade/campaign/cesm/cesmdata/cseg/inputdata/lnd/clm2/paramdata" - -# Import other shared functions +""" +Generate maturity requirements (GDD) from outputs of a GDD-generating run +""" import os -import inspect import sys +import pickle +import datetime as dt +import argparse +import logging +import numpy as np +import xarray as xr # Import the CTSM Python utilities. -# sys.path.insert() is necessary for RXCROPMATURITY to work. The fact that it's calling this script in the RUN phase seems to require the python/ directory to be manually added to path. +# sys.path.insert() is necessary for RXCROPMATURITY to work. The fact that it's calling this script +# in the RUN phase seems to require the python/ directory to be manually added to path. 
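The bootstrap that follows is a standard idiom: compute the repository's python/ directory relative to this file and splice it into the import search path before any ctsm imports. A toy sketch of the idiom (the paths here are illustrative only, not the repository's actual layout); note that inserting at index 1 rather than 0 leaves the script's own directory at the front of sys.path:

import os
import sys

# Directory two levels up from this script, resolved to an absolute path
_pkg_root = os.path.realpath(
    os.path.join(os.path.dirname(os.path.realpath(__file__)), os.pardir, os.pardir)
)
if _pkg_root not in sys.path:
    sys.path.insert(1, _pkg_root)  # index 1: keep the script's own dir first
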
_CTSM_PYTHON = os.path.join( os.path.dirname(os.path.realpath(__file__)), os.pardir, os.pardir, os.pardir, "python" ) sys.path.insert(1, _CTSM_PYTHON) -import ctsm.crop_calendars.cropcal_module as cc -import ctsm.crop_calendars.generate_gdds_functions as gddfn - -# Import everything else -import os -import sys -import numpy as np -import xarray as xr -import pickle -import datetime as dt -import argparse -import logging +import ctsm.crop_calendars.cropcal_module as cc # pylint: disable=wrong-import-position +import ctsm.crop_calendars.generate_gdds_functions as gddfn # pylint: disable=wrong-import-position -# Info re: PFT parameter set -my_clm_ver = 51 -my_clm_subver = "c211112" +# Global constants +PARAMFILE_DIR = "/glade/campaign/cesm/cesmdata/cseg/inputdata/lnd/clm2/paramdata" +MY_CLM_VER = 51 +MY_CLM_SUBVER = "c211112" def main( @@ -47,6 +44,7 @@ def main( skip_crops=None, logger=None, ): + # pylint: disable=missing-function-docstring,too-many-statements # Directories to save output files and figures if not output_dir: if only_make_figs: @@ -73,11 +71,14 @@ def main( # Disable plotting if any plotting module is unavailable if save_figs: try: + # pylint: disable=import-outside-toplevel,unused-import,import-error import cartopy import matplotlib - except: + except ModuleNotFoundError as exc: if only_make_figs: - raise RuntimeError("only_make_figs True but not all plotting modules are available") + raise RuntimeError( + "only_make_figs True but not all plotting modules are available" + ) from exc gddfn.log(logger, "Not all plotting modules are available; disabling save_figs") save_figs = False @@ -95,19 +96,21 @@ def main( ########################## if not only_make_figs: - # Keep 1 extra year to avoid incomplete final growing season for crops harvested after Dec. 31. - y1_import_str = f"{first_season+1}-01-01" - yN_import_str = f"{last_season+2}-01-01" + # Keep 1 extra year to avoid incomplete final growing season for crops + # harvested after Dec. 31. 
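To make the off-by-one conventions here concrete: CTSM stamps year-Y results with Y+1, and one extra year is read so that seasons ending after Dec. 31 are complete. A sketch of the window arithmetic implemented just below, using hypothetical season bounds:

# Hypothetical example values, not taken from any real run
first_season, last_season = 1997, 2003

yr_1_import_str = f"{first_season+1}-01-01"  # first file read: year-1997 results
yr_n_import_str = f"{last_season+2}-01-01"   # one extra year for late harvests

assert yr_1_import_str == "1998-01-01"
assert yr_n_import_str == "2005-01-01"
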
+ yr_1_import_str = f"{first_season+1}-01-01" + yr_n_import_str = f"{last_season+2}-01-01" gddfn.log( logger, - f"Importing netCDF time steps {y1_import_str} through {yN_import_str} (years are +1 because of CTSM output naming)", + f"Importing netCDF time steps {yr_1_import_str} through {yr_n_import_str} " + + "(years are +1 because of CTSM output naming)", ) pickle_file = os.path.join(output_dir, f"{first_season}-{last_season}.pickle") h2_ds_file = os.path.join(output_dir, f"{first_season}-{last_season}.h2_ds.nc") if os.path.exists(pickle_file): - with open(pickle_file, "rb") as f: + with open(pickle_file, "rb") as file: ( first_season, last_season, @@ -115,14 +118,14 @@ def main( gddaccum_yp_list, gddharv_yp_list, skip_patches_for_isel_nan_lastyear, - lastYear_active_patch_indices_list, + lastyear_active_patch_indices_list, incorrectly_daily, save_figs, incl_vegtypes_str, incl_patches1d_itype_veg, mxsowings, skip_crops, - ) = pickle.load(f) + ) = pickle.load(file) print(f"Will resume import at {pickle_year+1}") h2_ds = None else: @@ -132,17 +135,17 @@ def main( gddaccum_yp_list = [] gddharv_yp_list = [] incl_vegtypes_str = None - lastYear_active_patch_indices_list = None + lastyear_active_patch_indices_list = None sdates_rx = sdates_file hdates_rx = hdates_file if not unlimited_season_length: - mxmats = cc.import_max_gs_length(paramfile_dir, my_clm_ver, my_clm_subver) + mxmats = cc.import_max_gs_length(PARAMFILE_DIR, MY_CLM_VER, MY_CLM_SUBVER) else: mxmats = None - for y, thisYear in enumerate(np.arange(first_season + 1, last_season + 3)): - if thisYear <= pickle_year: + for yr_index, this_yr in enumerate(np.arange(first_season + 1, last_season + 3)): + if this_yr <= pickle_year: continue ( @@ -152,7 +155,7 @@ def main( gddaccum_yp_list, gddharv_yp_list, skip_patches_for_isel_nan_lastyear, - lastYear_active_patch_indices_list, + lastyear_active_patch_indices_list, incorrectly_daily, incl_vegtypes_str, incl_patches1d_itype_veg, @@ -160,14 +163,14 @@ def main( ) = gddfn.import_and_process_1yr( first_season, last_season, - y, - thisYear, + yr_index, + this_yr, sdates_rx, hdates_rx, gddaccum_yp_list, gddharv_yp_list, skip_patches_for_isel_nan_lastyear, - lastYear_active_patch_indices_list, + lastyear_active_patch_indices_list, incorrectly_daily, input_dir, incl_vegtypes_str, @@ -179,16 +182,16 @@ def main( ) gddfn.log(logger, f" Saving pickle file ({pickle_file})...") - with open(pickle_file, "wb") as f: + with open(pickle_file, "wb") as file: pickle.dump( [ first_season, last_season, - thisYear, + this_yr, gddaccum_yp_list, gddharv_yp_list, skip_patches_for_isel_nan_lastyear, - lastYear_active_patch_indices_list, + lastyear_active_patch_indices_list, incorrectly_daily, save_figs, incl_vegtypes_str, @@ -196,7 +199,7 @@ def main( mxsowings, skip_crops, ], - f, + file, protocol=-1, ) @@ -248,35 +251,35 @@ def main( ] dummy_vars = [] dummy_longnames = [] - for v, thisVar in enumerate(all_vars): - if thisVar not in gdd_maps_ds: - dummy_vars.append(thisVar) - dummy_longnames.append(all_longnames[v]) + for var_index, this_var in enumerate(all_vars): + if this_var not in gdd_maps_ds: + dummy_vars.append(this_var) + dummy_longnames.append(all_longnames[var_index]) - def make_dummy(thisCrop_gridded, addend): - dummy_gridded = thisCrop_gridded + def make_dummy(this_crop_gridded, addend): + dummy_gridded = this_crop_gridded dummy_gridded.values = dummy_gridded.values * 0 + addend return dummy_gridded - for v in gdd_maps_ds: - thisCrop_gridded = gdd_maps_ds[v].copy() + for var_index in gdd_maps_ds: + 
this_crop_gridded = gdd_maps_ds[var_index].copy()
             break
-        dummy_gridded = make_dummy(thisCrop_gridded, -1)
+        dummy_gridded = make_dummy(this_crop_gridded, -1)

-        for v, thisVar in enumerate(dummy_vars):
-            if thisVar in gdd_maps_ds:
+        for var_index, this_var in enumerate(dummy_vars):
+            if this_var in gdd_maps_ds:
                 gddfn.error(
-                    logger, f"{thisVar} is already in gdd_maps_ds. Why overwrite it with dummy?"
+                    logger, f"{this_var} is already in gdd_maps_ds. Why overwrite it with dummy?"
                 )
-            dummy_gridded.name = thisVar
-            dummy_gridded.attrs["long_name"] = dummy_longnames[v]
-            gdd_maps_ds[thisVar] = dummy_gridded
+            dummy_gridded.name = this_var
+            dummy_gridded.attrs["long_name"] = dummy_longnames[var_index]
+            gdd_maps_ds[this_var] = dummy_gridded

         # Add lon/lat attributes
-        def add_lonlat_attrs(ds):
-            ds.lon.attrs = {"long_name": "coordinate_longitude", "units": "degrees_east"}
-            ds.lat.attrs = {"long_name": "coordinate_latitude", "units": "degrees_north"}
-            return ds
+        def add_lonlat_attrs(this_ds):
+            this_ds.lon.attrs = {"long_name": "coordinate_longitude", "units": "degrees_east"}
+            this_ds.lat.attrs = {"long_name": "coordinate_latitude", "units": "degrees_north"}
+            return this_ds

         gdd_maps_ds = add_lonlat_attrs(gdd_maps_ds)
         gddharv_maps_ds = add_lonlat_attrs(gddharv_maps_ds)
@@ -297,14 +300,17 @@ def add_lonlat_attrs(ds):
 def save_gdds(sdates_file, hdates_file, outfile, gdd_maps_ds, sdates_rx):
     # Set up output file from template (i.e., prescribed sowing dates).
     template_ds = xr.open_dataset(sdates_file, decode_times=True)
-    for v in template_ds:
-        if "sdate" in v:
-            template_ds = template_ds.drop(v)
+    for var in template_ds:
+        if "sdate" in var:
+            template_ds = template_ds.drop(var)
     template_ds.to_netcdf(path=outfile, format="NETCDF3_CLASSIC")
     template_ds.close()

     # Add global attributes
-    comment = f"Derived from CLM run plus crop calendar input files {os.path.basename(sdates_file) and {os.path.basename(hdates_file)}}."
+    comment = (
+        "Derived from CLM run plus crop calendar input files "
+        + f"{os.path.basename(sdates_file)} and {os.path.basename(hdates_file)}."
+    )
     gdd_maps_ds.attrs = {
         "author": "Sam Rabin (sam.rabin@gmail.com)",
         "comment": comment,
@@ -384,7 +390,11 @@ def add_attrs_to_map_ds(
     parser.add_argument(
         "-i",
         "--input-dir",
-        help="Directory where run outputs can be found (and where outputs will go). If --only-make-figs, this is the directory with the preprocessed files (e.g., *.pickle file).",
+        help=(
+            "Directory where run outputs can be found (and where outputs will go). If "
+            + "--only-make-figs, this is the directory with the preprocessed files (e.g., *.pickle "
+ ), required=True, ) parser.add_argument( @@ -464,7 +474,6 @@ def add_attrs_to_map_ds( args = parser.parse_args(sys.argv[1:]) for k, v in sorted(vars(args).items()): print(f"{k}: {v}") - save_figs = not args.dont_save_figs # Call main() main( @@ -474,7 +483,7 @@ def add_attrs_to_map_ds( sdates_file=args.sdates_file, hdates_file=args.hdates_file, output_dir=args.output_dir, - save_figs=save_figs, + save_figs=not args.dont_save_figs, only_make_figs=args.only_make_figs, run1_name=args.run1_name, run2_name=args.run2_name, @@ -484,9 +493,3 @@ def add_attrs_to_map_ds( unlimited_season_length=args.unlimited_season_length, skip_crops=args.skip_crops, ) - -# main(input_dir="/Users/Shared/CESM_runs/tests_10x15_20230329_gddgen/202303301820", -# sdates_file="/Users/Shared/CESM_work/crop_dates_mostrice/sdates_ggcmi_crop_calendar_phase3_v1.01_nninterp-f10_f10_mg37.2000-2000.20230330_165301.nc", -# hdates_file="/Users/Shared/CESM_work/crop_dates_mostrice/hdates_ggcmi_crop_calendar_phase3_v1.01_nninterp-f10_f10_mg37.2000-2000.20230330_165301.nc", -# first_season=1997, last_season=2003, -# save_figs=False) From 73da27ab293cd07f61a460d6fa8119980db77334 Mon Sep 17 00:00:00 2001 From: Sam Rabin Date: Tue, 30 Jan 2024 21:53:37 -0700 Subject: [PATCH 41/85] Remove unused function from cropcal_utils.py. --- python/ctsm/crop_calendars/cropcal_utils.py | 34 --------------------- 1 file changed, 34 deletions(-) diff --git a/python/ctsm/crop_calendars/cropcal_utils.py b/python/ctsm/crop_calendars/cropcal_utils.py index ba6c0b6e41..4d77d2ef66 100644 --- a/python/ctsm/crop_calendars/cropcal_utils.py +++ b/python/ctsm/crop_calendars/cropcal_utils.py @@ -15,40 +15,6 @@ import numpy as np import xarray as xr -# from xr_ds_ex import xr_ds_ex - - -# generate annual means, weighted by days / month -def weighted_annual_mean(array, time_in="time", time_out="time"): - if isinstance(array[time_in].values[0], cftime.datetime): - month_length = array[time_in].dt.days_in_month - - # After https://docs.xarray.dev/en/v0.5.1/examples/monthly-means.html - group = f"{time_in}.year" - weights = month_length.groupby(group) / month_length.groupby(group).sum() - np.testing.assert_allclose(weights.groupby(group).sum().values, 1) - array = (array * weights).groupby(group).sum(dim=time_in, skipna=True) - if time_out != "year": - array = array.rename({"year": time_out}) - - else: - mon_day = xr.DataArray( - np.array([31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]), dims=["month"] - ) - mon_wgt = mon_day / mon_day.sum() - array = ( - array.rolling({time_in: 12}, center=False) # rolling - .construct("month") # construct the array - .isel( - {time_in: slice(11, None, 12)} - ) # slice so that the first element is [1..12], second is [13..24] - .dot(mon_wgt, dims=["month"]) - ) - if time_in != time_out: - array = array.rename({time_in: time_out}) - - return array - # List of PFTs used in CLM def define_pftlist(): From ddd5e51d59db58259b2fa669cf583dcba64deae3 Mon Sep 17 00:00:00 2001 From: Sam Rabin Date: Tue, 30 Jan 2024 21:53:52 -0700 Subject: [PATCH 42/85] Rename a variable in generate_gdds.py. 
--- python/ctsm/crop_calendars/generate_gdds.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/ctsm/crop_calendars/generate_gdds.py b/python/ctsm/crop_calendars/generate_gdds.py index 1af3744b28..156ebfb20e 100644 --- a/python/ctsm/crop_calendars/generate_gdds.py +++ b/python/ctsm/crop_calendars/generate_gdds.py @@ -261,8 +261,8 @@ def make_dummy(this_crop_gridded, addend): dummy_gridded.values = dummy_gridded.values * 0 + addend return dummy_gridded - for var_index in gdd_maps_ds: - this_crop_gridded = gdd_maps_ds[var_index].copy() + for var in gdd_maps_ds: + this_crop_gridded = gdd_maps_ds[var].copy() break dummy_gridded = make_dummy(this_crop_gridded, -1) From 3b4ae701f12f12eb07ac39fe03e07c70db4a78de Mon Sep 17 00:00:00 2001 From: Sam Rabin Date: Tue, 30 Jan 2024 21:55:56 -0700 Subject: [PATCH 43/85] Satisfy pylint for cropcal_module.py. --- .../ctsm/crop_calendars/check_rxboth_run.py | 8 +- python/ctsm/crop_calendars/cropcal_module.py | 972 ++++++++++-------- 2 files changed, 557 insertions(+), 423 deletions(-) diff --git a/python/ctsm/crop_calendars/check_rxboth_run.py b/python/ctsm/crop_calendars/check_rxboth_run.py index 6dae071937..30c280120d 100644 --- a/python/ctsm/crop_calendars/check_rxboth_run.py +++ b/python/ctsm/crop_calendars/check_rxboth_run.py @@ -60,16 +60,16 @@ def main(argv): # These should be constant in a Prescribed Calendars (rxboth) run, as long as the inputs were # static. case = { - "constantVars": ["SDATES", "GDDHARV"], + "const_vars": ["SDATES", "GDDHARV"], "rx_sdates_file": args.rx_sdates_file, "rx_gdds_file": args.rx_gdds_file, } case["ds"] = cc.import_output( annual_outfiles, - myVars=myVars, - y1=args.first_usable_year, - yN=args.last_usable_year, + my_vars=myVars, + year_1=args.first_usable_year, + year_N=args.last_usable_year, ) cc.check_constant_vars(case["ds"], case, ignore_nan=True, verbose=True, throw_error=True) diff --git a/python/ctsm/crop_calendars/cropcal_module.py b/python/ctsm/crop_calendars/cropcal_module.py index 76c295974d..4fa3cdf5aa 100644 --- a/python/ctsm/crop_calendars/cropcal_module.py +++ b/python/ctsm/crop_calendars/cropcal_module.py @@ -1,21 +1,27 @@ -import numpy as np -import xarray as xr +""" +Helper functions for various crop calendar stuff +""" +# pylint: disable=too-many-lines + import warnings import sys import os import glob +import numpy as np +import xarray as xr # Import the CTSM Python utilities. -# sys.path.insert() is necessary for RXCROPMATURITY to work. The fact that it's calling this script in the RUN phase seems to require the python/ directory to be manually added to path. +# sys.path.insert() is necessary for RXCROPMATURITY to work. The fact that it's calling this script +# in the RUN phase seems to require the python/ directory to be manually added to path. _CTSM_PYTHON = os.path.join( os.path.dirname(os.path.realpath(__file__)), os.pardir, os.pardir, os.pardir, "python" ) sys.path.insert(1, _CTSM_PYTHON) -import ctsm.crop_calendars.cropcal_utils as utils +import ctsm.crop_calendars.cropcal_utils as utils # pylint: disable=wrong-import-position try: import pandas as pd -except: +except ModuleNotFoundError: pass @@ -38,9 +44,15 @@ }, } +# Minimum harvest threshold allowed in PlantCrop() +# Was 50 before cropcal runs 2023-01-28 +DEFAULT_GDD_MIN = 1.0 -# After importing a file, restrict it to years of interest. -def check_and_trim_years(y1, yN, ds_in): + +def check_and_trim_years(year_1, year_n, ds_in): + """ + After importing a file, restrict it to years of interest. 
+ """ ### In annual outputs, file with name Y is actually results from year Y-1. ### Note that time values refer to when it was SAVED. So 1981-01-01 is for year 1980. @@ -49,65 +61,80 @@ def get_year_from_cftime(cftime_date): return cftime_date.year - 1 # Check that all desired years are included - if get_year_from_cftime(ds_in.time.values[0]) > y1: + if get_year_from_cftime(ds_in.time.values[0]) > year_1: raise RuntimeError( - f"Requested y1 is {y1} but first year in outputs is {get_year_from_cftime(ds_in.time.values[0])}" + f"Requested year_1 is {year_1} but first year in outputs is " + + f"{get_year_from_cftime(ds_in.time.values[0])}" ) - elif get_year_from_cftime(ds_in.time.values[-1]) < y1: + if get_year_from_cftime(ds_in.time.values[-1]) < year_1: raise RuntimeError( - f"Requested yN is {yN} but last year in outputs is {get_year_from_cftime(ds_in.time.values[-1])}" + f"Requested year_n is {year_n} but last year in outputs is " + + f"{get_year_from_cftime(ds_in.time.values[-1])}" ) # Remove years outside range of interest ### Include an extra year at the end to finish out final seasons. - ds_in = utils.safer_timeslice(ds_in, slice(f"{y1+1}-01-01", f"{yN+2}-01-01")) + ds_in = utils.safer_timeslice(ds_in, slice(f"{year_1+1}-01-01", f"{year_n+2}-01-01")) # Make sure you have the expected number of timesteps (including extra year) - Nyears_expected = yN - y1 + 2 - if ds_in.dims["time"] != Nyears_expected: + n_years_expected = year_n - year_1 + 2 + if ds_in.dims["time"] != n_years_expected: raise RuntimeError( - f"Expected {Nyears_expected} timesteps in output but got {ds_in.dims['time']}" + f"Expected {n_years_expected} timesteps in output but got {ds_in.dims['time']}" ) return ds_in -def open_lu_ds(filename, y1, yN, existing_ds, ungrid=True): +def open_lu_ds(filename, year_1, year_n, existing_ds, ungrid=True): + """ + Open land-use dataset + """ # Open and trim to years of interest - dsg = xr.open_dataset(filename).sel(time=slice(y1, yN)) + this_ds_gridded = xr.open_dataset(filename).sel(time=slice(year_1, year_n)) # Assign actual lon/lat coordinates - dsg = dsg.assign_coords( + this_ds_gridded = this_ds_gridded.assign_coords( lon=("lsmlon", existing_ds.lon.values), lat=("lsmlat", existing_ds.lat.values) ) - dsg = dsg.swap_dims({"lsmlon": "lon", "lsmlat": "lat"}) - - if "AREA" in dsg: - dsg["AREA_CFT"] = dsg.AREA * 1e6 * dsg.LANDFRAC_PFT * dsg.PCT_CROP / 100 * dsg.PCT_CFT / 100 - dsg["AREA_CFT"].attrs = {"units": "m2"} - dsg["AREA_CFT"].load() + this_ds_gridded = this_ds_gridded.swap_dims({"lsmlon": "lon", "lsmlat": "lat"}) + + if "AREA" in this_ds_gridded: + this_ds_gridded["AREA_CFT"] = ( + this_ds_gridded.AREA + * 1e6 + * this_ds_gridded.LANDFRAC_PFT + * this_ds_gridded.PCT_CROP + / 100 + * this_ds_gridded.PCT_CFT + / 100 + ) + this_ds_gridded["AREA_CFT"].attrs = {"units": "m2"} + this_ds_gridded["AREA_CFT"].load() else: print("Warning: AREA missing from Dataset, so AREA_CFT will not be created") if not ungrid: - return dsg + return this_ds_gridded # Un-grid query_ilons = [int(x) - 1 for x in existing_ds["patches1d_ixy"].values] query_ilats = [int(x) - 1 for x in existing_ds["patches1d_jxy"].values] - query_ivts = [list(dsg.cft.values).index(x) for x in existing_ds["patches1d_itype_veg"].values] + query_ivts = [ + list(this_ds_gridded.cft.values).index(x) for x in existing_ds["patches1d_itype_veg"].values + ] - ds = xr.Dataset(attrs=dsg.attrs) - for v in ["AREA", "LANDFRAC_PFT", "PCT_CFT", "PCT_CROP", "AREA_CFT"]: - if v not in dsg: + this_ds = xr.Dataset(attrs=this_ds_gridded.attrs) 
+ for var in ["AREA", "LANDFRAC_PFT", "PCT_CFT", "PCT_CROP", "AREA_CFT"]: + if var not in this_ds_gridded: continue - if "time" in dsg[v].dims: + if "time" in this_ds_gridded[var].dims: new_coords = existing_ds["GRAINC_TO_FOOD_ANN"].coords else: new_coords = existing_ds["patches1d_lon"].coords - if "cft" in dsg[v].dims: - ds[v] = ( - dsg[v] + if "cft" in this_ds_gridded[var].dims: + this_ds[var] = ( + this_ds_gridded[var] .isel( lon=xr.DataArray(query_ilons, dims="patch"), lat=xr.DataArray(query_ilats, dims="patch"), @@ -117,8 +144,8 @@ def open_lu_ds(filename, y1, yN, existing_ds, ungrid=True): .assign_coords(new_coords) ) else: - ds[v] = ( - dsg[v] + this_ds[var] = ( + this_ds_gridded[var] .isel( lon=xr.DataArray(query_ilons, dims="patch"), lat=xr.DataArray(query_ilats, dims="patch"), @@ -126,67 +153,73 @@ def open_lu_ds(filename, y1, yN, existing_ds, ungrid=True): ) .assign_coords(new_coords) ) - for v in existing_ds: - if "patches1d_" in v or "grid1d_" in v: - ds[v] = existing_ds[v] - ds["lon"] = dsg["lon"] - ds["lat"] = dsg["lat"] + for var in existing_ds: + if "patches1d_" in var or "grid1d_" in var: + this_ds[var] = existing_ds[var] + this_ds["lon"] = this_ds_gridded["lon"] + this_ds["lat"] = this_ds_gridded["lat"] # Which crops are irrigated? - is_irrigated = np.full_like(ds["patches1d_itype_veg"], False) - for vegtype_str in np.unique(ds["patches1d_itype_veg_str"].values): + is_irrigated = np.full_like(this_ds["patches1d_itype_veg"], False) + for vegtype_str in np.unique(this_ds["patches1d_itype_veg_str"].values): if "irrigated" not in vegtype_str: continue vegtype_int = utils.ivt_str2int(vegtype_str) - is_this_vegtype = np.where(ds["patches1d_itype_veg"].values == vegtype_int)[0] + is_this_vegtype = np.where(this_ds["patches1d_itype_veg"].values == vegtype_int)[0] is_irrigated[is_this_vegtype] = True - ["irrigated" in x for x in ds["patches1d_itype_veg_str"].values] - ds["IRRIGATED"] = xr.DataArray( + this_ds["IRRIGATED"] = xr.DataArray( data=is_irrigated, - coords=ds["patches1d_itype_veg_str"].coords, + coords=this_ds["patches1d_itype_veg_str"].coords, attrs={"long_name": "Is patch irrigated?"}, ) # How much area is irrigated? 
- ds["IRRIGATED_AREA_CFT"] = ds["IRRIGATED"] * ds["AREA_CFT"] - ds["IRRIGATED_AREA_CFT"].attrs = { + this_ds["IRRIGATED_AREA_CFT"] = this_ds["IRRIGATED"] * this_ds["AREA_CFT"] + this_ds["IRRIGATED_AREA_CFT"].attrs = { "long name": "CFT area (irrigated types only)", "units": "m^2", } - ds["IRRIGATED_AREA_GRID"] = ( - ds["IRRIGATED_AREA_CFT"] - .groupby(ds["patches1d_gi"]) + this_ds["IRRIGATED_AREA_GRID"] = ( + this_ds["IRRIGATED_AREA_CFT"] + .groupby(this_ds["patches1d_gi"]) .sum() .rename({"patches1d_gi": "gridcell"}) ) - ds["IRRIGATED_AREA_GRID"].attrs = {"long name": "Irrigated area in gridcell", "units": "m^2"} + this_ds["IRRIGATED_AREA_GRID"].attrs = { + "long name": "Irrigated area in gridcell", + "units": "m^2", + } - return ds + return this_ds def check_constant_vars( - this_ds, case, ignore_nan, constantGSs=None, verbose=True, throw_error=True + this_ds, case, ignore_nan, const_growing_seasons=None, verbose=True, throw_error=True ): + """ + For variables that should stay constant, make sure they are + """ if isinstance(case, str): - constantVars = [case] + const_vars = [case] elif isinstance(case, list): - constantVars = case + const_vars = case elif isinstance(case, dict): - constantVars = case["constantVars"] + const_vars = case["const_vars"] else: raise TypeError(f"case must be str or dict, not {type(case)}") - if not constantVars: + if not const_vars: return None - if constantGSs: - gs0 = this_ds.gs.values[0] - gsN = this_ds.gs.values[-1] - if constantGSs.start > gs0 or constantGSs.stop < gsN: + if const_growing_seasons: + gs_0 = this_ds.gs.values[0] + gs_n = this_ds.gs.values[-1] + if const_growing_seasons.start > gs_0 or const_growing_seasons.stop < gs_n: print( - f"❗ Only checking constantVars over {constantGSs.start}-{constantGSs.stop} (run includes {gs0}-{gsN})" + f"❗ Only checking const_vars over {const_growing_seasons.start}-" + + f"{const_growing_seasons.stop} (run includes {gs_0}-{gs_n})" ) - this_ds = this_ds.sel(gs=constantGSs) + this_ds = this_ds.sel(gs=const_growing_seasons) any_bad = False any_bad_before_checking_rx = False @@ -194,155 +227,168 @@ def check_constant_vars( emojus = "❌" else: emojus = "❗" - if not isinstance(constantVars, list): - constantVars = [constantVars] + if not isinstance(const_vars, list): + const_vars = [const_vars] - for v in constantVars: - ok = True + for var in const_vars: + everything_ok = True - if "gs" in this_ds[v].dims: + if "gs" in this_ds[var].dims: time_coord = "gs" - elif "time" in this_ds[v].dims: + elif "time" in this_ds[var].dims: time_coord = "time" else: - raise RuntimeError(f"Which of these is the time coordinate? {this_ds[v].dims}") - i_time_coord = this_ds[v].dims.index(time_coord) + raise RuntimeError(f"Which of these is the time coordinate? 
{this_ds[var].dims}") + i_time_coord = this_ds[var].dims.index(time_coord) - this_da = this_ds[v] + this_da = this_ds[var] ra_sp = np.moveaxis(this_da.copy().values, i_time_coord, 0) incl_patches = [] bad_patches = np.array([]) - strList = [] + str_list = [] # Read prescription file, if needed rx_ds = None if isinstance(case, dict): - if v == "GDDHARV" and "rx_gdds_file" in case: + if var == "GDDHARV" and "rx_gdds_file" in case: rx_ds = import_rx_dates( "gdd", case["rx_gdds_file"], this_ds, set_neg1_to_nan=False ).squeeze() - for t1 in np.arange(this_ds.dims[time_coord] - 1): - condn = ~np.isnan(ra_sp[t1, ...]) - if t1 > 0: - condn = np.bitwise_and(condn, np.all(np.isnan(ra_sp[:t1, ...]), axis=0)) - thesePatches = np.where(condn)[0] - if thesePatches.size == 0: + for time_1 in np.arange(this_ds.dims[time_coord] - 1): + condn = ~np.isnan(ra_sp[time_1, ...]) + if time_1 > 0: + condn = np.bitwise_and(condn, np.all(np.isnan(ra_sp[:time_1, ...]), axis=0)) + these_patches = np.where(condn)[0] + if these_patches.size == 0: continue - thesePatches = list(np.where(condn)[0]) - incl_patches += thesePatches + these_patches = list(np.where(condn)[0]) + incl_patches += these_patches # print(f't1 {t1}: {thesePatches}') - t1_yr = this_ds[time_coord].values[t1] - t1_vals = np.squeeze(this_da.isel({time_coord: t1, "patch": thesePatches}).values) + t1_yr = this_ds[time_coord].values[time_1] + t1_vals = np.squeeze(this_da.isel({time_coord: time_1, "patch": these_patches}).values) - for t in np.arange(t1 + 1, this_ds.dims[time_coord]): - t_yr = this_ds[time_coord].values[t] - t_vals = np.squeeze(this_da.isel({time_coord: t, "patch": thesePatches}).values) + for timestep in np.arange(time_1 + 1, this_ds.dims[time_coord]): + t_yr = this_ds[time_coord].values[timestep] + t_vals = np.squeeze( + this_da.isel({time_coord: timestep, "patch": these_patches}).values + ) ok_p = t1_vals == t_vals - # If allowed, ignore where either t or t1 is NaN. Should only be used for runs where land use varies over time. + # If allowed, ignore where either t or t1 is NaN. Should only be used for runs where + # land use varies over time. 
if ignore_nan: ok_p = np.squeeze(np.bitwise_or(ok_p, np.isnan(t1_vals + t_vals))) if not np.all(ok_p): any_bad_before_checking_rx = True - bad_patches_thisT = list(np.where(np.bitwise_not(ok_p))[0]) + bad_patches_this_time = list(np.where(np.bitwise_not(ok_p))[0]) bad_patches = np.concatenate( - (bad_patches, np.array(thesePatches)[bad_patches_thisT]) + (bad_patches, np.array(these_patches)[bad_patches_this_time]) ) if rx_ds: found_in_rx = np.array([False for x in bad_patches]) - varyPatches = list(np.array(thesePatches)[bad_patches_thisT]) - varyLons = this_ds.patches1d_lon.values[bad_patches_thisT] - varyLats = this_ds.patches1d_lat.values[bad_patches_thisT] - varyCrops = this_ds.patches1d_itype_veg_str.values[bad_patches_thisT] - varyCrops_int = this_ds.patches1d_itype_veg.values[bad_patches_thisT] - - any_bad_anyCrop = False - for c in np.unique(varyCrops_int): - rx_var = f"gs1_{c}" - varyLons_thisCrop = varyLons[np.where(varyCrops_int == c)] - varyLats_thisCrop = varyLats[np.where(varyCrops_int == c)] - theseRxVals = np.diag( - rx_ds[rx_var].sel(lon=varyLons_thisCrop, lat=varyLats_thisCrop).values + vary_patches = list(np.array(these_patches)[bad_patches_this_time]) + vary_lons = this_ds.patches1d_lon.values[bad_patches_this_time] + vary_lats = this_ds.patches1d_lat.values[bad_patches_this_time] + vary_crops = this_ds.patches1d_itype_veg_str.values[bad_patches_this_time] + vary_crops_int = this_ds.patches1d_itype_veg.values[bad_patches_this_time] + + any_bad_any_crop = False + for crop_int in np.unique(vary_crops_int): + rx_var = f"gs1_{crop_int}" + vary_lons_this_crop = vary_lons[np.where(vary_crops_int == crop_int)] + vary_lats_this_crop = vary_lats[np.where(vary_crops_int == crop_int)] + these_rx_vals = np.diag( + rx_ds[rx_var] + .sel(lon=vary_lons_this_crop, lat=vary_lats_this_crop) + .values ) - if len(theseRxVals) != len(varyLats_thisCrop): + if len(these_rx_vals) != len(vary_lats_this_crop): raise RuntimeError( - f"Expected {len(varyLats_thisCrop)} rx values; got {len(theseRxVals)}" + f"Expected {len(vary_lats_this_crop)} rx values; got " + + f"{len(these_rx_vals)}" ) - if not np.any(theseRxVals != -1): + if not np.any(these_rx_vals != -1): continue - any_bad_anyCrop = True + any_bad_any_crop = True break - if not any_bad_anyCrop: + if not any_bad_any_crop: continue - # This bit is pretty inefficient, but I'm not going to optimize it until I actually need to use it. - for i, p in enumerate(bad_patches_thisT): - thisPatch = varyPatches[i] - thisLon = varyLons[i] - thisLat = varyLats[i] - thisCrop = varyCrops[i] - thisCrop_int = varyCrops_int[i] + # This bit is pretty inefficient, but I'm not going to optimize it until I + # actually need to use it. + for i, patch in enumerate(bad_patches_this_time): + this_patch = vary_patches[i] + this_lon = vary_lons[i] + this_lat = vary_lats[i] + this_crop = vary_crops[i] + this_crop_int = vary_crops_int[i] # If prescribed input had missing value (-1), it's fine for it to vary. 
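
A note on the np.diag idiom above: xarray's .sel() with two equal-length coordinate arrays
performs orthogonal indexing, returning the full lat x lon cross product, so the diagonal of
that square array holds the value at each individual (lat, lon) pair. A minimal sketch with a
made-up 3x3 DataArray:

    import numpy as np
    import xarray as xr

    da = xr.DataArray(
        np.arange(9.0).reshape(3, 3),
        coords={"lat": [0.0, 1.0, 2.0], "lon": [10.0, 11.0, 12.0]},
        dims=("lat", "lon"),
    )
    grid = da.sel(lat=[0.0, 2.0], lon=[10.0, 12.0]).values  # 2x2 cross product
    pointwise = np.diag(grid)  # values at (0, 10) and (2, 12)
    print(pointwise)  # [0. 8.]
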
if rx_ds: - rx_var = f"gs1_{thisCrop_int}" - if thisLon in rx_ds.lon.values and thisLat in rx_ds.lat.values: - rx = rx_ds[rx_var].sel(lon=thisLon, lat=thisLat).values - Nunique = len(np.unique(rx)) - if Nunique == 1: + rx_var = f"gs1_{this_crop_int}" + if this_lon in rx_ds.lon.values and this_lat in rx_ds.lat.values: + rx_vals = rx_ds[rx_var].sel(lon=this_lon, lat=this_lat).values + n_unique = len(np.unique(rx_vals)) + if n_unique == 1: found_in_rx[i] = True - if rx == -1: + if rx_vals == -1: continue - elif Nunique > 1: + elif n_unique > 1: raise RuntimeError( - f"How does lon {thisLon} lat {thisLat} {thisCrop} have time-varying {v}?" + f"How does lon {this_lon} lat {this_lat} {this_crop} have " + + f"time-varying {var}?" ) else: raise RuntimeError( - "lon {thisLon} lat {thisLat} {thisCrop} not in rx dataset?" + f"lon {this_lon} lat {this_lat} {this_crop} not in rx dataset?" ) # Print info (or save to print later) any_bad = True if verbose: - thisStr = f" Patch {thisPatch} (lon {thisLon} lat {thisLat}) {thisCrop} ({thisCrop_int})" + this_str = ( + f" Patch {this_patch} (lon {this_lon} lat {this_lat}) " + + f"{this_crop} ({this_crop_int})" + ) if rx_ds and not found_in_rx[i]: - thisStr = thisStr.replace("(lon", "* (lon") - if not np.isnan(t1_vals[p]): - t1_val_print = int(t1_vals[p]) + this_str = this_str.replace("(lon", "* (lon") + if not np.isnan(t1_vals[patch]): + t1_val_print = int(t1_vals[patch]) else: t1_val_print = "NaN" - if not np.isnan(t_vals[p]): - t_val_print = int(t_vals[p]) + if not np.isnan(t_vals[patch]): + t_val_print = int(t_vals[patch]) else: t_val_print = "NaN" - if v == "SDATES": - strList.append( - f"{thisStr}: Sowing {t1_yr} jday {t1_val_print}, {t_yr} jday {t_val_print}" + if var == "SDATES": + str_list.append( + f"{this_str}: Sowing {t1_yr} jday {t1_val_print}, {t_yr} " + + f"jday {t_val_print}" ) else: - strList.append( - f"{thisStr}: {t1_yr} {v} {t1_val_print}, {t_yr} {v} {t_val_print}" + str_list.append( + f"{this_str}: {t1_yr} {var} {t1_val_print}, {t_yr} {var} " + + f"{t_val_print}" ) else: - if ok: - print(f"{emojus} CLM output {v} unexpectedly vary over time:") - ok = False - print(f"{v} timestep {t} does not match timestep {t1}") + if everything_ok: + print(f"{emojus} CLM output {var} unexpectedly vary over time:") + everything_ok = False + print(f"{var} timestep {timestep} does not match timestep {time_1}") break if verbose and any_bad: - print(f"{emojus} CLM output {v} unexpectedly vary over time:") - strList.sort() + print(f"{emojus} CLM output {var} unexpectedly vary over time:") + str_list.sort() if rx_ds and np.any(~found_in_rx): - strList = [ + str_list = [ "*: Not found in prescribed input file (maybe minor lon/lat mismatch)" - ] + strList + ] + str_list elif not rx_ds: - strList = ["(No rx file checked)"] + strList - print("\n".join(strList)) + str_list = ["(No rx file checked)"] + str_list + print("\n".join(str_list)) # Make sure every patch was checked once (or is all-NaN except possibly final season) incl_patches = np.sort(incl_patches) @@ -365,21 +411,23 @@ def check_constant_vars( if not np.array_equal(incl_patches, np.unique(incl_patches)): raise RuntimeError("Patch(es) checked but also all-NaN??") if not np.array_equal(incl_patches, np.arange(this_ds.dims["patch"])): - for p in np.arange(this_ds.dims["patch"]): - if p not in incl_patches: + for patch in np.arange(this_ds.dims["patch"]): + if patch not in incl_patches: break raise RuntimeError( - f"Not all patches checked! 
E.g., {p}: {this_da.isel(patch=p).values}" + f"Not all patches checked! E.g., {patch}: {this_da.isel(patch=patch).values}" ) if not any_bad: if any_bad_before_checking_rx: print( - f"✅ CLM output {v} do not vary through {this_ds.dims[time_coord]} growing seasons of output (except for patch(es) with missing rx)." + f"✅ CLM output {var} do not vary through {this_ds.dims[time_coord]} growing " + + "seasons of output (except for patch(es) with missing rx)." ) else: print( - f"✅ CLM output {v} do not vary through {this_ds.dims[time_coord]} growing seasons of output." + f"✅ CLM output {var} do not vary through {this_ds.dims[time_coord]} growing " + + "seasons of output." ) if any_bad and throw_error: @@ -392,6 +440,9 @@ def check_constant_vars( def check_rx_obeyed( vegtype_list, rx_ds, dates_ds, which_ds, output_var, gdd_min=None, verbose=False ): + """ + Check that prescribed crop calendars were obeyed + """ all_ok = 2 diff_str_list = [] gdd_tolerance = 1 @@ -403,25 +454,26 @@ def check_rx_obeyed( ) pct_harv_at_mature = get_pct_harv_at_mature(harvest_reason_da) print( - f"{which_ds} harvest reasons: {unique_harvest_reasons} ({pct_harv_at_mature}% harv at maturity)" + f"{which_ds} harvest reasons: {unique_harvest_reasons} ({pct_harv_at_mature}% harv at " + + "maturity)" ) for vegtype_str in vegtype_list: - thisVeg_patches = np.where(dates_ds.patches1d_itype_veg_str == vegtype_str)[0] - if thisVeg_patches.size == 0: + thisveg_patches = np.where(dates_ds.patches1d_itype_veg_str == vegtype_str)[0] + if thisveg_patches.size == 0: continue - ds_thisVeg = dates_ds.isel(patch=thisVeg_patches) - patch_inds_lon_thisVeg = ds_thisVeg.patches1d_ixy.values.astype(int) - 1 - patch_inds_lat_thisVeg = ds_thisVeg.patches1d_jxy.values.astype(int) - 1 - patch_lons_thisVeg = ds_thisVeg.patches1d_lon - patch_lats_thisVeg = ds_thisVeg.patches1d_lat + ds_thisveg = dates_ds.isel(patch=thisveg_patches) + patch_inds_lon_thisveg = ds_thisveg.patches1d_ixy.values.astype(int) - 1 + patch_inds_lat_thisveg = ds_thisveg.patches1d_jxy.values.astype(int) - 1 + patch_lons_thisveg = ds_thisveg.patches1d_lon + patch_lats_thisveg = ds_thisveg.patches1d_lat vegtype_int = utils.vegtype_str2int(vegtype_str)[0] rx_da = rx_ds[f"gs1_{vegtype_int}"] - rx_array = rx_da.values[patch_inds_lat_thisVeg, patch_inds_lon_thisVeg] + rx_array = rx_da.values[patch_inds_lat_thisveg, patch_inds_lon_thisveg] rx_array = np.expand_dims(rx_array, axis=1) - sim_array = ds_thisVeg[output_var].values - sim_array_dims = ds_thisVeg[output_var].dims + sim_array = ds_thisveg[output_var].values + sim_array_dims = ds_thisveg[output_var].dims # Ignore patches without prescribed value with np.errstate(invalid="ignore"): @@ -430,10 +482,11 @@ def check_rx_obeyed( # Account for... if "GDDHARV" in output_var: # ...GDD harvest threshold minimum set in PlantCrop() - if gdd_min == None: - gdd_min = default_gdd_min() + if gdd_min is None: + gdd_min = DEFAULT_GDD_MIN print( - f"gdd_min not provided when doing check_rx_obeyed() for {output_var}; using default {gdd_min}" + f"gdd_min not provided when doing check_rx_obeyed() for {output_var}; using " + + f"default {gdd_min}" ) with np.errstate(invalid="ignore"): rx_array[(rx_array >= 0) & (rx_array < gdd_min)] = gdd_min @@ -443,11 +496,13 @@ def check_rx_obeyed( # 1: Harvesting at maturity # 2: Harvesting at max season length (mxmat) # 3: Crop was incorrectly planted in last time step of Dec. 31 - # 4: Today was supposed to be the planting day, but the previous crop still hasn't been harvested. 
+ # 4: Today was supposed to be the planting day, but the previous crop still hasn't been + # harvested. # 5: Harvest the day before the next sowing date this year. # 6: Same as #5. - # 7: Harvest the day before the next sowing date (today is Dec. 31 and the sowing date is Jan. 1) - harvest_reason_da = ds_thisVeg["HARVEST_REASON"] + # 7: Harvest the day before the next sowing date (today is Dec. 31 and the sowing date + # is Jan. 1) + harvest_reason_da = ds_thisveg["HARVEST_REASON"] unique_harvest_reasons = np.unique( harvest_reason_da.values[np.where(~np.isnan(harvest_reason_da.values))] ) @@ -456,43 +511,51 @@ def check_rx_obeyed( if np.any(sim_array != rx_array): diff_array = sim_array - rx_array - # Allow negative GDDHARV values when harvest occurred because sowing was scheduled for the next day + # Allow negative GDDHARV values when harvest occurred because sowing was scheduled for + # the next day if output_var == "GDDHARV_PERHARV": diff_array = np.ma.masked_array( diff_array, - mask=(diff_array < 0) & (ds_thisVeg["HARVEST_REASON_PERHARV"].values == 5), + mask=(diff_array < 0) & (ds_thisveg["HARVEST_REASON_PERHARV"].values == 5), ) elif output_var == "GDDHARV": with np.errstate(invalid="ignore"): diff_lt_0 = diff_array < 0 - harv_reason_5 = ds_thisVeg["HARVEST_REASON"].values == 5 + harv_reason_5 = ds_thisveg["HARVEST_REASON"].values == 5 diff_array = np.ma.masked_array(diff_array, mask=diff_lt_0 & harv_reason_5) with np.errstate(invalid="ignore"): abs_gt_0 = abs(diff_array) > 0 if np.any(np.abs(diff_array[abs_gt_0]) > 0): - min_diff, minLon, minLat, minGS, minRx = get_extreme_info( + min_diff, min_lon, min_lat, min_gs, min_rx = get_extreme_info( diff_array, rx_array, np.nanmin, sim_array_dims, dates_ds.gs, - patch_lons_thisVeg, - patch_lats_thisVeg, + patch_lons_thisveg, + patch_lats_thisveg, ) - max_diff, maxLon, maxLat, maxGS, maxRx = get_extreme_info( + max_diff, max_lon, max_lat, max_gs, max_rx = get_extreme_info( diff_array, rx_array, np.nanmax, sim_array_dims, dates_ds.gs, - patch_lons_thisVeg, - patch_lats_thisVeg, + patch_lons_thisveg, + patch_lats_thisveg, ) - diffs_eg_txt = f"{vegtype_str} ({vegtype_int}): diffs range {min_diff} (lon {minLon}, lat {minLat}, gs {minGS}, rx ~{minRx}) to {max_diff} (lon {maxLon}, lat {maxLat}, gs {maxGS}, rx ~{maxRx})" + diffs_eg_txt = ( + f"{vegtype_str} ({vegtype_int}): diffs range {min_diff} (lon {min_lon}, lat " + + f"{min_lat}, gs {min_gs}, rx ~{min_rx}) to {max_diff} (lon {max_lon}, lat " + + f"{max_lat}, gs {max_gs}, rx ~{max_rx})" + ) if "GDDHARV" in output_var: - diffs_eg_txt += f"; harvest reasons: {unique_harvest_reasons} ({pct_harv_at_mature}% harvested at maturity)" + diffs_eg_txt += ( + f"; harvest reasons: {unique_harvest_reasons} ({pct_harv_at_mature}" + + "% harvested at maturity)" + ) if "GDDHARV" in output_var and np.nanmax(abs(diff_array)) <= gdd_tolerance: if all_ok > 0: all_ok = 1 @@ -501,7 +564,8 @@ def check_rx_obeyed( all_ok = 0 if verbose: print( - f"❌ {which_ds}: Prescribed {output_var} *not* always obeyed. E.g., {diffs_eg_txt}" + f"❌ {which_ds}: Prescribed {output_var} *not* always obeyed. 
E.g., " + + f"{diffs_eg_txt}" ) else: break @@ -512,56 +576,67 @@ def check_rx_obeyed( # print(f"🟨 {which_ds}: Prescribed {output_var} *not* always obeyed, but acceptable:") # for x in diff_str_list: print(x) print( - f"🟨 {which_ds}: Prescribed {output_var} *not* always obeyed, but acceptable (diffs <= {gdd_tolerance})" + f"🟨 {which_ds}: Prescribed {output_var} *not* always obeyed, but acceptable (diffs <= " + + f"{gdd_tolerance})" ) elif not verbose: print(f"❌ {which_ds}: Prescribed {output_var} *not* always obeyed. E.g., {diffs_eg_txt}") -# Make sure that, e.g., GDDACCUM_PERHARV is always <= HUI_PERHARV -def check_v0_le_v1(this_ds, vars, msg_txt=" ", both_nan_ok=False, throw_error=False): - v0 = vars[0] - v1 = vars[1] - gdd_lt_hui = this_ds[v0] <= this_ds[v1] +def check_v0_le_v1(this_ds, var_list, msg_txt=" ", both_nan_ok=False, throw_error=False): + """ + Make sure that, e.g., GDDACCUM_PERHARV is always <= HUI_PERHARV + """ + var0 = var_list[0] + var1 = var_list[1] + gdd_lt_hui = this_ds[var0] <= this_ds[var1] if both_nan_ok: - gdd_lt_hui = gdd_lt_hui | (np.isnan(this_ds[v0]) & np.isnan(this_ds[v1])) + gdd_lt_hui = gdd_lt_hui | (np.isnan(this_ds[var0]) & np.isnan(this_ds[var1])) if np.all(gdd_lt_hui): - print(f"✅{msg_txt}{v0} always <= {v1}") + print(f"✅{msg_txt}{var0} always <= {var1}") else: - msg = f"❌{msg_txt}{v0} *not* always <= {v1}" + msg = f"❌{msg_txt}{var0} *not* always <= {var1}" gdd_lt_hui_vals = gdd_lt_hui.values - p = np.where(~gdd_lt_hui_vals)[0][0] + patch_index = np.where(~gdd_lt_hui_vals)[0][0] msg = ( msg - + f"\ne.g., patch {p}: {this_ds.patches1d_itype_veg_str.values[p]}, lon {this_ds.patches1d_lon.values[p]} lat {this_ds.patches1d_lat.values[p]}:" + + f"\ne.g., patch {patch_index}: {this_ds.patches1d_itype_veg_str.values[patch_index]}," + + f" lon {this_ds.patches1d_lon.values[patch_index]} lat " + + f"{this_ds.patches1d_lat.values[patch_index]}:" ) - msg = msg + f"\n{this_ds[v0].values[p,:]}" - msg = msg + f"\n{this_ds[v1].values[p,:]}" + msg = msg + f"\n{this_ds[var0].values[patch_index,:]}" + msg = msg + f"\n{this_ds[var1].values[patch_index,:]}" if throw_error: print(msg) else: raise RuntimeError(msg) -# Convert time*mxharvests axes to growingseason axis -def convert_axis_time2gs(this_ds, verbose=False, myVars=None, incl_orig=False): +def convert_axis_time2gs(this_ds, verbose=False, my_vars=None, incl_orig=False): + """ + Convert time*mxharvests axes to growingseason axis + """ # How many non-NaN patch-seasons do we expect to have once we're done organizing things? - Npatch = this_ds.dims["patch"] - # Because some patches will be planted in the last year but not complete, we have to ignore any finalyear-planted seasons that do complete. - Ngs = this_ds.dims["time"] - 1 - expected_valid = Npatch * Ngs + n_patch = this_ds.dims["patch"] + # Because some patches will be planted in the last year but not complete, we have to ignore any + # finalyear-planted seasons that do complete. + n_gs = this_ds.dims["time"] - 1 + expected_valid = n_patch * n_gs mxharvests = this_ds.dims["mxharvests"] if verbose: print( - f"Start: discrepancy of {np.sum(~np.isnan(this_ds.HDATES.values)) - expected_valid} patch-seasons" + f"Start: discrepancy of {np.sum(~np.isnan(this_ds.HDATES.values)) - expected_valid} " + + "patch-seasons" ) - # Set all non-positive date values to NaN. These are seasons that were never harvested (or never started): "non-seasons." + # Set all non-positive date values to NaN. These are seasons that were never harvested + # (or never started): "non-seasons." 
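
For reference, the masking below relies on DataArray.where(), which keeps values where the
condition is True and fills NaN elsewhere; a minimal sketch:

    import xarray as xr

    hdates = xr.DataArray([-1.0, 0.0, 45.0, 210.0])
    print(hdates.where(hdates > 0).values)  # [ nan  nan  45. 210.]
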
if this_ds.HDATES.dims != ("time", "mxharvests", "patch"): raise RuntimeError( - f"This code relies on HDATES dims ('time', 'mxharvests', 'patch'), not {this_ds.HDATES.dims}" + "This code relies on HDATES dims ('time', 'mxharvests', 'patch'), not " + + f"{this_ds.HDATES.dims}" ) hdates_ymp = this_ds.HDATES.copy().where(this_ds.HDATES > 0).values hdates_pym = np.transpose(hdates_ymp.copy(), (2, 0, 1)) @@ -578,9 +653,10 @@ def convert_axis_time2gs(this_ds, verbose=False, myVars=None, incl_orig=False): # Find seasons that were planted while the patch was inactive with np.errstate(invalid="ignore"): sown_inactive_py = inactive_py[:, :-1] & (hdates_pym[:, 1:, 0] < sdates_pym[:, 1:, 0]) - sown_inactive_py = np.concatenate((np.full((Npatch, 1), False), sown_inactive_py), axis=1) + sown_inactive_py = np.concatenate((np.full((n_patch, 1), False), sown_inactive_py), axis=1) - # "Ignore harvests from seasons sown (a) before this output began or (b) when the crop was inactive" + # "Ignore harvests from seasons sown (a) before this output began or (b) when the crop was + # inactive" with np.errstate(invalid="ignore"): first_season_before_first_year_p = hdates_pym[:, 0, 0] < sdates_pym[:, 0, 0] first_season_before_first_year_py = np.full(hdates_pym.shape[:-1], fill_value=False) @@ -589,7 +665,7 @@ def convert_axis_time2gs(this_ds, verbose=False, myVars=None, incl_orig=False): sown_prerun_or_inactive_pym = np.concatenate( ( np.expand_dims(sown_prerun_or_inactive_py, axis=2), - np.full((Npatch, Ngs + 1, mxharvests - 1), False), + np.full((n_patch, n_gs + 1, mxharvests - 1), False), ), axis=2, ) @@ -598,12 +674,17 @@ def convert_axis_time2gs(this_ds, verbose=False, myVars=None, incl_orig=False): sdates_pym[where_sown_prerun_or_inactive_pym] = np.nan if verbose: print( - f'After "Ignore harvests from before this output began: discrepancy of {np.sum(~np.isnan(hdates_pym)) - expected_valid} patch-seasons' + "After 'Ignore harvests from before this output began: discrepancy of " + + f"{np.sum(~np.isnan(hdates_pym)) - expected_valid} patch-seasons'" ) - # We need to keep some non-seasons---it's possible that "the yearY growing season" never happened (sowing conditions weren't met), but we still need something there so that we can make an array of dimension Npatch*Ngs. We do this by changing those non-seasons from NaN to -Inf before doing the filtering and reshaping, after which we'll convert them back to NaNs. + # We need to keep some non-seasons---it's possible that "the yearY growing season" never + # happened (sowing conditions weren't met), but we still need something there so that we can + # make an array of dimension Npatch*Ngs. We do this by changing those non-seasons from NaN to + # -Inf before doing the filtering and reshaping, after which we'll convert them back to NaNs. - # "In years with no sowing, pretend the first no-harvest is meaningful, unless that was intentionally ignored above." + # "In years with no sowing, pretend the first no-harvest is meaningful, unless that was + # intentionally ignored above." 
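
The placeholder scheme described above can be sketched in isolation (a toy example, not the
module's actual filtering): fake seasons are switched from NaN to -Inf so they survive the
NaN-based filtering and reshaping, then switched back at the end.

    import numpy as np

    # One patch, three seasons; season 2 never happened (NaN placeholder)
    hdates = np.array([[120.0, np.nan, 133.0]])
    hdates[np.isnan(hdates)] = -np.inf  # placeholder survives the NaN filter
    kept = np.reshape(hdates[~np.isnan(hdates)], (1, 3))
    kept[np.isinf(kept)] = np.nan  # convert placeholders back to NaN
    print(kept)  # [[120.  nan 133.]]
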
sdates_orig_ymp = this_ds.SDATES.copy().values sdates_orig_pym = np.transpose(sdates_orig_ymp.copy(), (2, 0, 1)) hdates_pym2 = hdates_pym.copy() @@ -615,43 +696,45 @@ def convert_axis_time2gs(this_ds, verbose=False, myVars=None, incl_orig=False): where_nosow_py_1st = np.where(nosow_py_1st) hdates_pym2[where_nosow_py_1st[0], where_nosow_py_1st[1], 0] = -np.inf sdates_pym2[where_nosow_py_1st[0], where_nosow_py_1st[1], 0] = -np.inf - for h in np.arange(mxharvests - 1): - if h == 0: + for harvest_index in np.arange(mxharvests - 1): + if harvest_index == 0: continue - elif h == 1: + elif harvest_index == 1: print("Warning: Untested with mxharvests > 2") where_nosow_py = np.where( nosow_py - & ~np.any(np.isnan(hdates_pym[:, :, 0:h]), axis=2) - & np.isnan(hdates_pym[:, :, h]) + & ~np.any(np.isnan(hdates_pym[:, :, 0:harvest_index]), axis=2) + & np.isnan(hdates_pym[:, :, harvest_index]) ) - hdates_pym2[where_nosow_py[0], where_nosow_py[1], h + 1] = -np.inf - sdates_pym2[where_nosow_py[0], where_nosow_py[1], h + 1] = -np.inf + hdates_pym2[where_nosow_py[0], where_nosow_py[1], harvest_index + 1] = -np.inf + sdates_pym2[where_nosow_py[0], where_nosow_py[1], harvest_index + 1] = -np.inf - # "In years with sowing that are followed by inactive years, check whether the last sowing was harvested before the patch was deactivated. If not, pretend the LAST [easier to implement!] no-harvest is meaningful." + # "In years with sowing that are followed by inactive years, check whether the last sowing was + # harvested before the patch was deactivated. If not, pretend the LAST [easier to implement!] + # no-harvest is meaningful." sdates_orig_masked_pym = sdates_orig_pym.copy() with np.errstate(invalid="ignore"): sdates_le_0 = sdates_orig_masked_pym <= 0 sdates_orig_masked_pym[np.where(sdates_le_0)] = np.nan with warnings.catch_warnings(): warnings.filterwarnings(action="ignore", message="All-NaN slice encountered") - last_sdate_firstNgs_py = np.nanmax(sdates_orig_masked_pym[:, :-1, :], axis=2) - last_hdate_firstNgs_py = np.nanmax(hdates_pym2[:, :-1, :], axis=2) + last_sdate_first_n_gs_py = np.nanmax(sdates_orig_masked_pym[:, :-1, :], axis=2) + last_hdate_first_n_gs_py = np.nanmax(hdates_pym2[:, :-1, :], axis=2) with np.errstate(invalid="ignore"): - hdate_lt_sdate = last_hdate_firstNgs_py < last_sdate_firstNgs_py - last_sowing_not_harvested_sameyear_firstNgs_py = hdate_lt_sdate | np.isnan( - last_hdate_firstNgs_py + hdate_lt_sdate = last_hdate_first_n_gs_py < last_sdate_first_n_gs_py + last_sowing_not_harvested_sameyear_first_n_gs_py = hdate_lt_sdate | np.isnan( + last_hdate_first_n_gs_py ) - inactive_lastNgs_py = inactive_py[:, 1:] - last_sowing_never_harvested_firstNgs_py = ( - last_sowing_not_harvested_sameyear_firstNgs_py & inactive_lastNgs_py + inactive_last_n_gs_py = inactive_py[:, 1:] + last_sowing_never_harvested_first_n_gs_py = ( + last_sowing_not_harvested_sameyear_first_n_gs_py & inactive_last_n_gs_py ) last_sowing_never_harvested_py = np.concatenate( - (last_sowing_never_harvested_firstNgs_py, np.full((Npatch, 1), False)), axis=1 + (last_sowing_never_harvested_first_n_gs_py, np.full((n_patch, 1), False)), axis=1 ) last_sowing_never_harvested_pym = np.concatenate( ( - np.full((Npatch, Ngs + 1, mxharvests - 1), False), + np.full((n_patch, n_gs + 1, mxharvests - 1), False), np.expand_dims(last_sowing_never_harvested_py, axis=2), ), axis=2, @@ -663,33 +746,36 @@ def convert_axis_time2gs(this_ds, verbose=False, myVars=None, incl_orig=False): sdates_pym3[where_last_sowing_never_harvested_pym] = -np.inf # 
Convert to growingseason axis
-    def pym_to_pg(pym, quiet=False):
-        pg = np.reshape(pym, (pym.shape[0], -1))
-        ok_pg = pg[~np.isnan(pg)]
+    def pym_to_pg(pym_array, quiet=False):
+        pg_array = np.reshape(pym_array, (pym_array.shape[0], -1))
+        ok_pg = pg_array[~np.isnan(pg_array)]
         if not quiet:
             print(
-                f"{ok_pg.size} included; unique N seasons = {np.unique(np.sum(~np.isnan(pg), axis=1))}"
+                f"{ok_pg.size} included; unique N seasons = "
+                + f"{np.unique(np.sum(~np.isnan(pg_array), axis=1))}"
             )
-        return pg
+        return pg_array
 
     hdates_pg = pym_to_pg(hdates_pym3.copy(), quiet=not verbose)
     sdates_pg = pym_to_pg(sdates_pym3.copy(), quiet=True)
     if verbose:
         print(
-            f'After "In years with no sowing, pretend the first no-harvest is meaningful: discrepancy of {np.sum(~np.isnan(hdates_pg)) - expected_valid} patch-seasons'
+            "After 'In years with no sowing, pretend the first no-harvest is meaningful: "
+            + f"discrepancy of {np.sum(~np.isnan(hdates_pg)) - expected_valid} patch-seasons"
         )
 
-    # "Ignore any harvests that were planted in the final year, because some cells will have incomplete growing seasons for the final year."
+    # "Ignore any harvests that were planted in the final year, because some cells will have
+    # incomplete growing seasons for the final year."
     with np.errstate(invalid="ignore"):
         hdates_ge_sdates = hdates_pg[:, -mxharvests:] >= sdates_pg[:, -mxharvests:]
         lastyear_complete_season = hdates_ge_sdates | np.isinf(hdates_pg[:, -mxharvests:])
 
-    def ignore_lastyear_complete_season(pg, excl, mxharvests):
-        tmp_L = pg[:, :-mxharvests]
-        tmp_R = pg[:, -mxharvests:]
-        tmp_R[np.where(excl)] = np.nan
-        pg = np.concatenate((tmp_L, tmp_R), axis=1)
-        return pg
+    def ignore_lastyear_complete_season(pg_array, excl, mxharvests):
+        tmp_l = pg_array[:, :-mxharvests]
+        tmp_r = pg_array[:, -mxharvests:]
+        tmp_r[np.where(excl)] = np.nan
+        pg_array = np.concatenate((tmp_l, tmp_r), axis=1)
+        return pg_array
 
     hdates_pg2 = ignore_lastyear_complete_season(
         hdates_pg.copy(), lastyear_complete_season, mxharvests
@@ -699,41 +785,45 @@ def ignore_lastyear_complete_season(pg, excl, mxharvests):
     )
     is_valid = ~np.isnan(hdates_pg2)
     is_fake = np.isneginf(hdates_pg2)
-    is_fake = np.reshape(is_fake[is_valid], (this_ds.dims["patch"], Ngs))
+    is_fake = np.reshape(is_fake[is_valid], (this_ds.dims["patch"], n_gs))
     discrepancy = np.sum(is_valid) - expected_valid
-    unique_Nseasons = np.unique(np.sum(is_valid, axis=1))
+    unique_n_seasons = np.unique(np.sum(is_valid, axis=1))
     if verbose:
         print(
-            f'After "Ignore any harvests that were planted in the final year, because other cells will have incomplete growing seasons for the final year": discrepancy of {discrepancy} patch-seasons'
+            "After 'Ignore any harvests that were planted in the final year, because other cells "
+            + "will have incomplete growing seasons for the final year': discrepancy of "
+            + f"{discrepancy} patch-seasons"
        )
        if "pandas" in sys.modules:
-            bc = np.bincount(np.sum(is_valid, axis=1))
-            bc = bc[bc > 0]
-            df = pd.DataFrame({"Ngs": unique_Nseasons, "Count": bc})
-            print(df)
+            bincount = np.bincount(np.sum(is_valid, axis=1))
+            bincount = bincount[bincount > 0]
+            dataframe = pd.DataFrame({"Ngs": unique_n_seasons, "Count": bincount})
+            print(dataframe)
         else:
-            print(f"unique N seasons = {unique_Nseasons}")
+            print(f"unique N seasons = {unique_n_seasons}")
         print(" ")
 
     # Create Dataset with time axis as "gs" (growing season) instead of what CLM puts out
     if discrepancy == 0:
         this_ds_gs = set_up_ds_with_gs_axis(this_ds)
-        for v in this_ds.data_vars:
-            if this_ds[v].dims != 
("time", "mxharvests", "patch") or (myVars and v not in myVars): + for var in this_ds.data_vars: + if this_ds[var].dims != ("time", "mxharvests", "patch") or ( + my_vars and var not in my_vars + ): continue # Set invalid values to NaN - da_yhp = this_ds[v].copy() + da_yhp = this_ds[var].copy() da_yhp = da_yhp.where(~np.isneginf(da_yhp)) # Remove the nans and reshape to patches*growingseasons da_pyh = da_yhp.transpose("patch", "time", "mxharvests") ar_pg = np.reshape(da_pyh.values, (this_ds.dims["patch"], -1)) - ar_valid_pg = np.reshape(ar_pg[is_valid], (this_ds.dims["patch"], Ngs)) + ar_valid_pg = np.reshape(ar_pg[is_valid], (this_ds.dims["patch"], n_gs)) # Change -infs to nans ar_valid_pg[is_fake] = np.nan # Save as DataArray to new Dataset, stripping _PERHARV from variable name - newname = v.replace("_PERHARV", "") + newname = var.replace("_PERHARV", "") if newname in this_ds_gs: raise RuntimeError(f"{newname} already in dataset!") da_pg = xr.DataArray( @@ -743,14 +833,16 @@ def ignore_lastyear_complete_season(pg, excl, mxharvests): attrs=da_yhp.attrs, ) this_ds_gs[newname] = da_pg - this_ds_gs[newname].attrs["units"] = this_ds[v].attrs["units"] + this_ds_gs[newname].attrs["units"] = this_ds[var].attrs["units"] else: # Print details about example bad patch(es) - if min(unique_Nseasons) < Ngs: - print(f"Too few seasons (min {min(unique_Nseasons)} < {Ngs})") - p = np.where(np.sum(~np.isnan(hdates_pg2), axis=1) == min(unique_Nseasons))[0][0] - print_onepatch_wrongNgs( - p, + if min(unique_n_seasons) < n_gs: + print(f"Too few seasons (min {min(unique_n_seasons)} < {n_gs})") + patch_index = np.where(np.sum(~np.isnan(hdates_pg2), axis=1) == min(unique_n_seasons))[ + 0 + ][0] + print_onepatch_wrong_n_gs( + patch_index, this_ds, sdates_ymp, hdates_ymp, @@ -765,11 +857,13 @@ def ignore_lastyear_complete_season(pg, excl, mxharvests): sdates_pg2, hdates_pg2, ) - if max(unique_Nseasons) > Ngs: - print(f"Too many seasons (max {max(unique_Nseasons)} > {Ngs})") - p = np.where(np.sum(~np.isnan(hdates_pg2), axis=1) == max(unique_Nseasons))[0][0] - print_onepatch_wrongNgs( - p, + if max(unique_n_seasons) > n_gs: + print(f"Too many seasons (max {max(unique_n_seasons)} > {n_gs})") + patch_index = np.where(np.sum(~np.isnan(hdates_pg2), axis=1) == max(unique_n_seasons))[ + 0 + ][0] + print_onepatch_wrong_n_gs( + patch_index, this_ds, sdates_ymp, hdates_ymp, @@ -785,35 +879,31 @@ def ignore_lastyear_complete_season(pg, excl, mxharvests): hdates_pg2, ) raise RuntimeError( - f"Can't convert time*mxharvests axes to growingseason axis: discrepancy of {discrepancy} patch-seasons" + "Can't convert time*mxharvests axes to growingseason axis: discrepancy of " + + f"{discrepancy} patch-seasons" ) # Preserve units - for v1 in this_ds_gs: - v0 = v1 - if v0 not in this_ds: - v0 += "_PERHARV" - if v0 not in this_ds: + for var_1 in this_ds_gs: + var_0 = var_1 + if var_0 not in this_ds: + var_0 += "_PERHARV" + if var_0 not in this_ds: continue - if "units" in this_ds[v0].attrs: - this_ds_gs[v1].attrs["units"] = this_ds[v0].attrs["units"] + if "units" in this_ds[var_0].attrs: + this_ds_gs[var_1].attrs["units"] = this_ds[var_0].attrs["units"] if incl_orig: return this_ds_gs, this_ds - else: - return this_ds_gs - - -# Minimum harvest threshold allowed in PlantCrop() -# Was 50 before cropcal runs 2023-01-28 -def default_gdd_min(): - return 1.0 + return this_ds_gs -# Get information about extreme gridcells (for debugging) -def get_extreme_info(diff_array, rx_array, mxn, dims, gs, patches1d_lon, patches1d_lat): - if mxn == np.min: 
- diff_array = np.ma.masked_array(diff_array, mask=(np.abs(diff_array) == 0)) +def get_extreme_info(diff_array, rx_array, mxn, dims, gs_da, patches1d_lon, patches1d_lat): + """ + Get information about extreme gridcells (for debugging) + """ + if mxn == np.min: # pylint: disable=comparison-with-callable + diff_array = np.ma.masked_array(diff_array, mask=np.abs(diff_array) == 0) themxn = mxn(diff_array) # Find the first patch-gs that has the mxn value @@ -821,20 +911,22 @@ def get_extreme_info(diff_array, rx_array, mxn, dims, gs, patches1d_lon, patches first_indices = [x[0] for x in matching_indices] # Get the lon, lat, and growing season of that patch-gs - p = first_indices[dims.index("patch")] - thisLon = patches1d_lon.values[p] - thisLat = patches1d_lat.values[p] - s = first_indices[dims.index("gs")] - thisGS = gs.values[s] + patch_index = first_indices[dims.index("patch")] + this_lon = patches1d_lon.values[patch_index] + this_lat = patches1d_lat.values[patch_index] + season_index = first_indices[dims.index("gs")] + this_gs = gs_da.values[season_index] # Get the prescribed value for this patch-gs - thisRx = rx_array[p][0] + this_rx = rx_array[patch_index][0] - return round(themxn, 3), round(thisLon, 3), round(thisLat, 3), thisGS, round(thisRx) + return round(themxn, 3), round(this_lon, 3), round(this_lat, 3), this_gs, round(this_rx) -# Get growing season lengths from a DataArray of hdate-sdate def get_gs_len_da(this_da): + """ + Get growing season lengths from a DataArray of hdate-sdate + """ tmp = this_da.values with np.errstate(invalid="ignore"): tmp_lt_0 = tmp < 0 @@ -845,13 +937,16 @@ def get_gs_len_da(this_da): def get_pct_harv_at_mature(harvest_reason_da): - Nharv_at_mature = len(np.where(harvest_reason_da.values == 1)[0]) + """ + Get percentage of harvests that happened at maturity + """ + n_harv_at_mature = len(np.where(harvest_reason_da.values == 1)[0]) with np.errstate(invalid="ignore"): harv_reason_gt_0 = harvest_reason_da.values > 0 - Nharv = len(np.where(harv_reason_gt_0)[0]) - if Nharv == 0: + n_harv = len(np.where(harv_reason_gt_0)[0]) + if n_harv == 0: return np.nan - pct_harv_at_mature = Nharv_at_mature / Nharv * 100 + pct_harv_at_mature = n_harv_at_mature / n_harv * 100 pct_harv_at_mature = np.format_float_positional( pct_harv_at_mature, precision=2, unique=False, fractional=False, trim="k" ) # Round to 2 significant digits @@ -859,6 +954,9 @@ def get_pct_harv_at_mature(harvest_reason_da): def import_max_gs_length(paramfile_dir, my_clm_ver, my_clm_subver): + """ + Import maximum growing season length + """ # Get parameter file pattern = os.path.join(paramfile_dir, f"*{my_clm_ver}_params.{my_clm_subver}.nc") paramfile = glob.glob(pattern) @@ -886,8 +984,12 @@ def import_max_gs_length(paramfile_dir, my_clm_ver, my_clm_subver): return mxmat_dict -# E.g. import_rx_dates("sdate", sdates_rx_file, dates_ds0_orig) -def import_rx_dates(var_prefix, date_inFile, dates_ds, set_neg1_to_nan=True): +def import_rx_dates(var_prefix, date_infile, dates_ds, set_neg1_to_nan=True): + """ + Import prescribed sowing/harvest dates + + E.g. import_rx_dates("sdate", sdates_rx_file, dates_ds0_orig) + """ # Get run info: # Max number of growing seasons per year if "mxsowings" in dates_ds: @@ -896,53 +998,61 @@ def import_rx_dates(var_prefix, date_inFile, dates_ds, set_neg1_to_nan=True): mxsowings = 1 # Which vegetation types were simulated? 
- itype_veg_toImport = np.unique(dates_ds.patches1d_itype_veg) + itype_veg_to_import = np.unique(dates_ds.patches1d_itype_veg) - date_varList = [] - for i in itype_veg_toImport: - for g in np.arange(mxsowings): - thisVar = f"{var_prefix}{g+1}_{i}" - date_varList = date_varList + [thisVar] + date_varlist = [] + for i in itype_veg_to_import: + for j in np.arange(mxsowings): + this_var = f"{var_prefix}{j+1}_{i}" + date_varlist = date_varlist + [this_var] - ds = utils.import_ds(date_inFile, myVars=date_varList) + this_ds = utils.import_ds(date_infile, myVars=date_varlist) did_warn = False - for v in ds: - v_new = v.replace(var_prefix, "gs") - ds = ds.rename({v: v_new}) + for var in this_ds: + v_new = var.replace(var_prefix, "gs") + this_ds = this_ds.rename({var: v_new}) # Set -1 prescribed GDD values to NaN. Only warn the first time. - if set_neg1_to_nan and var_prefix == "gdd" and v_new != v and np.any(ds[v_new].values < 0): - if np.any((ds[v_new].values < 0) & (ds[v_new].values != -1)): - raise RuntimeError(f"Unexpected negative value in {v}") + if ( + set_neg1_to_nan + and var_prefix == "gdd" + and v_new != var + and np.any(this_ds[v_new].values < 0) + ): + if np.any((this_ds[v_new].values < 0) & (this_ds[v_new].values != -1)): + raise RuntimeError(f"Unexpected negative value in {var}") if not did_warn: - print(f"Setting -1 rx GDD values to NaN") + print("Setting -1 rx GDD values to NaN") did_warn = True - ds[v_new] = ds[v_new].where(ds[v_new] != -1) + this_ds[v_new] = this_ds[v_new].where(this_ds[v_new] != -1) - return ds + return this_ds def import_output( filename, - myVars, - y1=None, - yN=None, - myVegtypes=utils.define_mgdcrop_list(), + my_vars, + year_1=None, + year_n=None, + my_vegtypes=utils.define_mgdcrop_list(), sdates_rx_ds=None, gdds_rx_ds=None, verbose=False, ): + """ + Import CLM output + """ # Import - this_ds = utils.import_ds(filename, myVars=myVars, myVegtypes=myVegtypes) + this_ds = utils.import_ds(filename, myVars=my_vars, myVegtypes=my_vegtypes) # Trim to years of interest (do not include extra year needed for finishing last growing season) - if y1 and yN: - this_ds = check_and_trim_years(y1, yN, this_ds) + if year_1 and year_n: + this_ds = check_and_trim_years(year_1, year_n, this_ds) else: # Assume including all growing seasons except last complete one are "of interest" - y1 = this_ds.time.values[0].year - yN = this_ds.time.values[-1].year - 2 - this_ds = check_and_trim_years(y1, yN, this_ds) + year_1 = this_ds.time.values[0].year + year_n = this_ds.time.values[-1].year - 2 + this_ds = check_and_trim_years(year_1, year_n, this_ds) # What vegetation types are included? vegtype_list = [ @@ -954,20 +1064,24 @@ def import_output( all_nan = np.full(this_ds[date_vars[0]].shape, True) all_nonpos = np.full(this_ds[date_vars[0]].shape, True) all_pos = np.full(this_ds[date_vars[0]].shape, True) - for v in date_vars: - all_nan = all_nan & np.isnan(this_ds[v].values) + for var in date_vars: + all_nan = all_nan & np.isnan(this_ds[var].values) with np.errstate(invalid="ignore"): - all_nonpos = all_nonpos & (this_ds[v].values <= 0) - all_pos = all_pos & (this_ds[v].values > 0) + all_nonpos = all_nonpos & (this_ds[var].values <= 0) + all_pos = all_pos & (this_ds[var].values > 0) if np.any(np.bitwise_not(all_nan | all_nonpos | all_pos)): raise RuntimeError("Inconsistent missing/present values on mxharvests axis") - # When doing transient runs, it's somehow possible for crops in newly-active patches to be *already alive*. They even have a sowing date (idop)! 
This will of course not show up in SDATES, but it does show up in SDATES_PERHARV. - # I could put the SDATES_PERHARV dates into where they "should" be, but instead I'm just going to invalidate those "seasons." + # When doing transient runs, it's somehow possible for crops in newly-active patches to be + # *already alive*. They even have a sowing date (idop)! This will of course not show up in + # SDATES, but it does show up in SDATES_PERHARV. + # I could put the SDATES_PERHARV dates into where they "should" be, but instead I'm just going + # to invalidate those "seasons." # # In all but the last calendar year, which patches had no sowing? no_sowing_yp = np.all(np.isnan(this_ds.SDATES.values[:-1, :, :]), axis=1) - # In all but the first calendar year, which harvests' jdays are < their sowings' jdays? (Indicates sowing the previous calendar year.) + # In all but the first calendar year, which harvests' jdays are < their sowings' jdays? + # (Indicates sowing the previous calendar year.) with np.errstate(invalid="ignore"): hsdate1_gt_hdate1_yp = ( this_ds.SDATES_PERHARV.values[1:, 0, :] > this_ds.HDATES.values[1:, 0, :] @@ -976,7 +1090,8 @@ def import_output( falsely_alive_yp = no_sowing_yp & hsdate1_gt_hdate1_yp if np.any(falsely_alive_yp): print( - f"Warning: {np.sum(falsely_alive_yp)} patch-seasons being ignored: Seemingly sown the year before harvest, but no sowings occurred that year." + f"Warning: {np.sum(falsely_alive_yp)} patch-seasons being ignored: Seemingly sown the " + + "year before harvest, but no sowings occurred that year." ) falsely_alive_yp = np.concatenate( (np.full((1, this_ds.dims["patch"]), False), falsely_alive_yp), axis=0 @@ -984,52 +1099,57 @@ def import_output( falsely_alive_y1p = np.expand_dims(falsely_alive_yp, axis=1) dummy_false_y1p = np.expand_dims(np.full_like(falsely_alive_yp, False), axis=1) falsely_alive_yhp = np.concatenate((falsely_alive_y1p, dummy_false_y1p), axis=1) - for v in this_ds.data_vars: - if this_ds[v].dims != ("time", "mxharvests", "patch"): + for var in this_ds.data_vars: + if this_ds[var].dims != ("time", "mxharvests", "patch"): continue - this_ds[v] = this_ds[v].where(~falsely_alive_yhp) + this_ds[var] = this_ds[var].where(~falsely_alive_yhp) - def check_no_negative(this_ds_in, varList_no_negative, which_file, verbose=False): - tiny_negOK = 1e-12 + def check_no_negative(this_ds_in, varlist_no_negative, which_file, verbose=False): + tiny_neg_ok = 1e-12 this_ds = this_ds_in.copy() - for v in this_ds: - if not any(x in v for x in varList_no_negative): + for var in this_ds: + if not any(x in var for x in varlist_no_negative): continue - the_min = np.nanmin(this_ds[v].values) + the_min = np.nanmin(this_ds[var].values) if the_min < 0: - if np.abs(the_min) <= tiny_negOK: + if np.abs(the_min) <= tiny_neg_ok: if verbose: print( - f"Tiny negative value(s) in {v} (abs <= {tiny_negOK}) being set to 0 ({which_file})" + f"Tiny negative value(s) in {var} (abs <= {tiny_neg_ok}) being set to 0" + + f" ({which_file})" ) else: print( - f"WARNING: Unexpected negative value(s) in {v}; minimum {the_min} ({which_file})" + f"WARNING: Unexpected negative value(s) in {var}; minimum {the_min} " + + f"({which_file})" ) - values = this_ds[v].copy().values + values = this_ds[var].copy().values with np.errstate(invalid="ignore"): - do_setto_0 = (values < 0) & (values >= -tiny_negOK) + do_setto_0 = (values < 0) & (values >= -tiny_neg_ok) values[np.where(do_setto_0)] = 0 - this_ds[v] = xr.DataArray( - values, coords=this_ds[v].coords, dims=this_ds[v].dims, 
attrs=this_ds[v].attrs + this_ds[var] = xr.DataArray( + values, + coords=this_ds[var].coords, + dims=this_ds[var].dims, + attrs=this_ds[var].attrs, ) elif verbose: - print(f"No negative value(s) in {v}; min {the_min} ({which_file})") + print(f"No negative value(s) in {var}; min {the_min} ({which_file})") return this_ds - def check_no_zeros(this_ds, varList_no_zero, which_file): - for v in this_ds: - if not any(x in v for x in varList_no_zero): + def check_no_zeros(this_ds, varlist_no_zero, which_file): + for var in this_ds: + if not any(x in var for x in varlist_no_zero): continue - if np.any(this_ds[v].values == 0): - print(f"WARNING: Unexpected zero(s) in {v} ({which_file})") + if np.any(this_ds[var].values == 0): + print(f"WARNING: Unexpected zero(s) in {var} ({which_file})") elif verbose: - print(f"No zero value(s) in {v} ({which_file})") + print(f"No zero value(s) in {var} ({which_file})") # Check for no zero values where there shouldn't be - varList_no_zero = ["DATE", "YEAR"] - check_no_zeros(this_ds, varList_no_zero, "original file") + varlist_no_zero = ["DATE", "YEAR"] + check_no_zeros(this_ds, varlist_no_zero, "original file") # Convert time*mxharvests axes to growingseason axis this_ds_gs = convert_axis_time2gs(this_ds, verbose=verbose, incl_orig=False) @@ -1046,21 +1166,21 @@ def check_no_zeros(this_ds, varList_no_zero, which_file): # Get HUI accumulation as fraction of required this_ds_gs["HUIFRAC"] = this_ds_gs["HUI"] / this_ds_gs["GDDHARV"] this_ds_gs["HUIFRAC_PERHARV"] = this_ds["HUI_PERHARV"] / this_ds["GDDHARV_PERHARV"] - for v in ["HUIFRAC", "HUIFRAC_PERHARV"]: - this_ds_gs[v].attrs["units"] = "Fraction of required" + for var in ["HUIFRAC", "HUIFRAC_PERHARV"]: + this_ds_gs[var].attrs["units"] = "Fraction of required" # Avoid tiny negative values - varList_no_negative = ["GRAIN", "REASON", "GDD", "HUI", "YEAR", "DATE", "GSLEN"] - this_ds_gs = check_no_negative(this_ds_gs, varList_no_negative, "new file", verbose=verbose) + varlist_no_negative = ["GRAIN", "REASON", "GDD", "HUI", "YEAR", "DATE", "GSLEN"] + this_ds_gs = check_no_negative(this_ds_gs, varlist_no_negative, "new file", verbose=verbose) # Check for no zero values where there shouldn't be - varList_no_zero = ["REASON", "DATE"] - check_no_zeros(this_ds_gs, varList_no_zero, "new file") + varlist_no_zero = ["REASON", "DATE"] + check_no_zeros(this_ds_gs, varlist_no_zero, "new file") # Check that e.g., GDDACCUM <= HUI - for vars in [["GDDACCUM", "HUI"], ["SYEARS", "HYEARS"]]: - if all(v in this_ds_gs for v in vars): - check_v0_le_v1(this_ds_gs, vars, both_nan_ok=True, throw_error=True) + for var_list in [["GDDACCUM", "HUI"], ["SYEARS", "HYEARS"]]: + if all(v in this_ds_gs for v in var_list): + check_v0_le_v1(this_ds_gs, var_list, both_nan_ok=True, throw_error=True) # Check that prescribed calendars were obeyed if sdates_rx_ds: @@ -1071,9 +1191,8 @@ def check_no_zeros(this_ds, varList_no_zero, which_file): gdds_rx_ds, this_ds, "this_ds", - "SDATES", "GDDHARV", - gdd_min=default_gdd_min(), + gdd_min=DEFAULT_GDD_MIN, ) # Convert time axis to integer year, saving original as 'cftime' @@ -1092,9 +1211,8 @@ def check_no_zeros(this_ds, varList_no_zero, which_file): return this_ds_gs -# Print information about a patch (for debugging) -def print_onepatch_wrongNgs( - p, +def print_onepatch_wrong_n_gs( + patch_index, this_ds_orig, sdates_ymp, hdates_ymp, @@ -1109,21 +1227,21 @@ def print_onepatch_wrongNgs( sdates_pg2, hdates_pg2, ): - try: - import pandas as pd - except: - print("Couldn't import pandas, so not displaying example 
bad patch ORIGINAL.") + """ + Print information about a patch (for debugging) + """ print( - f"patch {p}: {this_ds_orig.patches1d_itype_veg_str.values[p]}, lon" - f" {this_ds_orig.patches1d_lon.values[p]} lat {this_ds_orig.patches1d_lat.values[p]}" + f"patch {patch_index}: {this_ds_orig.patches1d_itype_veg_str.values[patch_index]}, lon " + f"{this_ds_orig.patches1d_lon.values[patch_index]} lat " + f"{this_ds_orig.patches1d_lat.values[patch_index]}" ) print("Original SDATES (per sowing):") - print(this_ds_orig.SDATES.values[:, :, p]) + print(this_ds_orig.SDATES.values[:, :, patch_index]) print("Original HDATES (per harvest):") - print(this_ds_orig.HDATES.values[:, :, p]) + print(this_ds_orig.HDATES.values[:, :, patch_index]) if "pandas" in sys.modules: @@ -1132,29 +1250,36 @@ def print_pandas_ymp(msg, cols, arrs_tuple): mxharvests = arrs_tuple[0].shape[1] arrs_list2 = [] cols2 = [] - for h in np.arange(mxharvests): - for i, a in enumerate(arrs_tuple): - arrs_list2.append(a[:, h]) - cols2.append(cols[i] + str(h)) + for harvest_index in np.arange(mxharvests): + for i, array in enumerate(arrs_tuple): + arrs_list2.append(array[:, harvest_index]) + cols2.append(cols[i] + str(harvest_index)) arrs_tuple2 = tuple(arrs_list2) - df = pd.DataFrame(np.stack(arrs_tuple2, axis=1)) - df.columns = cols2 - print(df) + dataframe = pd.DataFrame(np.stack(arrs_tuple2, axis=1)) + dataframe.columns = cols2 + print(dataframe) print_pandas_ymp( "Original", ["sdate", "hdate"], - (this_ds_orig.SDATES_PERHARV.values[:, :, p], this_ds_orig.HDATES.values[:, :, p]), + ( + this_ds_orig.SDATES_PERHARV.values[:, :, patch_index], + this_ds_orig.HDATES.values[:, :, patch_index], + ), ) - print_pandas_ymp("Masked", ["sdate", "hdate"], (sdates_ymp[:, :, p], hdates_ymp[:, :, p])) + print_pandas_ymp( + "Masked", + ["sdate", "hdate"], + (sdates_ymp[:, :, patch_index], hdates_ymp[:, :, patch_index]), + ) print_pandas_ymp( 'After "Ignore harvests from before this output began"', ["sdate", "hdate"], ( - np.transpose(sdates_pym, (1, 2, 0))[:, :, p], - np.transpose(hdates_pym, (1, 2, 0))[:, :, p], + np.transpose(sdates_pym, (1, 2, 0))[:, :, patch_index], + np.transpose(hdates_pym, (1, 2, 0))[:, :, patch_index], ), ) @@ -1162,8 +1287,8 @@ def print_pandas_ymp(msg, cols, arrs_tuple): 'After "In years with no sowing, pretend the first no-harvest is meaningful"', ["sdate", "hdate"], ( - np.transpose(sdates_pym2, (1, 2, 0))[:, :, p], - np.transpose(hdates_pym2, (1, 2, 0))[:, :, p], + np.transpose(sdates_pym2, (1, 2, 0))[:, :, patch_index], + np.transpose(hdates_pym2, (1, 2, 0))[:, :, patch_index], ), ) @@ -1175,23 +1300,25 @@ def print_pandas_ymp(msg, cols, arrs_tuple): ), ["sdate", "hdate"], ( - np.transpose(sdates_pym3, (1, 2, 0))[:, :, p], - np.transpose(hdates_pym3, (1, 2, 0))[:, :, p], + np.transpose(sdates_pym3, (1, 2, 0))[:, :, patch_index], + np.transpose(hdates_pym3, (1, 2, 0))[:, :, patch_index], ), ) def print_pandas_pg(msg, cols, arrs_tuple): print(f"{msg} ({np.sum(~np.isnan(arrs_tuple[0]))})") arrs_list = list(arrs_tuple) - for i, a in enumerate(arrs_tuple): - arrs_list[i] = np.reshape(a, (-1)) + for i, array in enumerate(arrs_tuple): + arrs_list[i] = np.reshape(array, (-1)) arrs_tuple2 = tuple(arrs_list) - df = pd.DataFrame(np.stack(arrs_tuple2, axis=1)) - df.columns = cols - print(df) + dataframe = pd.DataFrame(np.stack(arrs_tuple2, axis=1)) + dataframe.columns = cols + print(dataframe) print_pandas_pg( - "Same, but converted to gs axis", ["sdate", "hdate"], (sdates_pg[p, :], hdates_pg[p, :]) + "Same, but converted to gs 
axis", + ["sdate", "hdate"], + (sdates_pg[patch_index, :], hdates_pg[patch_index, :]), ) print_pandas_pg( @@ -1200,35 +1327,36 @@ def print_pandas_pg(msg, cols, arrs_tuple): ' will have incomplete growing seasons for the final year"' ), ["sdate", "hdate"], - (sdates_pg2[p, :], hdates_pg2[p, :]), + (sdates_pg2[patch_index, :], hdates_pg2[patch_index, :]), ) else: + print("Couldn't import pandas, so not displaying example bad patch ORIGINAL.") - def print_nopandas(a1, a2, msg): + def print_nopandas(array_1, array_2, msg): print(msg) - if a1.ndim == 1: + if array_1.ndim == 1: # I don't know why these aren't side-by-side! - print(np.stack((a1, a2), axis=1)) + print(np.stack((array_1, array_2), axis=1)) else: - print(np.concatenate((a1, a2), axis=1)) + print(np.concatenate((array_1, array_2), axis=1)) - print_nopandas(sdates_ymp[:, :, p], hdates_ymp[:, :, p], "Masked:") + print_nopandas(sdates_ymp[:, :, patch_index], hdates_ymp[:, :, patch_index], "Masked:") print_nopandas( - np.transpose(sdates_pym, (1, 2, 0))[:, :, p], - np.transpose(hdates_pym, (1, 2, 0))[:, :, p], + np.transpose(sdates_pym, (1, 2, 0))[:, :, patch_index], + np.transpose(hdates_pym, (1, 2, 0))[:, :, patch_index], 'After "Ignore harvests from before this output began"', ) print_nopandas( - np.transpose(sdates_pym2, (1, 2, 0))[:, :, p], - np.transpose(hdates_pym2, (1, 2, 0))[:, :, p], + np.transpose(sdates_pym2, (1, 2, 0))[:, :, patch_index], + np.transpose(hdates_pym2, (1, 2, 0))[:, :, patch_index], 'After "In years with no sowing, pretend the first no-harvest is meaningful"', ) print_nopandas( - np.transpose(sdates_pym3, (1, 2, 0))[:, :, p], - np.transpose(hdates_pym3, (1, 2, 0))[:, :, p], + np.transpose(sdates_pym3, (1, 2, 0))[:, :, patch_index], + np.transpose(hdates_pym3, (1, 2, 0))[:, :, patch_index], ( 'After "In years with sowing that are followed by inactive years, check whether the' " last sowing was harvested before the patch was deactivated. If not, pretend the" @@ -1236,11 +1364,13 @@ def print_nopandas(a1, a2, msg): ), ) - print_nopandas(sdates_pg[p, :], hdates_pg[p, :], "Same, but converted to gs axis") + print_nopandas( + sdates_pg[patch_index, :], hdates_pg[patch_index, :], "Same, but converted to gs axis" + ) print_nopandas( - sdates_pg2[p, :], - hdates_pg2[p, :], + sdates_pg2[patch_index, :], + hdates_pg2[patch_index, :], ( 'After "Ignore any harvests that were planted in the final year, because some cells' ' will have incomplete growing seasons for the final year"' @@ -1250,14 +1380,18 @@ def print_nopandas(a1, a2, msg): print("\n\n") -# Set up empty Dataset with time axis as "gs" (growing season) instead of what CLM puts out. -# Includes all the same variables as the input dataset, minus any that had dimensions mxsowings or mxharvests. def set_up_ds_with_gs_axis(ds_in): + """ + Set up empty Dataset with time axis as "gs" (growing season) instead of what CLM puts out. + + Includes all the same variables as the input dataset, minus any that had dimensions mxsowings or + mxharvests. 
+    """
     # Get the data variables to include in the new dataset
-    data_vars = dict()
-    for v in ds_in.data_vars:
-        if not any([x in ["mxsowings", "mxharvests"] for x in ds_in[v].dims]):
-            data_vars[v] = ds_in[v]
+    data_vars = {}
+    for var in ds_in.data_vars:
+        if not any(x in ["mxsowings", "mxharvests"] for x in ds_in[var].dims):
+            data_vars[var] = ds_in[var]
     # Set up the new dataset
     gs_years = [t.year - 1 for t in ds_in.time.values[:-1]]
     coords = ds_in.coords

From 4380ff7f4f49bb465b188471350342d2a87ccaf7 Mon Sep 17 00:00:00 2001
From: Sam Rabin
Date: Tue, 30 Jan 2024 21:59:11 -0700
Subject: [PATCH 44/85] Satisfy pylint for check_rxboth_run.py.

---
 .../ctsm/crop_calendars/check_rxboth_run.py   | 47 ++++++++++++-------
 1 file changed, 30 insertions(+), 17 deletions(-)

diff --git a/python/ctsm/crop_calendars/check_rxboth_run.py b/python/ctsm/crop_calendars/check_rxboth_run.py
index 30c280120d..a41ef8d8f7 100644
--- a/python/ctsm/crop_calendars/check_rxboth_run.py
+++ b/python/ctsm/crop_calendars/check_rxboth_run.py
@@ -1,12 +1,18 @@
-# %% Setup
-
+"""
+Check the results of a run with prescribed sowing dates and maturity requirements
+"""
+import sys
+import argparse
+import glob
+import os
 import numpy as np
-import sys, argparse
-import cropcal_module as cc
-import glob, os
+
+import cropcal_module as cc  # pylint: disable=import-error
 
 
 def main(argv):
+    """
+    Main method: Check the results of a run with prescribed sowing dates and maturity requirements
+    """
     # Set arguments
     parser = argparse.ArgumentParser(
         description="Check the results of a run with prescribed sowing dates and maturity "
         + "requirements"
     )
     parser.add_argument(
@@ -40,7 +46,7 @@ def main(argv):
     args = parser.parse_args(argv)
 
     # Note that _PERHARV will be stripped off upon import
-    myVars = [
+    my_vars = [
--- python/ctsm/crop_calendars/check_rxboth_run.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/ctsm/crop_calendars/check_rxboth_run.py b/python/ctsm/crop_calendars/check_rxboth_run.py index a41ef8d8f7..c2cf37aa12 100644 --- a/python/ctsm/crop_calendars/check_rxboth_run.py +++ b/python/ctsm/crop_calendars/check_rxboth_run.py @@ -75,7 +75,7 @@ def main(argv): annual_outfiles, my_vars=my_vars, year_1=args.first_usable_year, - year_N=args.last_usable_year, + year_n=args.last_usable_year, ) cc.check_constant_vars(case["ds"], case, ignore_nan=True, verbose=True, throw_error=True) From 99bd6018268a0cf75f09d6106fdd3200d66fc2bf Mon Sep 17 00:00:00 2001 From: Sam Rabin Date: Wed, 31 Jan 2024 14:55:46 -0700 Subject: [PATCH 46/85] Remove backticks from assign-to-project.yml --- .github/workflows/assign-to-project.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/assign-to-project.yml b/.github/workflows/assign-to-project.yml index 8c6c259c33..c51a4a6cba 100644 --- a/.github/workflows/assign-to-project.yml +++ b/.github/workflows/assign-to-project.yml @@ -13,7 +13,7 @@ jobs: runs-on: ubuntu-latest name: Assign to High Priority project steps: - - name: Assign issues and pull requests with `priority: high` label to project 25 + - name: Assign issues and pull requests with priority: high label to project 25 uses: srggrs/assign-one-project-github-action@1.3.1 if: | contains(github.event.issue.labels.*.name, 'priority: high') || From 4be7930708cee0311eedae9722deb6b8f61cdb24 Mon Sep 17 00:00:00 2001 From: Sam Rabin Date: Wed, 31 Jan 2024 15:46:34 -0700 Subject: [PATCH 47/85] Satisfy pylint for test_sys_regrid_ggcmi_shdates.py. --- python/ctsm/test/test_sys_regrid_ggcmi_shdates.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/python/ctsm/test/test_sys_regrid_ggcmi_shdates.py b/python/ctsm/test/test_sys_regrid_ggcmi_shdates.py index 7521ef09a5..6c2e230481 100755 --- a/python/ctsm/test/test_sys_regrid_ggcmi_shdates.py +++ b/python/ctsm/test/test_sys_regrid_ggcmi_shdates.py @@ -5,7 +5,6 @@ """ import os -import re import unittest import tempfile @@ -18,8 +17,7 @@ # -- add python/ctsm to path (needed if we want to run test stand-alone) _CTSM_PYTHON = os.path.join(os.path.dirname(os.path.realpath(__file__)), os.pardir, os.pardir) sys.path.insert(1, _CTSM_PYTHON) - - +# pylint: disable=wrong-import-position from ctsm.path_utils import path_to_ctsm_root from ctsm import unit_testing from ctsm.crop_calendars.regrid_ggcmi_shdates import regrid_ggcmi_shdates @@ -78,6 +76,9 @@ def tearDown(self): shutil.rmtree(self._tempdir, ignore_errors=True) def test_regrid_ggcmi_shdates(self): + """ + Tests regrid_ggcmi_shdates + """ # Call script sys.argv = self._function_call_list From 2a533efd0be208034620df310abe08ad4741cc26 Mon Sep 17 00:00:00 2001 From: Sam Rabin Date: Wed, 31 Jan 2024 15:47:55 -0700 Subject: [PATCH 48/85] Satisfy pylint for test_unit_modify_singlept_site_neon.py. 
--- python/ctsm/test/test_unit_modify_singlept_site_neon.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/ctsm/test/test_unit_modify_singlept_site_neon.py b/python/ctsm/test/test_unit_modify_singlept_site_neon.py index ecd96357b3..3a9d7d424c 100755 --- a/python/ctsm/test/test_unit_modify_singlept_site_neon.py +++ b/python/ctsm/test/test_unit_modify_singlept_site_neon.py @@ -17,7 +17,7 @@ # -- add python/ctsm to path (needed if we want to run the test stand-alone) _CTSM_PYTHON = os.path.join(os.path.dirname(os.path.realpath(__file__)), os.pardir, os.pardir) sys.path.insert(1, _CTSM_PYTHON) - +# pylint: disable=wrong-import-position from ctsm.path_utils import path_to_ctsm_root # pylint: disable=wrong-import-position From 3cbe7197d1efced49c6f3360aaaad15d44d2e0ca Mon Sep 17 00:00:00 2001 From: Sam Rabin Date: Wed, 31 Jan 2024 15:50:57 -0700 Subject: [PATCH 49/85] Satisfy pylint for test_unit_run_sys_tests.py. --- python/ctsm/test/test_unit_run_sys_tests.py | 23 +++++++++++++-------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/python/ctsm/test/test_unit_run_sys_tests.py b/python/ctsm/test/test_unit_run_sys_tests.py index 65ec1df5a5..98a9d54674 100755 --- a/python/ctsm/test/test_unit_run_sys_tests.py +++ b/python/ctsm/test/test_unit_run_sys_tests.py @@ -271,7 +271,7 @@ def test_withDryRun_nothingDone(self): def test_getTestmodList_suite(self): """Ensure that _get_testmod_list() works correctly with suite-style input""" - input = [ + testmod_list_input = [ "clm/default", "clm/default", "clm/crop", @@ -283,12 +283,12 @@ def test_getTestmodList_suite(self): "clm-crop", "clm-cropMonthlyOutput", ] - output = _get_testmod_list(input, unique=False) + output = _get_testmod_list(testmod_list_input, unique=False) self.assertEqual(output, target) def test_getTestmodList_suite_unique(self): """Ensure that _get_testmod_list() works correctly with unique=True""" - input = [ + testmod_list_input = [ "clm/default", "clm/default", "clm/crop", @@ -300,24 +300,29 @@ def test_getTestmodList_suite_unique(self): "clm-cropMonthlyOutput", ] - output = _get_testmod_list(input, unique=True) + output = _get_testmod_list(testmod_list_input, unique=True) self.assertEqual(output, target) def test_getTestmodList_testname(self): """Ensure that _get_testmod_list() works correctly with full test name(s) specified""" - input = [ + testmod_list_input = [ "ERS_D_Ld15.f45_f45_mg37.I2000Clm50FatesRs.izumi_nag.clm-crop", "ERS_D_Ld15.f45_f45_mg37.I2000Clm50FatesRs.izumi_nag.clm-default", ] target = ["clm-crop", "clm-default"] - output = _get_testmod_list(input) + output = _get_testmod_list(testmod_list_input) self.assertEqual(output, target) def test_getTestmodList_twomods(self): - """Ensure that _get_testmod_list() works correctly with full test name(s) specified and two mods in one test""" - input = ["ERS_D_Ld15.f45_f45_mg37.I2000Clm50FatesRs.izumi_nag.clm-default--clm-crop"] + """ + Ensure that _get_testmod_list() works correctly with full test name(s) specified and two + mods in one test + """ + testmod_list_input = [ + "ERS_D_Ld15.f45_f45_mg37.I2000Clm50FatesRs.izumi_nag.clm-default--clm-crop" + ] target = ["clm-default", "clm-crop"] - output = _get_testmod_list(input) + output = _get_testmod_list(testmod_list_input) self.assertEqual(output, target) From dbbe8c5ac2967c5e94378549837813dde59e9fd7 Mon Sep 17 00:00:00 2001 From: Sam Rabin Date: Wed, 31 Jan 2024 15:58:38 -0700 Subject: [PATCH 50/85] Satisfy pylint for test_unit_utils_import_coord.py. 
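
Besides adding missing docstrings, this patch replaces assertRaisesRegex calls
whose patterns contained backslash-escaped parentheses and brackets in ordinary
strings, which pylint reports as anomalous-backslash-in-string (W1401). One
caveat: unittest's msg= keyword is only the label printed when an assertion
fails, so assertRaises(SystemExit, msg=...) no longer verifies the exception
text itself. A hedged sketch of an alternative that keeps the text check while
satisfying pylint, using re.escape() on the expected message (hypothetical test,
not part of this patch):

    import re
    import unittest

    class ExampleTest(unittest.TestCase):
        """Hypothetical test illustrating the escaped-pattern alternative."""

        def test_exit_message(self):
            expected = "found 2: ('lsmlat', 'lsmlon')"
            with self.assertRaisesRegex(SystemExit, re.escape(expected)):
                raise SystemExit(expected)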
--- .../ctsm/test/test_unit_utils_import_coord.py | 42 +++++++++++++++---- 1 file changed, 34 insertions(+), 8 deletions(-) diff --git a/python/ctsm/test/test_unit_utils_import_coord.py b/python/ctsm/test/test_unit_utils_import_coord.py index b7ec8f90ec..c5607356fd 100755 --- a/python/ctsm/test/test_unit_utils_import_coord.py +++ b/python/ctsm/test/test_unit_utils_import_coord.py @@ -16,7 +16,7 @@ # -- add python/ctsm to path (needed if we want to run test stand-alone) _CTSM_PYTHON = os.path.join(os.path.dirname(os.path.realpath(__file__)), os.pardir, os.pardir) sys.path.insert(1, _CTSM_PYTHON) - +# pylint: disable=wrong-import-position from ctsm import unit_testing from ctsm.path_utils import path_to_ctsm_root from ctsm.ctsm_pylib_dependent_utils import import_coord_1d, import_coord_2d @@ -33,7 +33,9 @@ # Allow all the instance attributes that we need # pylint: disable=too-many-instance-attributes class TestUtilsImportCoord(unittest.TestCase): - # Tests the importcoord* subroutines from utils.py + """ + Tests the importcoord* subroutines from utils.py + """ def setUp(self): """Setup for trying out the methods""" @@ -56,6 +58,9 @@ def tearDown(self): shutil.rmtree(self._tempdir, ignore_errors=True) def test_importcoord1d(self): + """ + Tests importing a 1-d lat/lon variable + """ ds = xr.open_dataset(self._1d_lonlat_file) lat, Nlat = import_coord_1d(ds, "lat") np.testing.assert_equal(Nlat, 360) @@ -63,6 +68,9 @@ def test_importcoord1d(self): np.testing.assert_array_equal(lat.values[-4:], [-88.25, -88.75, -89.25, -89.75]) def test_importcoord1d_attrs(self): + """ + Tests attributes of an imported 1-d lat/lon variable + """ ds = xr.open_dataset(self._1d_lonlat_file) lat, _ = import_coord_1d(ds, "lat") # Unlike import_coord_2d, import_coord_1d doesn't rename the long name. 
@@ -73,20 +81,29 @@ def test_importcoord1d_attrs(self): self.assertDictEqual(lat.attrs, expected_attributes) def test_importcoord1d_too_many_dims(self): + """ + Tests that 1d-importing function errors when given a 2d variable to import + """ ds = xr.open_dataset(self._2d_lonlat_file) - with self.assertRaisesRegex( + with self.assertRaises( SystemExit, - "Expected 1 dimension for LATIXY; found 2: \('lsmlat', 'lsmlon'\)", + msg="Expected 1 dimension for LATIXY; found 2: ('lsmlat', 'lsmlon')", ): import_coord_1d(ds, "LATIXY") def test_importcoord2d(self): + """ + Tests importing a 2-d lat/lon variable + """ ds = xr.open_dataset(self._2d_lonlat_file) lat, _ = import_coord_2d(ds, "lat", "LATIXY") expected_values = np.array([-13.9, -11.7, -9.5, -7.3, -5.1]).astype(np.float32) np.testing.assert_array_equal(lat.values, expected_values) def test_importcoord2d_attrs(self): + """ + Tests attributes of an imported 2-d lat/lon variable + """ ds = xr.open_dataset(self._2d_lonlat_file) lat, _ = import_coord_2d(ds, "lat", "LATIXY") expected_attributes = { @@ -96,25 +113,34 @@ def test_importcoord2d_attrs(self): self.assertDictEqual(lat.attrs, expected_attributes) def test_importcoord2d_rename_dim(self): + """ + Tests renaming of an imported 2-d lat/lon variable + """ ds = xr.open_dataset(self._2d_lonlat_file) lat, _ = import_coord_2d(ds, "lat", "LATIXY") self.assertTupleEqual(lat.dims, ("lat",)) def test_importcoord2d_no_dim_contains_coordName(self): + """ + Tests that 2d-importing function errors when given a nonexistent dim name + """ ds = xr.open_dataset(self._2d_lonlat_file) ds = ds.rename({"lsmlat": "abc"}) - with self.assertRaisesRegex( + with self.assertRaises( SystemExit, - "ERROR: Expected 1 dimension name containing lat; found 0: \[\]", + msg="ERROR: Expected 1 dimension name containing lat; found 0: []", ): import_coord_2d(ds, "lat", "LATIXY") def test_importcoord2d_1_dim_containing(self): + """ + Tests that 2d-importing function errors when given an ambiguous dim name + """ ds = xr.open_dataset(self._2d_lonlat_file) ds = ds.rename({"lsmlon": "lsmlat2"}) - with self.assertRaisesRegex( + with self.assertRaises( SystemExit, - "Expected 1 dimension name containing lat; found 2: \['lsmlat', 'lsmlat2'\]", + msg="Expected 1 dimension name containing lat; found 2: ['lsmlat', 'lsmlat2']", ): import_coord_2d(ds, "lat", "LATIXY") From 829da767707b8e8576563411e38f5691aa9d10e5 Mon Sep 17 00:00:00 2001 From: Sam Rabin Date: Thu, 1 Feb 2024 08:31:43 -0700 Subject: [PATCH 51/85] Remove a colon from assign-to-project.yml --- .github/workflows/assign-to-project.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/assign-to-project.yml b/.github/workflows/assign-to-project.yml index c51a4a6cba..225c223bde 100644 --- a/.github/workflows/assign-to-project.yml +++ b/.github/workflows/assign-to-project.yml @@ -13,7 +13,7 @@ jobs: runs-on: ubuntu-latest name: Assign to High Priority project steps: - - name: Assign issues and pull requests with priority: high label to project 25 + - name: Assign issues and pull requests with priority-high label to project 25 uses: srggrs/assign-one-project-github-action@1.3.1 if: | contains(github.event.issue.labels.*.name, 'priority: high') || From f87abd05ad40b1793a40e0da4f4648249a9bdb34 Mon Sep 17 00:00:00 2001 From: Sam Rabin Date: Thu, 1 Feb 2024 09:10:44 -0700 Subject: [PATCH 52/85] Satisfy pylint for modify_singlept_site_neon.py. Includes adding a timeout of 60 seconds for requests.get(). 
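
Without a timeout, requests.get() can block indefinitely on an unresponsive
server, which is why newer pylint versions warn about such calls; with one set,
a stalled download raises requests.exceptions.Timeout instead of hanging. A
minimal sketch of the pattern this patch applies (hypothetical URL):

    import requests

    TIMEOUT = 60  # seconds to wait before requests.get() times out

    try:
        response = requests.get("https://example.com/surfaceData.csv", timeout=TIMEOUT)
        response.raise_for_status()
    except requests.exceptions.Timeout:
        print(f"No response within {TIMEOUT} seconds")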
--- .../modify_singlept_site_neon.py | 232 ++++++++++-------- 1 file changed, 133 insertions(+), 99 deletions(-) diff --git a/python/ctsm/site_and_regional/modify_singlept_site_neon.py b/python/ctsm/site_and_regional/modify_singlept_site_neon.py index ae1318e2f8..e69a8ab834 100755 --- a/python/ctsm/site_and_regional/modify_singlept_site_neon.py +++ b/python/ctsm/site_and_regional/modify_singlept_site_neon.py @@ -54,6 +54,9 @@ myname = getuser() +# Seconds to wait before requests.get() times out +TIMEOUT = 60 + # -- valid neon sites valid_neon_sites = glob.glob( @@ -176,7 +179,7 @@ def get_neon(neon_dir, site_name): + site_name + "_surfaceData.csv" ) - response = requests.get(url) + response = requests.get(url, timeout=TIMEOUT) with open(neon_file, "wb") as a_file: a_file.write(response.content) @@ -430,7 +433,7 @@ def download_file(url, fname): file name to save the downloaded file. """ try: - response = requests.get(url) + response = requests.get(url, timeout=TIMEOUT) with open(fname, "wb") as a_file: a_file.write(response.content) @@ -443,7 +446,7 @@ def download_file(url, fname): except Exception as err: print("The server could not fulfill the request.") print("Something went wrong in downloading", fname) - print("Error code:", err.code) + raise err def fill_interpolate(f_2, var, method): @@ -472,6 +475,129 @@ def fill_interpolate(f_2, var, method): print("=====================================") +def print_neon_data_soil_structure(obs_bot, soil_bot, bin_index): + """ + Print info about NEON data soil structure + """ + print("================================") + print(" Neon data soil structure: ") + print("================================") + + print("------------", "ground", "------------") + for i, this_obs_bot in enumerate(obs_bot): + print("layer", i) + print("-------------", "{0:.2f}".format(this_obs_bot), "-------------") + + print("================================") + print("Surface data soil structure: ") + print("================================") + + print("------------", "ground", "------------") + for this_bin in range(len(bin_index)): + print("layer", this_bin) + print("-------------", "{0:.2f}".format(soil_bot[this_bin]), "-------------") + + +def print_soil_quality( + inorganic, bin_index, soil_lev, layer_depth, carbon_tot, estimated_oc, bulk_den, f_2 +): + """ + Prints information about soil quality + """ + print("~~~~~~~~~~~~~~~~~~~~~~~~") + print("inorganic:") + print("~~~~~~~~~~~~~~~~~~~~~~~~") + print(inorganic) + print("~~~~~~~~~~~~~~~~~~~~~~~~") + + print("bin_index : ", bin_index[soil_lev]) + print("layer_depth : ", layer_depth) + print("carbon_tot : ", carbon_tot) + print("estimated_oc : ", estimated_oc) + print("bulk_den : ", bulk_den) + print("organic :", f_2["ORGANIC"][soil_lev].values) + print("--------------------------") + + +def update_agri_site_info(site_name, f_2): + """ + Updates agricultural sites + """ + ag_sites = ["KONA", "STER"] + if site_name not in ag_sites: + return f_2 + + print("Updating PCT_NATVEG") + print("Original : ", f_2.PCT_NATVEG.values) + f_2.PCT_NATVEG.values = [[0.0]] + print("Updated : ", f_2.PCT_NATVEG.values) + + print("Updating PCT_CROP") + print("Original : ", f_2.PCT_CROP.values) + f_2.PCT_CROP.values = [[100.0]] + print("Updated : ", f_2.PCT_CROP.values) + + print("Updating PCT_NAT_PFT") + print(f_2.PCT_NAT_PFT.values[0]) + print(f_2.PCT_NAT_PFT[0].values) + + return f_2 + + +def update_fields_with_neon(f_1, d_f, bin_index): + """ + update fields with neon + """ + f_2 = f_1 + soil_levels = f_2["PCT_CLAY"].size + for soil_lev 
in range(soil_levels): + print("--------------------------") + print("soil_lev:", soil_lev) + print(d_f["clayTotal"][bin_index[soil_lev]]) + f_2["PCT_CLAY"][soil_lev] = d_f["clayTotal"][bin_index[soil_lev]] + f_2["PCT_SAND"][soil_lev] = d_f["sandTotal"][bin_index[soil_lev]] + + bulk_den = d_f["bulkDensExclCoarseFrag"][bin_index[soil_lev]] + carbon_tot = d_f["carbonTot"][bin_index[soil_lev]] + estimated_oc = d_f["estimatedOC"][bin_index[soil_lev]] + + # -- estimated_oc in neon data is rounded to the nearest integer. + # -- Check to make sure the rounded oc is not higher than carbon_tot. + # -- Use carbon_tot if estimated_oc is bigger than carbon_tot. + + estimated_oc = min(estimated_oc, carbon_tot) + + layer_depth = ( + d_f["biogeoBottomDepth"][bin_index[soil_lev]] + - d_f["biogeoTopDepth"][bin_index[soil_lev]] + ) + + # f_2["ORGANIC"][soil_lev] = estimated_oc * bulk_den / 0.58 + + # -- after adding caco3 by NEON: + # -- if caco3 exists: + # -- inorganic = caco3/100.0869*12.0107 + # -- organic = carbon_tot - inorganic + # -- else: + # -- organic = estimated_oc * bulk_den /0.58 + + caco3 = d_f["caco3Conc"][bin_index[soil_lev]] + inorganic = caco3 / 100.0869 * 12.0107 + print("inorganic:", inorganic) + + if not np.isnan(inorganic): + actual_oc = carbon_tot - inorganic + else: + actual_oc = estimated_oc + + f_2["ORGANIC"][soil_lev] = actual_oc * bulk_den / 0.58 + + print_soil_quality( + inorganic, bin_index, soil_lev, layer_depth, carbon_tot, estimated_oc, bulk_den, f_2 + ) + return f_2 + + def main(): """modify_singlept_site_neon main function""" args = get_parser().parse_args() @@ -532,88 +658,10 @@ def main(): bins = d_f["biogeoTopDepth"] / 100 bin_index = np.digitize(soil_mid, bins) - 1 - """ - print ("================================") - print (" Neon data soil structure: ") - print ("================================") - - print ("------------","ground","------------") - for i in range(len(obs_bot)): - print ("layer",i) - print ("-------------", - "{0:.2f}".format(obs_bot[i]), - "-------------") - - print ("================================") - print ("Surface data soil structure: ") - print ("================================") - - print ("------------","ground","------------") - for b in range(len(bin_index)): - print ("layer",b) - print ("-------------", - "{0:.2f}".format(soil_bot[b]), - "-------------") - """ + print_neon_data_soil_structure(obs_bot, soil_bot, bin_index) # -- update fields with neon - f_2 = f_1 - soil_levels = f_2["PCT_CLAY"].size - for soil_lev in range(soil_levels): - print("--------------------------") - print("soil_lev:", soil_lev) - print(d_f["clayTotal"][bin_index[soil_lev]]) - f_2["PCT_CLAY"][soil_lev] = d_f["clayTotal"][bin_index[soil_lev]] - f_2["PCT_SAND"][soil_lev] = d_f["sandTotal"][bin_index[soil_lev]] - - bulk_den = d_f["bulkDensExclCoarseFrag"][bin_index[soil_lev]] - carbon_tot = d_f["carbonTot"][bin_index[soil_lev]] - estimated_oc = d_f["estimatedOC"][bin_index[soil_lev]] - - # -- estimated_oc in neon data is rounded to the nearest integer. - # -- Check to make sure the rounded oc is not higher than carbon_tot. - # -- Use carbon_tot if estimated_oc is bigger than carbon_tot. 
- - estimated_oc = min(estimated_oc, carbon_tot) - - layer_depth = ( - d_f["biogeoBottomDepth"][bin_index[soil_lev]] - - d_f["biogeoTopDepth"][bin_index[soil_lev]] - ) - - # f_2["ORGANIC"][soil_lev] = estimated_oc * bulk_den / 0.58 - - # -- after adding caco3 by NEON: - # -- if caco3 exists: - # -- inorganic = caco3/100.0869*12.0107 - # -- organic = carbon_tot - inorganic - # -- else: - # -- organic = estimated_oc * bulk_den /0.58 - - caco3 = d_f["caco3Conc"][bin_index[soil_lev]] - inorganic = caco3 / 100.0869 * 12.0107 - print("inorganic:", inorganic) - - if not np.isnan(inorganic): - actual_oc = carbon_tot - inorganic - else: - actual_oc = estimated_oc - - f_2["ORGANIC"][soil_lev] = actual_oc * bulk_den / 0.58 - - print("~~~~~~~~~~~~~~~~~~~~~~~~") - print("inorganic:") - print("~~~~~~~~~~~~~~~~~~~~~~~~") - print(inorganic) - print("~~~~~~~~~~~~~~~~~~~~~~~~") - - print("bin_index : ", bin_index[soil_lev]) - print("layer_depth : ", layer_depth) - print("carbon_tot : ", carbon_tot) - print("estimated_oc : ", estimated_oc) - print("bulk_den : ", bulk_den) - print("organic :", f_2["ORGANIC"][soil_lev].values) - print("--------------------------") + f_2 = update_fields_with_neon(f_1, d_f, bin_index) # -- Interpolate missing values method = "linear" @@ -633,22 +681,8 @@ def main(): sort_print_soil_layers(obs_bot, soil_bot) - # -- updates for ag sites : KONA and STER - ag_sites = ["KONA", "STER"] - if site_name in ag_sites: - print("Updating PCT_NATVEG") - print("Original : ", f_2.PCT_NATVEG.values) - f_2.PCT_NATVEG.values = [[0.0]] - print("Updated : ", f_2.PCT_NATVEG.values) - - print("Updating PCT_CROP") - print("Original : ", f_2.PCT_CROP.values) - f_2.PCT_CROP.values = [[100.0]] - print("Updated : ", f_2.PCT_CROP.values) - - print("Updating PCT_NAT_PFT") - print(f_2.PCT_NAT_PFT.values[0]) - print(f_2.PCT_NAT_PFT[0].values) + # -- updates for ag sites + update_agri_site_info(site_name, f_2) out_dir = args.out_dir From 1949f5c76897016d8d2d6de9ce93eb75cb7a1a51 Mon Sep 17 00:00:00 2001 From: Sam Rabin Date: Wed, 7 Feb 2024 16:32:39 -0700 Subject: [PATCH 53/85] Update parameter files to 240207 (delete _FillValue and history). --- bld/namelist_files/namelist_defaults_ctsm.xml | 6 +++--- .../testdefs/testmods_dirs/clm/ciso_cwd_hr/user_nl_clm | 2 +- py_env_create | 2 +- python/conda_env_ctsm_py.txt | 4 ++-- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/bld/namelist_files/namelist_defaults_ctsm.xml b/bld/namelist_files/namelist_defaults_ctsm.xml index d5d8bc08eb..e065807741 100644 --- a/bld/namelist_files/namelist_defaults_ctsm.xml +++ b/bld/namelist_files/namelist_defaults_ctsm.xml @@ -484,9 +484,9 @@ attributes from the config_cache.xml file (with keys converted to upper-case). 
-lnd/clm2/paramdata/ctsm51_params.c240105.nc -lnd/clm2/paramdata/clm50_params.c240105.nc -lnd/clm2/paramdata/clm45_params.c240105.nc +lnd/clm2/paramdata/ctsm51_params.c240207.nc +lnd/clm2/paramdata/clm50_params.c240207.nc +lnd/clm2/paramdata/clm45_params.c240207.nc diff --git a/cime_config/testdefs/testmods_dirs/clm/ciso_cwd_hr/user_nl_clm b/cime_config/testdefs/testmods_dirs/clm/ciso_cwd_hr/user_nl_clm index 4e073859be..8135bddf3b 100644 --- a/cime_config/testdefs/testmods_dirs/clm/ciso_cwd_hr/user_nl_clm +++ b/cime_config/testdefs/testmods_dirs/clm/ciso_cwd_hr/user_nl_clm @@ -1,2 +1,2 @@ -paramfile = '$DIN_LOC_ROOT/lnd/clm2/paramdata/ctsm51_ciso_cwd_hr_params.c240105.nc' +paramfile = '$DIN_LOC_ROOT/lnd/clm2/paramdata/ctsm51_ciso_cwd_hr_params.c240207.nc' hist_fincl1 = 'CWDC_HR','C13_CWDC_HR','C14_CWDC_HR','CWD_HR_L2','CWD_HR_L2_vr','CWD_HR_L3','CWD_HR_L3_vr' diff --git a/py_env_create b/py_env_create index c323a374df..a2a2bbbcb7 100755 --- a/py_env_create +++ b/py_env_create @@ -23,7 +23,7 @@ if [ $error != 0 ]; then exit -1 fi rm condahelp.txt -ctsm_python=ctsm_pylib +ctsm_python=ctsm_pylib3 condadir="$dir/python" diff --git a/python/conda_env_ctsm_py.txt b/python/conda_env_ctsm_py.txt index e621081591..e469c3e37a 100644 --- a/python/conda_env_ctsm_py.txt +++ b/python/conda_env_ctsm_py.txt @@ -18,9 +18,9 @@ scipy netcdf4 requests packaging -numpy=1.18.5 +numpy xarray=0.17.0 xesmf -numba=0.55.2 # Avoid 0.56 until numpy>=1.20. This is the minimum for xesmf +numba # Avoid 0.56 until numpy>=1.20. This is the minimum for xesmf pylint=2.8.3 black=22.3.0 # NOTE: The version here needs to be coordinated with the black github action under ../.github/workflows From 9511108ad5b5939a4b0b9f4f42946140f97800ca Mon Sep 17 00:00:00 2001 From: Sam Rabin Date: Wed, 7 Feb 2024 16:48:10 -0700 Subject: [PATCH 54/85] Revert accidental changes re: ctsm_pylib --- py_env_create | 2 +- python/conda_env_ctsm_py.txt | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/py_env_create b/py_env_create index a2a2bbbcb7..c323a374df 100755 --- a/py_env_create +++ b/py_env_create @@ -23,7 +23,7 @@ if [ $error != 0 ]; then exit -1 fi rm condahelp.txt -ctsm_python=ctsm_pylib3 +ctsm_python=ctsm_pylib condadir="$dir/python" diff --git a/python/conda_env_ctsm_py.txt b/python/conda_env_ctsm_py.txt index e469c3e37a..e621081591 100644 --- a/python/conda_env_ctsm_py.txt +++ b/python/conda_env_ctsm_py.txt @@ -18,9 +18,9 @@ scipy netcdf4 requests packaging -numpy +numpy=1.18.5 xarray=0.17.0 xesmf -numba # Avoid 0.56 until numpy>=1.20. This is the minimum for xesmf +numba=0.55.2 # Avoid 0.56 until numpy>=1.20. This is the minimum for xesmf pylint=2.8.3 black=22.3.0 # NOTE: The version here needs to be coordinated with the black github action under ../.github/workflows From dca964c2cb939ac30e10eca11fb1310f1a5aed21 Mon Sep 17 00:00:00 2001 From: Sam Rabin Date: Thu, 8 Feb 2024 11:28:28 -0700 Subject: [PATCH 55/85] Update ChangeLog and ChangeSum. 
--- doc/ChangeLog | 52 +++++++++++++++++++++++++++++++++++++++++++++++++++ doc/ChangeSum | 1 + 2 files changed, 53 insertions(+) diff --git a/doc/ChangeLog b/doc/ChangeLog index dbb9b05c84..84787d3087 100644 --- a/doc/ChangeLog +++ b/doc/ChangeLog @@ -1,4 +1,56 @@ =============================================================== +Tag name: ctsm5.1.dev167 +Originator(s): samrabin (Sam Rabin, UCAR/TSS, samrabin@ucar.edu) +Date: Thu Feb 8 11:24:15 MST 2024 +One-line Summary: Delete _FillValue and history from parameter files + +Purpose and description of changes +---------------------------------- + +Updates parameter files to c240207. These are the same as c240105 except: +- Attribute _FillValue has been removed from all variables +- Global attributes history, history_of_appended_files, and latest_git_log have been removed + + +Significant changes to scientifically-supported configurations +-------------------------------------------------------------- + +Does this tag change answers significantly for any of the following physics configurations? +(Details of any changes will be given in the "Answer changes" section below.) + +[ ] clm5_1 + +[ ] clm5_0 + +[ ] ctsm5_0-nwp + +[ ] clm4_5 + + +Bugs fixed +---------- + +CTSM issues fixed (include CTSM Issue #): +- Fixes #2347 + + +Testing summary: +---------------- + + regular tests (aux_clm: https://github.com/ESCOMP/CTSM/wiki/System-Testing-Guide#pre-merge-system-testing): + + derecho ----- OK + izumi ------- OK + + +Other details +------------- + +Pull Requests that document the changes (include PR ids): +- #2350: Delete _FillValue and history from parameter files (https://github.com/ESCOMP/CTSM/pull/2350) + +=============================================================== +=============================================================== Tag name: ctsm5.1.dev166 Originator(s): slevis (Samuel Levis,UCAR/TSS,303-665-1310), tking (Teagan King), samrabin (Sam Rabin) Date: Wed 24 Jan 2024 05:39:41 PM MST diff --git a/doc/ChangeSum b/doc/ChangeSum index bfc8b86174..d644cff144 100644 --- a/doc/ChangeSum +++ b/doc/ChangeSum @@ -1,5 +1,6 @@ Tag Who Date Summary ============================================================================================================================ + ctsm5.1.dev167 samrabin 02/08/2024 Delete _FillValue and history from parameter files ctsm5.1.dev166 multiple 01/24/2024 BFB merge tag ctsm5.1.dev165 slevis 01/19/2024 Turn Meier2022, tillage, residue removal on for ctsm5.1, fix #2212 ctsm5.1.dev164 rgknox 01/17/2024 Compatibility and tests for FATES 2-Stream From e4aa2bdac842b0aa105308cbb8068dd55bbf1ede Mon Sep 17 00:00:00 2001 From: Sam Rabin Date: Thu, 8 Feb 2024 11:47:58 -0700 Subject: [PATCH 56/85] python/Makefile: Do not fail even if pylint isn't clean. --- python/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/Makefile b/python/Makefile index 271e977046..440e2e0de8 100644 --- a/python/Makefile +++ b/python/Makefile @@ -19,7 +19,7 @@ ifneq ($(verbose), not-set) endif PYLINT=pylint -PYLINT_ARGS=-j 4 --rcfile=ctsm/.pylintrc +PYLINT_ARGS=-j 4 --rcfile=ctsm/.pylintrc --fail-under=0 PYLINT_SRC = \ ctsm # NOTE: These don't pass pylint checking and should be added when we put into effort to get them to pass From 8e3b3ba02ad75ab50ba751267627d9136c96464e Mon Sep 17 00:00:00 2001 From: Sam Rabin Date: Thu, 8 Feb 2024 11:48:31 -0700 Subject: [PATCH 57/85] python/Makefile: Call black before pylint in 'make all'. 
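
Reordering 'make all' so black runs before pylint lets formatting problems
surface first; combined with --fail-under=0 from the previous commit, pylint in
the 2.8 series then exits zero whenever its overall score is at or above the
threshold, rather than encoding every message category in its exit status. A
sketch of how to inspect this, assuming pylint is on PATH:

    import subprocess

    # Without --fail-under, each emitted message category sets a bit in the
    # exit code (1=fatal, 2=error, 4=warning, 8=refactor, 16=convention).
    result = subprocess.run(["pylint", "--fail-under=0", "ctsm"], check=False)
    print("pylint exit code:", result.returncode)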
--- python/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/Makefile b/python/Makefile index 440e2e0de8..b43e1c5e53 100644 --- a/python/Makefile +++ b/python/Makefile @@ -27,7 +27,7 @@ PYLINT_SRC = \ # ../cime_config/buildlib \ # ../cime_config/buildnml -all: test lint black +all: test black lint @echo @echo @echo "Successfully ran all standard tests" From 18e03eac9ebf2da8700dbaa111bc79bcebf2d925 Mon Sep 17 00:00:00 2001 From: Sam Rabin Date: Thu, 8 Feb 2024 13:22:01 -0700 Subject: [PATCH 58/85] Satisfy pylint for ctsm_pylib_dependent_utils.py. --- python/ctsm/ctsm_pylib_dependent_utils.py | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/python/ctsm/ctsm_pylib_dependent_utils.py b/python/ctsm/ctsm_pylib_dependent_utils.py index 4f149c53a9..59ca15155b 100644 --- a/python/ctsm/ctsm_pylib_dependent_utils.py +++ b/python/ctsm/ctsm_pylib_dependent_utils.py @@ -1,3 +1,7 @@ +""" +Utilities that are dependent on non-standard modules (i.e., require ctsm_pylib). +""" + import numpy as np from ctsm.utils import abort @@ -14,8 +18,10 @@ def import_coord_1d(data_set, coord_name): """ data_array = data_set[coord_name] if len(data_array.dims) != 1: - abort(f"Expected 1 dimension for {coord_name}; " - + f"found {len(data_array.dims)}: {data_array.dims}") + abort( + f"Expected 1 dimension for {coord_name}; " + + f"found {len(data_array.dims)}: {data_array.dims}" + ) return data_array, len(data_array) @@ -37,8 +43,10 @@ def import_coord_2d(data_set, coord_name, var_name): data_array = data_set[var_name] this_dim = [x for x in data_array.dims if coord_name in x] if len(this_dim) != 1: - abort(f"Expected 1 dimension name containing {coord_name}; " - + f"found {len(this_dim)}: {this_dim}") + abort( + f"Expected 1 dimension name containing {coord_name}; " + + f"found {len(this_dim)}: {this_dim}" + ) this_dim = this_dim[0] other_dim = [x for x in data_array.dims if coord_name not in x] if len(other_dim) != 1: From 4d7493afd7a7a382b7eccc3b75221d28df6f4c81 Mon Sep 17 00:00:00 2001 From: Sam Rabin Date: Thu, 8 Feb 2024 13:55:30 -0700 Subject: [PATCH 59/85] Use c240207b parameter files. --- bld/namelist_files/namelist_defaults_ctsm.xml | 6 +++--- .../testdefs/testmods_dirs/clm/ciso_cwd_hr/user_nl_clm | 2 +- doc/ChangeLog | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/bld/namelist_files/namelist_defaults_ctsm.xml b/bld/namelist_files/namelist_defaults_ctsm.xml index e065807741..d3b3cc9715 100644 --- a/bld/namelist_files/namelist_defaults_ctsm.xml +++ b/bld/namelist_files/namelist_defaults_ctsm.xml @@ -484,9 +484,9 @@ attributes from the config_cache.xml file (with keys converted to upper-case). 
-lnd/clm2/paramdata/ctsm51_params.c240207.nc -lnd/clm2/paramdata/clm50_params.c240207.nc -lnd/clm2/paramdata/clm45_params.c240207.nc +lnd/clm2/paramdata/ctsm51_params.c240207b.nc +lnd/clm2/paramdata/clm50_params.c240207b.nc +lnd/clm2/paramdata/clm45_params.c240207b.nc diff --git a/cime_config/testdefs/testmods_dirs/clm/ciso_cwd_hr/user_nl_clm b/cime_config/testdefs/testmods_dirs/clm/ciso_cwd_hr/user_nl_clm index 8135bddf3b..c235d72df1 100644 --- a/cime_config/testdefs/testmods_dirs/clm/ciso_cwd_hr/user_nl_clm +++ b/cime_config/testdefs/testmods_dirs/clm/ciso_cwd_hr/user_nl_clm @@ -1,2 +1,2 @@ -paramfile = '$DIN_LOC_ROOT/lnd/clm2/paramdata/ctsm51_ciso_cwd_hr_params.c240207.nc' +paramfile = '$DIN_LOC_ROOT/lnd/clm2/paramdata/ctsm51_ciso_cwd_hr_params.c240207b.nc' hist_fincl1 = 'CWDC_HR','C13_CWDC_HR','C14_CWDC_HR','CWD_HR_L2','CWD_HR_L2_vr','CWD_HR_L3','CWD_HR_L3_vr' diff --git a/doc/ChangeLog b/doc/ChangeLog index 84787d3087..084516e23e 100644 --- a/doc/ChangeLog +++ b/doc/ChangeLog @@ -1,13 +1,13 @@ =============================================================== Tag name: ctsm5.1.dev167 Originator(s): samrabin (Sam Rabin, UCAR/TSS, samrabin@ucar.edu) -Date: Thu Feb 8 11:24:15 MST 2024 +Date: Thu 08 Feb 2024 01:56:05 PM MST One-line Summary: Delete _FillValue and history from parameter files Purpose and description of changes ---------------------------------- -Updates parameter files to c240207. These are the same as c240105 except: +Updates parameter files to c240207b. These are the same as c240105 except: - Attribute _FillValue has been removed from all variables - Global attributes history, history_of_appended_files, and latest_git_log have been removed From c55dac6b89cd40091426d6205e83ad7e0d59099a Mon Sep 17 00:00:00 2001 From: Sam Rabin Date: Thu, 8 Feb 2024 15:56:31 -0700 Subject: [PATCH 60/85] Resolve pylint warnings about cropcal_module.py. Moves some functions from there into 3 new modules: * ctsm/crop_calendars/check_constant_vars.py * ctsm/crop_calendars/check_rx_obeyed.py * ctsm/crop_calendars/convert_axis_time2gs.py --- .../crop_calendars/check_constant_vars.py | 386 ++++++ python/ctsm/crop_calendars/check_rx_obeyed.py | 227 ++++ .../ctsm/crop_calendars/check_rxboth_run.py | 20 +- .../crop_calendars/convert_axis_time2gs.py | 631 ++++++++++ python/ctsm/crop_calendars/cropcal_module.py | 1069 ++--------------- 5 files changed, 1351 insertions(+), 982 deletions(-) create mode 100644 python/ctsm/crop_calendars/check_constant_vars.py create mode 100644 python/ctsm/crop_calendars/check_rx_obeyed.py create mode 100644 python/ctsm/crop_calendars/convert_axis_time2gs.py diff --git a/python/ctsm/crop_calendars/check_constant_vars.py b/python/ctsm/crop_calendars/check_constant_vars.py new file mode 100644 index 0000000000..92e1819803 --- /dev/null +++ b/python/ctsm/crop_calendars/check_constant_vars.py @@ -0,0 +1,386 @@ +""" +For variables that should stay constant, make sure they are +""" + +import sys +import os +import numpy as np + +# Import the CTSM Python utilities. +# sys.path.insert() is necessary for RXCROPMATURITY to work. The fact that it's calling this script +# in the RUN phase seems to require the python/ directory to be manually added to path. 
+_CTSM_PYTHON = os.path.join( + os.path.dirname(os.path.realpath(__file__)), os.pardir, os.pardir, os.pardir, "python" +) +sys.path.insert(1, _CTSM_PYTHON) +from ctsm.crop_calendars.cropcal_module import ( # pylint: disable=wrong-import-position + import_rx_dates, +) + + +def check_one_constant_var_setup(this_ds, case, var): + """ + Various setup steps for check_one_constant_var() + """ + if "gs" in this_ds[var].dims: + time_coord = "gs" + elif "time" in this_ds[var].dims: + time_coord = "time" + else: + raise RuntimeError(f"Which of these is the time coordinate? {this_ds[var].dims}") + i_time_coord = this_ds[var].dims.index(time_coord) + + this_da = this_ds[var] + ra_sp = np.moveaxis(this_da.copy().values, i_time_coord, 0) + incl_patches = [] + bad_patches = np.array([]) + str_list = [] + + # Read prescription file, if needed + rx_ds = None + if isinstance(case, dict): + if var == "GDDHARV" and "rx_gdds_file" in case: + rx_ds = import_rx_dates( + "gdd", case["rx_gdds_file"], this_ds, set_neg1_to_nan=False + ).squeeze() + + return time_coord, this_da, ra_sp, incl_patches, str_list, rx_ds, bad_patches + + +def loop_through_bad_patches( + verbose, + emojus, + var, + everything_ok, + str_list, + rx_ds, + time_1, + t1_yr, + t1_vals, + timestep, + t_yr, + t_vals, + bad_patches_this_time, + found_in_rx, + vary_patches, + vary_lons, + vary_lats, + vary_crops, + vary_crops_int, +): + """ + Loop through and check any patches that were "bad" according to check_constant_vars(). + + This is pretty inefficient, but it works. + """ + patch = None # In case bad_patches_this_time is empty + for i, patch in enumerate(bad_patches_this_time): + this_patch = vary_patches[i] + this_lon = vary_lons[i] + this_lat = vary_lats[i] + this_crop = vary_crops[i] + this_crop_int = vary_crops_int[i] + + # If prescribed input had missing value (-1), it's fine for it to vary. + if rx_ds: + rx_var = f"gs1_{this_crop_int}" + if this_lon in rx_ds.lon.values and this_lat in rx_ds.lat.values: + rx_vals = rx_ds[rx_var].sel(lon=this_lon, lat=this_lat).values + n_unique = len(np.unique(rx_vals)) + if n_unique == 1: + found_in_rx[i] = True + if rx_vals == -1: + continue + elif n_unique > 1: + raise RuntimeError( + f"How does lon {this_lon} lat {this_lat} {this_crop} have " + + f"time-varying {var}?" 
+ ) + else: + raise RuntimeError(f"lon {this_lon} lat {this_lat} {this_crop} not in rx dataset?") + + # Print info (or save to print later) + any_bad = True + if verbose: + this_str = ( + f" Patch {this_patch} (lon {this_lon} lat {this_lat}) " + + f"{this_crop} ({this_crop_int})" + ) + if rx_ds and not found_in_rx[i]: + this_str = this_str.replace("(lon", "* (lon") + if not np.isnan(t1_vals[patch]): + t1_val_print = int(t1_vals[patch]) + else: + t1_val_print = "NaN" + if not np.isnan(t_vals[patch]): + t_val_print = int(t_vals[patch]) + else: + t_val_print = "NaN" + if var == "SDATES": + str_list.append( + f"{this_str}: Sowing {t1_yr} jday {t1_val_print}, {t_yr} " + + f"jday {t_val_print}" + ) + else: + str_list.append( + f"{this_str}: {t1_yr} {var} {t1_val_print}, {t_yr} {var} " + f"{t_val_print}" + ) + else: + if everything_ok: + print(f"{emojus} CLM output {var} unexpectedly vary over time:") + everything_ok = False + print(f"{var} timestep {timestep} does not match timestep {time_1}") + break + return any_bad, patch + + +def ensure_all_patches_checked(this_ds, this_da, ra_sp, incl_patches): + """ + In check_one_constant_var(), make sure every patch was checked once (or is all-NaN except + possibly final season) + """ + incl_patches = np.sort(incl_patches) + if not np.array_equal(incl_patches, np.unique(incl_patches)): + raise RuntimeError("Patch(es) checked more than once!") + incl_patches = list(incl_patches) + incl_patches += list( + np.where( + np.all( + np.isnan( + ra_sp[ + :-1, + ] + ), + axis=0, + ) + )[0] + ) + incl_patches = np.sort(incl_patches) + if not np.array_equal(incl_patches, np.unique(incl_patches)): + raise RuntimeError("Patch(es) checked but also all-NaN??") + if not np.array_equal(incl_patches, np.arange(this_ds.dims["patch"])): + for patch in np.arange(this_ds.dims["patch"]): + if patch not in incl_patches: + raise RuntimeError( + f"Not all patches checked! E.g., {patch}: {this_da.isel(patch=patch).values}" + ) + + +def check_one_constant_var_loop_through_timesteps( + this_ds, + ignore_nan, + verbose, + emojus, + var, + everything_ok, + time_coord, + this_da, + str_list, + rx_ds, + time_1, + these_patches, + t1_yr, + t1_vals, +): + """ + In check_one_constant_var(), loop through timesteps + """ + for timestep in np.arange(time_1 + 1, this_ds.dims[time_coord]): + t_yr = this_ds[time_coord].values[timestep] + t_vals = np.squeeze(this_da.isel({time_coord: timestep, "patch": these_patches}).values) + ok_p = t1_vals == t_vals + + # If allowed, ignore where either t or t1 is NaN. Should only be used for runs where + # land use varies over time. 
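+        # (np.isnan(t1_vals + t_vals) is True wherever either addend is NaN, so
+        # the bitwise_or below treats any patch with missing data on either
+        # side of the comparison as passing.)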
+ if ignore_nan: + ok_p = np.squeeze(np.bitwise_or(ok_p, np.isnan(t1_vals + t_vals))) + + if not np.all(ok_p): + any_bad_before_checking_rx = True + bad_patches_this_time = list(np.where(np.bitwise_not(ok_p))[0]) + bad_patches = np.concatenate( + (bad_patches, np.array(these_patches)[bad_patches_this_time]) + ) + if rx_ds: + found_in_rx = np.array([False for x in bad_patches]) + vary_patches = list(np.array(these_patches)[bad_patches_this_time]) + vary_lons = this_ds.patches1d_lon.values[bad_patches_this_time] + vary_lats = this_ds.patches1d_lat.values[bad_patches_this_time] + vary_crops = this_ds.patches1d_itype_veg_str.values[bad_patches_this_time] + vary_crops_int = this_ds.patches1d_itype_veg.values[bad_patches_this_time] + + any_bad_any_crop = False + for crop_int in np.unique(vary_crops_int): + rx_var = f"gs1_{crop_int}" + vary_lons_this_crop = vary_lons[np.where(vary_crops_int == crop_int)] + vary_lats_this_crop = vary_lats[np.where(vary_crops_int == crop_int)] + these_rx_vals = np.diag( + rx_ds[rx_var].sel(lon=vary_lons_this_crop, lat=vary_lats_this_crop).values + ) + if len(these_rx_vals) != len(vary_lats_this_crop): + raise RuntimeError( + f"Expected {len(vary_lats_this_crop)} rx values; got " + + f"{len(these_rx_vals)}" + ) + if not np.any(these_rx_vals != -1): + continue + any_bad_any_crop = True + break + if not any_bad_any_crop: + continue + + # Loop through and check any patches that were "bad" + any_bad = loop_through_bad_patches( + verbose, + emojus, + var, + everything_ok, + str_list, + rx_ds, + time_1, + t1_yr, + t1_vals, + timestep, + t_yr, + t_vals, + bad_patches_this_time, + found_in_rx, + vary_patches, + vary_lons, + vary_lats, + vary_crops, + vary_crops_int, + ) + + return any_bad_before_checking_rx, bad_patches, found_in_rx, any_bad + + +def check_one_constant_var( + this_ds, case, ignore_nan, verbose, emojus, var, any_bad_before_checking_rx +): + """ + Ensure that a variable that should be constant actually is + """ + everything_ok = True + + ( + time_coord, + this_da, + ra_sp, + incl_patches, + str_list, + rx_ds, + bad_patches, + ) = check_one_constant_var_setup(this_ds, case, var) + + for time_1 in np.arange(this_ds.dims[time_coord] - 1): + condn = ~np.isnan(ra_sp[time_1, ...]) + if time_1 > 0: + condn = np.bitwise_and(condn, np.all(np.isnan(ra_sp[:time_1, ...]), axis=0)) + these_patches = np.where(condn)[0] + if these_patches.size == 0: + continue + these_patches = list(np.where(condn)[0]) + incl_patches += these_patches + + t1_yr = this_ds[time_coord].values[time_1] + t1_vals = np.squeeze(this_da.isel({time_coord: time_1, "patch": these_patches}).values) + + ( + any_bad_before_checking_rx, + bad_patches, + found_in_rx, + any_bad, + ) = check_one_constant_var_loop_through_timesteps( + this_ds, + ignore_nan, + verbose, + emojus, + var, + everything_ok, + time_coord, + this_da, + str_list, + rx_ds, + time_1, + these_patches, + t1_yr, + t1_vals, + ) + + if verbose and any_bad: + print(f"{emojus} CLM output {var} unexpectedly vary over time:") + str_list.sort() + if rx_ds and np.any(~found_in_rx): + str_list = [ + "*: Not found in prescribed input file (maybe minor lon/lat mismatch)" + ] + str_list + elif not rx_ds: + str_list = ["(No rx file checked)"] + str_list + print("\n".join(str_list)) + + # Make sure every patch was checked once (or is all-NaN except possibly final season) + ensure_all_patches_checked(this_ds, this_da, ra_sp, incl_patches) + + if not any_bad: + if any_bad_before_checking_rx: + print( + f"✅ CLM output {var} do not vary through 
{this_ds.dims[time_coord]} growing " + + "seasons of output (except for patch(es) with missing rx)." + ) + else: + print( + f"✅ CLM output {var} do not vary through {this_ds.dims[time_coord]} growing " + + "seasons of output." + ) + + return any_bad, any_bad_before_checking_rx, bad_patches + + +def check_constant_vars( + this_ds, case, ignore_nan, const_growing_seasons=None, verbose=True, throw_error=True +): + """ + For variables that should stay constant, make sure they are + """ + if isinstance(case, str): + const_vars = [case] + elif isinstance(case, list): + const_vars = case + elif isinstance(case, dict): + const_vars = case["const_vars"] + else: + raise TypeError(f"case must be str or dict, not {type(case)}") + + if not const_vars: + return None + + if const_growing_seasons: + gs_0 = this_ds.gs.values[0] + gs_n = this_ds.gs.values[-1] + if const_growing_seasons.start > gs_0 or const_growing_seasons.stop < gs_n: + print( + f"❗ Only checking const_vars over {const_growing_seasons.start}-" + + f"{const_growing_seasons.stop} (run includes {gs_0}-{gs_n})" + ) + this_ds = this_ds.sel(gs=const_growing_seasons) + + any_bad = False + any_bad_before_checking_rx = False + if throw_error: + emojus = "❌" + else: + emojus = "❗" + if not isinstance(const_vars, list): + const_vars = [const_vars] + + for var in const_vars: + any_bad, any_bad_before_checking_rx, bad_patches = check_one_constant_var( + this_ds, case, ignore_nan, verbose, emojus, var, any_bad_before_checking_rx + ) + + if any_bad and throw_error: + raise RuntimeError("Stopping due to failed check_constant_vars().") + + bad_patches = np.unique(bad_patches) + return [int(p) for p in bad_patches] diff --git a/python/ctsm/crop_calendars/check_rx_obeyed.py b/python/ctsm/crop_calendars/check_rx_obeyed.py new file mode 100644 index 0000000000..c1ad5cfecc --- /dev/null +++ b/python/ctsm/crop_calendars/check_rx_obeyed.py @@ -0,0 +1,227 @@ +""" +Check that prescribed crop calendars were obeyed +""" + +import sys +import os +import numpy as np + +# Import the CTSM Python utilities. +# sys.path.insert() is necessary for RXCROPMATURITY to work. The fact that it's calling this script +# in the RUN phase seems to require the python/ directory to be manually added to path. 
+_CTSM_PYTHON = os.path.join( + os.path.dirname(os.path.realpath(__file__)), os.pardir, os.pardir, os.pardir, "python" +) +sys.path.insert(1, _CTSM_PYTHON) +import ctsm.crop_calendars.cropcal_utils as utils # pylint: disable=wrong-import-position +from ctsm.crop_calendars.cropcal_module import ( # pylint: disable=wrong-import-position + DEFAULT_GDD_MIN, +) + + +def get_pct_harv_at_mature(harvest_reason_da): + """ + Get percentage of harvests that happened at maturity + """ + n_harv_at_mature = len(np.where(harvest_reason_da.values == 1)[0]) + with np.errstate(invalid="ignore"): + harv_reason_gt_0 = harvest_reason_da.values > 0 + n_harv = len(np.where(harv_reason_gt_0)[0]) + if n_harv == 0: + return np.nan + pct_harv_at_mature = n_harv_at_mature / n_harv * 100 + pct_harv_at_mature = np.format_float_positional( + pct_harv_at_mature, precision=2, unique=False, fractional=False, trim="k" + ) # Round to 2 significant digits + return pct_harv_at_mature + + +def check_rx_obeyed_handle_gdharv(output_var, gdd_min, ds_thisveg, rx_array): + """ + In check_rx_obeyed(), account for the GDD harvest threshold minimum set in PlantCrop() + """ + if gdd_min is None: + gdd_min = DEFAULT_GDD_MIN + print( + f"gdd_min not provided when doing check_rx_obeyed() for {output_var}; using " + + f"default {gdd_min}" + ) + with np.errstate(invalid="ignore"): + rx_array[(rx_array >= 0) & (rx_array < gdd_min)] = gdd_min + + # ...harvest reason + # 0: Should never happen in any simulation + # 1: Harvesting at maturity + # 2: Harvesting at max season length (mxmat) + # 3: Crop was incorrectly planted in last time step of Dec. 31 + # 4: Today was supposed to be the planting day, but the previous crop still hasn't been + # harvested. + # 5: Harvest the day before the next sowing date this year. + # 6: Same as #5. + # 7: Harvest the day before the next sowing date (today is Dec. 31 and the sowing date + # is Jan. 
1) + harvest_reason_da = ds_thisveg["HARVEST_REASON"] + unique_harvest_reasons = np.unique( + harvest_reason_da.values[np.where(~np.isnan(harvest_reason_da.values))] + ) + pct_harv_at_mature = get_pct_harv_at_mature(harvest_reason_da) + return gdd_min, unique_harvest_reasons, pct_harv_at_mature + + +def check_rx_obeyed_setup(dates_ds, which_ds, output_var, verbose): + """ + Various setup steps for check_rx_obeyed() + """ + all_ok = 2 + diff_str_list = [] + gdd_tolerance = 1 + + if "GDDHARV" in output_var and verbose: + harvest_reason_da = dates_ds["HARVEST_REASON"] + unique_harvest_reasons = np.unique( + harvest_reason_da.values[np.where(~np.isnan(harvest_reason_da.values))] + ) + pct_harv_at_mature = get_pct_harv_at_mature(harvest_reason_da) + print( + f"{which_ds} harvest reasons: {unique_harvest_reasons} ({pct_harv_at_mature}% harv at " + + "maturity)" + ) + + return all_ok, diff_str_list, gdd_tolerance + + +def get_extreme_info(diff_array, rx_array, mxn, dims, gs_da, patches1d_lon, patches1d_lat): + """ + Get information about extreme gridcells (for debugging) + """ + if mxn == np.min: # pylint: disable=comparison-with-callable + diff_array = np.ma.masked_array(diff_array, mask=np.abs(diff_array) == 0) + themxn = mxn(diff_array) + + # Find the first patch-gs that has the mxn value + matching_indices = np.where(diff_array == themxn) + first_indices = [x[0] for x in matching_indices] + + # Get the lon, lat, and growing season of that patch-gs + patch_index = first_indices[dims.index("patch")] + this_lon = patches1d_lon.values[patch_index] + this_lat = patches1d_lat.values[patch_index] + season_index = first_indices[dims.index("gs")] + this_gs = gs_da.values[season_index] + + # Get the prescribed value for this patch-gs + this_rx = rx_array[patch_index][0] + + return round(themxn, 3), round(this_lon, 3), round(this_lat, 3), this_gs, round(this_rx) + + +def check_rx_obeyed( + vegtype_list, rx_ds, dates_ds, which_ds, output_var, gdd_min=None, verbose=False +): + """ + Check that prescribed crop calendars were obeyed + """ + all_ok, diff_str_list, gdd_tolerance = check_rx_obeyed_setup( + dates_ds, which_ds, output_var, verbose + ) + + for vegtype_str in vegtype_list: + thisveg_patches = np.where(dates_ds.patches1d_itype_veg_str == vegtype_str)[0] + if thisveg_patches.size == 0: + continue + ds_thisveg = dates_ds.isel(patch=thisveg_patches) + + vegtype_int = utils.vegtype_str2int(vegtype_str)[0] + rx_da = rx_ds[f"gs1_{vegtype_int}"] + rx_array = rx_da.values[ + ds_thisveg.patches1d_jxy.values.astype(int) - 1, + ds_thisveg.patches1d_ixy.values.astype(int) - 1, + ] + rx_array = np.expand_dims(rx_array, axis=1) + sim_array = ds_thisveg[output_var].values + sim_array_dims = ds_thisveg[output_var].dims + + # Ignore patches without prescribed value + with np.errstate(invalid="ignore"): + rx_array[np.where(rx_array < 0)] = np.nan + + # Account for... 
+ if "GDDHARV" in output_var: + # ...GDD harvest threshold minimum set in PlantCrop() + gdd_min, unique_harvest_reasons, pct_harv_at_mature = check_rx_obeyed_handle_gdharv( + output_var, gdd_min, ds_thisveg, rx_array + ) + + if np.any(sim_array != rx_array): + diff_array = sim_array - rx_array + + # Allow negative GDDHARV values when harvest occurred because sowing was scheduled for + # the next day + if output_var == "GDDHARV_PERHARV": + diff_array = np.ma.masked_array( + diff_array, + mask=(diff_array < 0) & (ds_thisveg["HARVEST_REASON_PERHARV"].values == 5), + ) + elif output_var == "GDDHARV": + with np.errstate(invalid="ignore"): + diff_lt_0 = diff_array < 0 + harv_reason_5 = ds_thisveg["HARVEST_REASON"].values == 5 + diff_array = np.ma.masked_array(diff_array, mask=diff_lt_0 & harv_reason_5) + + with np.errstate(invalid="ignore"): + abs_gt_0 = abs(diff_array) > 0 + if np.any(np.abs(diff_array[abs_gt_0]) > 0): + min_diff, min_lon, min_lat, min_gs, min_rx = get_extreme_info( + diff_array, + rx_array, + np.nanmin, + sim_array_dims, + dates_ds.gs, + ds_thisveg.patches1d_lon, + ds_thisveg.patches1d_lat, + ) + max_diff, max_lon, max_lat, max_gs, max_rx = get_extreme_info( + diff_array, + rx_array, + np.nanmax, + sim_array_dims, + dates_ds.gs, + ds_thisveg.patches1d_lon, + ds_thisveg.patches1d_lat, + ) + + diffs_eg_txt = ( + f"{vegtype_str} ({vegtype_int}): diffs range {min_diff} (lon {min_lon}, lat " + + f"{min_lat}, gs {min_gs}, rx ~{min_rx}) to {max_diff} (lon {max_lon}, lat " + + f"{max_lat}, gs {max_gs}, rx ~{max_rx})" + ) + if "GDDHARV" in output_var: + diffs_eg_txt += ( + f"; harvest reasons: {unique_harvest_reasons} ({pct_harv_at_mature}" + + "% harvested at maturity)" + ) + if "GDDHARV" in output_var and np.nanmax(abs(diff_array)) <= gdd_tolerance: + if all_ok > 0: + all_ok = 1 + diff_str_list.append(f" {diffs_eg_txt}") + else: + all_ok = 0 + if verbose: + print( + f"❌ {which_ds}: Prescribed {output_var} *not* always obeyed. E.g., " + + f"{diffs_eg_txt}" + ) + else: + break + + if all_ok == 2: + print(f"✅ {which_ds}: Prescribed {output_var} always obeyed") + elif all_ok == 1: + # print(f"🟨 {which_ds}: Prescribed {output_var} *not* always obeyed, but acceptable:") + # for x in diff_str_list: print(x) + print( + f"🟨 {which_ds}: Prescribed {output_var} *not* always obeyed, but acceptable (diffs <= " + + f"{gdd_tolerance})" + ) + elif not verbose: + print(f"❌ {which_ds}: Prescribed {output_var} *not* always obeyed. E.g., {diffs_eg_txt}") diff --git a/python/ctsm/crop_calendars/check_rxboth_run.py b/python/ctsm/crop_calendars/check_rxboth_run.py index c2cf37aa12..126ef98bbc 100644 --- a/python/ctsm/crop_calendars/check_rxboth_run.py +++ b/python/ctsm/crop_calendars/check_rxboth_run.py @@ -8,6 +8,20 @@ import numpy as np import cropcal_module as cc # pylint: disable=import-error +# Import the CTSM Python utilities. +# sys.path.insert() is necessary for RXCROPMATURITY to work. The fact that it's calling this script +# in the RUN phase seems to require the python/ directory to be manually added to path. 
+_CTSM_PYTHON = os.path.join( + os.path.dirname(os.path.realpath(__file__)), os.pardir, os.pardir, os.pardir, "python" +) +sys.path.insert(1, _CTSM_PYTHON) +from ctsm.crop_calendars.check_rx_obeyed import ( # pylint: disable=wrong-import-position + check_rx_obeyed, +) +from ctsm.crop_calendars.check_constant_vars import ( # pylint: disable=wrong-import-position + check_constant_vars, +) + def main(argv): """ @@ -77,7 +91,7 @@ def main(argv): year_1=args.first_usable_year, year_n=args.last_usable_year, ) - cc.check_constant_vars(case["ds"], case, ignore_nan=True, verbose=True, throw_error=True) + check_constant_vars(case["ds"], case, ignore_nan=True, verbose=True, throw_error=True) # Import GGCMI sowing and harvest dates, and check sims casename = "Prescribed Calendars" @@ -114,7 +128,7 @@ def main(argv): # Check if case["rx_sdates_file"]: - cc.check_rx_obeyed( + check_rx_obeyed( case["ds"].vegtype_str.values, case["rx_sdates_ds"].isel(time=0), case["ds"], @@ -122,7 +136,7 @@ def main(argv): "SDATES", ) if case["rx_gdds_file"]: - cc.check_rx_obeyed( + check_rx_obeyed( case["ds"].vegtype_str.values, case["rx_gdds_ds"].isel(time=0), case["ds"], diff --git a/python/ctsm/crop_calendars/convert_axis_time2gs.py b/python/ctsm/crop_calendars/convert_axis_time2gs.py new file mode 100644 index 0000000000..f311d39e05 --- /dev/null +++ b/python/ctsm/crop_calendars/convert_axis_time2gs.py @@ -0,0 +1,631 @@ +""" +Convert time*mxharvests axes to growingseason axis +""" +import warnings +import sys +import os +import numpy as np +import xarray as xr + +# Import the CTSM Python utilities. +# sys.path.insert() is necessary for RXCROPMATURITY to work. The fact that it's calling this script +# in the RUN phase seems to require the python/ directory to be manually added to path. +_CTSM_PYTHON = os.path.join( + os.path.dirname(os.path.realpath(__file__)), os.pardir, os.pardir, os.pardir, "python" +) +sys.path.insert(1, _CTSM_PYTHON) + +try: + import pandas as pd +except ModuleNotFoundError: + pass + + +def pym_to_pg(pym_array, quiet=False): + """ + In convert_axis_time2gs(), convert year x month array to growingseason axis + """ + pg_array = np.reshape(pym_array, (pym_array.shape[0], -1)) + ok_pg = pg_array[~np.isnan(pg_array)] + if not quiet: + print( + f"{ok_pg.size} included; unique N seasons = " + + f"{np.unique(np.sum(~np.isnan(pg_array), axis=1))}" + ) + return pg_array + + +def ignore_lastyear_complete_season(pg_array, excl, mxharvests): + """ + Helper function for convert_axis_time2gs() + """ + tmp_l = pg_array[:, :-mxharvests] + tmp_r = pg_array[:, -mxharvests:] + tmp_r[np.where(excl)] = np.nan + pg_array = np.concatenate((tmp_l, tmp_r), axis=1) + return pg_array + + +def convert_axis_time2gs_setup(this_ds, verbose): + """ + Various setup steps for convert_axis_time2gs_setup() + """ + # How many non-NaN patch-seasons do we expect to have once we're done organizing things? + n_patch = this_ds.dims["patch"] + # Because some patches will be planted in the last year but not complete, we have to ignore any + # finalyear-planted seasons that do complete. + n_gs = this_ds.dims["time"] - 1 + expected_valid = n_patch * n_gs + + mxharvests = this_ds.dims["mxharvests"] + + if verbose: + print( + f"Start: discrepancy of {np.sum(~np.isnan(this_ds.HDATES.values)) - expected_valid} " + + "patch-seasons" + ) + + # Set all non-positive date values to NaN. These are seasons that were never harvested + # (or never started): "non-seasons." 
+ if this_ds.HDATES.dims != ("time", "mxharvests", "patch"): + raise RuntimeError( + "This code relies on HDATES dims ('time', 'mxharvests', 'patch'), not " + + f"{this_ds.HDATES.dims}" + ) + hdates_ymp = this_ds.HDATES.copy().where(this_ds.HDATES > 0).values + hdates_pym = np.transpose(hdates_ymp.copy(), (2, 0, 1)) + sdates_ymp = this_ds.SDATES_PERHARV.copy().where(this_ds.SDATES_PERHARV > 0).values + sdates_pym = np.transpose(sdates_ymp.copy(), (2, 0, 1)) + with np.errstate(invalid="ignore"): + hdates_pym[hdates_pym <= 0] = np.nan + return n_patch, n_gs, expected_valid, mxharvests, hdates_ymp, hdates_pym, sdates_ymp, sdates_pym + + +def set_up_ds_with_gs_axis(ds_in): + """ + Set up empty Dataset with time axis as "gs" (growing season) instead of what CLM puts out. + + Includes all the same variables as the input dataset, minus any that had dimensions mxsowings or + mxharvests. + """ + # Get the data variables to include in the new dataset + data_vars = {} + for var in ds_in.data_vars: + if not any(x in ["mxsowings", "mxharvests"] for x in ds_in[var].dims): + data_vars[var] = ds_in[var] + # Set up the new dataset + gs_years = [t.year - 1 for t in ds_in.time.values[:-1]] + coords = ds_in.coords + coords["gs"] = gs_years + ds_out = xr.Dataset(data_vars=data_vars, coords=coords, attrs=ds_in.attrs) + return ds_out + + +def print_onepatch_wrong_n_gs( + patch_index, + this_ds_orig, + sdates_ymp, + hdates_ymp, + sdates_pym, + hdates_pym, + sdates_pym2, + hdates_pym2, + sdates_pym3, + hdates_pym3, + sdates_pg, + hdates_pg, + sdates_pg2, + hdates_pg2, +): + """ + Print information about a patch (for debugging) + """ + + print( + f"patch {patch_index}: {this_ds_orig.patches1d_itype_veg_str.values[patch_index]}, lon " + f"{this_ds_orig.patches1d_lon.values[patch_index]} lat " + f"{this_ds_orig.patches1d_lat.values[patch_index]}" + ) + + print("Original SDATES (per sowing):") + print(this_ds_orig.SDATES.values[:, :, patch_index]) + + print("Original HDATES (per harvest):") + print(this_ds_orig.HDATES.values[:, :, patch_index]) + + if "pandas" in sys.modules: + + def print_pandas_ymp(msg, cols, arrs_tuple): + print(f"{msg} ({np.sum(~np.isnan(arrs_tuple[0]))})") + mxharvests = arrs_tuple[0].shape[1] + arrs_list2 = [] + cols2 = [] + for harvest_index in np.arange(mxharvests): + for i, array in enumerate(arrs_tuple): + arrs_list2.append(array[:, harvest_index]) + cols2.append(cols[i] + str(harvest_index)) + arrs_tuple2 = tuple(arrs_list2) + dataframe = pd.DataFrame(np.stack(arrs_tuple2, axis=1)) + dataframe.columns = cols2 + print(dataframe) + + print_pandas_ymp( + "Original", + ["sdate", "hdate"], + ( + this_ds_orig.SDATES_PERHARV.values[:, :, patch_index], + this_ds_orig.HDATES.values[:, :, patch_index], + ), + ) + + print_pandas_ymp( + "Masked", + ["sdate", "hdate"], + (sdates_ymp[:, :, patch_index], hdates_ymp[:, :, patch_index]), + ) + + print_pandas_ymp( + 'After "Ignore harvests from before this output began"', + ["sdate", "hdate"], + ( + np.transpose(sdates_pym, (1, 2, 0))[:, :, patch_index], + np.transpose(hdates_pym, (1, 2, 0))[:, :, patch_index], + ), + ) + + print_pandas_ymp( + 'After "In years with no sowing, pretend the first no-harvest is meaningful"', + ["sdate", "hdate"], + ( + np.transpose(sdates_pym2, (1, 2, 0))[:, :, patch_index], + np.transpose(hdates_pym2, (1, 2, 0))[:, :, patch_index], + ), + ) + + print_pandas_ymp( + ( + 'After "In years with sowing that are followed by inactive years, check whether the' + " last sowing was harvested before the patch was deactivated. 
If not, pretend the" + ' LAST no-harvest is meaningful."' + ), + ["sdate", "hdate"], + ( + np.transpose(sdates_pym3, (1, 2, 0))[:, :, patch_index], + np.transpose(hdates_pym3, (1, 2, 0))[:, :, patch_index], + ), + ) + + def print_pandas_pg(msg, cols, arrs_tuple): + print(f"{msg} ({np.sum(~np.isnan(arrs_tuple[0]))})") + arrs_list = list(arrs_tuple) + for i, array in enumerate(arrs_tuple): + arrs_list[i] = np.reshape(array, (-1)) + arrs_tuple2 = tuple(arrs_list) + dataframe = pd.DataFrame(np.stack(arrs_tuple2, axis=1)) + dataframe.columns = cols + print(dataframe) + + print_pandas_pg( + "Same, but converted to gs axis", + ["sdate", "hdate"], + (sdates_pg[patch_index, :], hdates_pg[patch_index, :]), + ) + + print_pandas_pg( + ( + 'After "Ignore any harvests that were planted in the final year, because some cells' + ' will have incomplete growing seasons for the final year"' + ), + ["sdate", "hdate"], + (sdates_pg2[patch_index, :], hdates_pg2[patch_index, :]), + ) + else: + print("Couldn't import pandas, so not displaying example bad patch ORIGINAL.") + + def print_nopandas(array_1, array_2, msg): + print(msg) + if array_1.ndim == 1: + # I don't know why these aren't side-by-side! + print(np.stack((array_1, array_2), axis=1)) + else: + print(np.concatenate((array_1, array_2), axis=1)) + + print_nopandas(sdates_ymp[:, :, patch_index], hdates_ymp[:, :, patch_index], "Masked:") + + print_nopandas( + np.transpose(sdates_pym, (1, 2, 0))[:, :, patch_index], + np.transpose(hdates_pym, (1, 2, 0))[:, :, patch_index], + 'After "Ignore harvests from before this output began"', + ) + + print_nopandas( + np.transpose(sdates_pym2, (1, 2, 0))[:, :, patch_index], + np.transpose(hdates_pym2, (1, 2, 0))[:, :, patch_index], + 'After "In years with no sowing, pretend the first no-harvest is meaningful"', + ) + + print_nopandas( + np.transpose(sdates_pym3, (1, 2, 0))[:, :, patch_index], + np.transpose(hdates_pym3, (1, 2, 0))[:, :, patch_index], + ( + 'After "In years with sowing that are followed by inactive years, check whether the' + " last sowing was harvested before the patch was deactivated. If not, pretend the" + ' LAST [easier to implement!] no-harvest is meaningful."' + ), + ) + + print_nopandas( + sdates_pg[patch_index, :], hdates_pg[patch_index, :], "Same, but converted to gs axis" + ) + + print_nopandas( + sdates_pg2[patch_index, :], + hdates_pg2[patch_index, :], + ( + 'After "Ignore any harvests that were planted in the final year, because some cells' + ' will have incomplete growing seasons for the final year"' + ), + ) + + print("\n\n") + + +def handle_years_with_no_sowing(this_ds, mxharvests, hdates_pym, sdates_pym): + """ + In years with no sowing, pretend the first no-harvest is meaningful, unless that was + intentionally ignored earlier in convert_axis_time2gs(). 
+ """ + sdates_orig_ymp = this_ds.SDATES.copy().values + sdates_orig_pym = np.transpose(sdates_orig_ymp.copy(), (2, 0, 1)) + hdates_pym2 = hdates_pym.copy() + sdates_pym2 = sdates_pym.copy() + with np.errstate(invalid="ignore"): + sdates_gt_0 = sdates_orig_pym > 0 + nosow_py = np.all(~sdates_gt_0, axis=2) + nosow_py_1st = nosow_py & np.isnan(hdates_pym[:, :, 0]) + where_nosow_py_1st = np.where(nosow_py_1st) + hdates_pym2[where_nosow_py_1st[0], where_nosow_py_1st[1], 0] = -np.inf + sdates_pym2[where_nosow_py_1st[0], where_nosow_py_1st[1], 0] = -np.inf + for harvest_index in np.arange(mxharvests - 1): + if harvest_index == 0: + continue + if harvest_index == 1: + print("Warning: Untested with mxharvests > 2") + where_nosow_py = np.where( + nosow_py + & ~np.any(np.isnan(hdates_pym[:, :, 0:harvest_index]), axis=2) + & np.isnan(hdates_pym[:, :, harvest_index]) + ) + hdates_pym2[where_nosow_py[0], where_nosow_py[1], harvest_index + 1] = -np.inf + sdates_pym2[where_nosow_py[0], where_nosow_py[1], harvest_index + 1] = -np.inf + return sdates_orig_pym, hdates_pym2, sdates_pym2 + + +def handle_years_with_sowing_then_inactive( + verbose, + n_patch, + n_gs, + expected_valid, + mxharvests, + inactive_py, + sdates_orig_pym, + hdates_pym2, + sdates_pym2, +): + """ + In years with sowing that are followed by inactive years, check whether the last sowing was + harvested before the patch was deactivated. If not, pretend the LAST [easier to implement!] + no-harvest is meaningful. + """ + sdates_orig_masked_pym = sdates_orig_pym.copy() + with np.errstate(invalid="ignore"): + sdates_le_0 = sdates_orig_masked_pym <= 0 + sdates_orig_masked_pym[np.where(sdates_le_0)] = np.nan + with warnings.catch_warnings(): + warnings.filterwarnings(action="ignore", message="All-NaN slice encountered") + last_sdate_first_n_gs_py = np.nanmax(sdates_orig_masked_pym[:, :-1, :], axis=2) + last_hdate_first_n_gs_py = np.nanmax(hdates_pym2[:, :-1, :], axis=2) + with np.errstate(invalid="ignore"): + hdate_lt_sdate = last_hdate_first_n_gs_py < last_sdate_first_n_gs_py + last_sowing_not_harvested_sameyear_first_n_gs_py = hdate_lt_sdate | np.isnan( + last_hdate_first_n_gs_py + ) + inactive_last_n_gs_py = inactive_py[:, 1:] + last_sowing_never_harvested_first_n_gs_py = ( + last_sowing_not_harvested_sameyear_first_n_gs_py & inactive_last_n_gs_py + ) + last_sowing_never_harvested_py = np.concatenate( + (last_sowing_never_harvested_first_n_gs_py, np.full((n_patch, 1), False)), axis=1 + ) + last_sowing_never_harvested_pym = np.concatenate( + ( + np.full((n_patch, n_gs + 1, mxharvests - 1), False), + np.expand_dims(last_sowing_never_harvested_py, axis=2), + ), + axis=2, + ) + where_last_sowing_never_harvested_pym = last_sowing_never_harvested_pym + hdates_pym3 = hdates_pym2.copy() + sdates_pym3 = sdates_pym2.copy() + hdates_pym3[where_last_sowing_never_harvested_pym] = -np.inf + sdates_pym3[where_last_sowing_never_harvested_pym] = -np.inf + + hdates_pg = pym_to_pg(hdates_pym3.copy(), quiet=~verbose) + sdates_pg = pym_to_pg(sdates_pym3.copy(), quiet=True) + if verbose: + print( + "After 'In years with no sowing, pretend the first no-harvest is meaningful: " + + f"discrepancy of {np.sum(~np.isnan(hdates_pg)) - expected_valid} patch-seasons" + ) + + return hdates_pym3, sdates_pym3, hdates_pg, sdates_pg + + +def ignore_harvests_planted_in_final_year( + this_ds, verbose, n_gs, expected_valid, mxharvests, hdates_pg, sdates_pg +): + """ + Ignore any harvests that were planted in the final year, because some cells will have + incomplete growing seasons 
for the final year. + """ + with np.errstate(invalid="ignore"): + hdates_ge_sdates = hdates_pg[:, -mxharvests:] >= sdates_pg[:, -mxharvests:] + lastyear_complete_season = hdates_ge_sdates | np.isinf(hdates_pg[:, -mxharvests:]) + + hdates_pg2 = ignore_lastyear_complete_season( + hdates_pg.copy(), lastyear_complete_season, mxharvests + ) + sdates_pg2 = ignore_lastyear_complete_season( + sdates_pg.copy(), lastyear_complete_season, mxharvests + ) + is_valid = ~np.isnan(hdates_pg2) + is_fake = np.isneginf(hdates_pg2) + is_fake = np.reshape(is_fake[is_valid], (this_ds.dims["patch"], n_gs)) + discrepancy = np.sum(is_valid) - expected_valid + unique_n_seasons = np.unique(np.sum(is_valid, axis=1)) + if verbose: + print( + "After 'Ignore any harvests that were planted in the final year, because other cells " + + "will have incomplete growing seasons for the final year': discrepancy of " + + f"{discrepancy} patch-seasons" + ) + if "pandas" in sys.modules: + bincount = np.bincount(np.sum(is_valid, axis=1)) + bincount = bincount[bincount > 0] + dataframe = pd.DataFrame({"Ngs": unique_n_seasons, "Count": bincount}) + print(dataframe) + else: + print(f"unique N seasons = {unique_n_seasons}") + print(" ") + return hdates_pg2, sdates_pg2, is_valid, is_fake, discrepancy, unique_n_seasons + + +def create_dataset( + this_ds, + my_vars, + n_gs, + hdates_ymp, + hdates_pym, + sdates_ymp, + sdates_pym, + hdates_pym2, + sdates_pym2, + hdates_pym3, + sdates_pym3, + hdates_pg, + sdates_pg, + hdates_pg2, + sdates_pg2, + is_valid, + is_fake, + discrepancy, + unique_n_seasons, +): + """ + Create Dataset with time axis as "gs" (growing season) instead of what CLM puts out + """ + if discrepancy == 0: + this_ds_gs = set_up_ds_with_gs_axis(this_ds) + for var in this_ds.data_vars: + if this_ds[var].dims != ("time", "mxharvests", "patch") or ( + my_vars and var not in my_vars + ): + continue + + # Set invalid values to NaN + da_yhp = this_ds[var].copy() + da_yhp = da_yhp.where(~np.isneginf(da_yhp)) + + # Remove the nans and reshape to patches*growingseasons + da_pyh = da_yhp.transpose("patch", "time", "mxharvests") + ar_pg = np.reshape(da_pyh.values, (this_ds.dims["patch"], -1)) + ar_valid_pg = np.reshape(ar_pg[is_valid], (this_ds.dims["patch"], n_gs)) + # Change -infs to nans + ar_valid_pg[is_fake] = np.nan + # Save as DataArray to new Dataset, stripping _PERHARV from variable name + newname = var.replace("_PERHARV", "") + if newname in this_ds_gs: + raise RuntimeError(f"{newname} already in dataset!") + da_pg = xr.DataArray( + data=ar_valid_pg, + coords=[this_ds_gs.coords["patch"], this_ds_gs.coords["gs"]], + name=newname, + attrs=da_yhp.attrs, + ) + this_ds_gs[newname] = da_pg + this_ds_gs[newname].attrs["units"] = this_ds[var].attrs["units"] + else: + # Print details about example bad patch(es) + if min(unique_n_seasons) < n_gs: + print(f"Too few seasons (min {min(unique_n_seasons)} < {n_gs})") + patch_index = np.where(np.sum(~np.isnan(hdates_pg2), axis=1) == min(unique_n_seasons))[ + 0 + ][0] + print_onepatch_wrong_n_gs( + patch_index, + this_ds, + sdates_ymp, + hdates_ymp, + sdates_pym, + hdates_pym, + sdates_pym2, + hdates_pym2, + sdates_pym3, + hdates_pym3, + sdates_pg, + hdates_pg, + sdates_pg2, + hdates_pg2, + ) + if max(unique_n_seasons) > n_gs: + print(f"Too many seasons (max {max(unique_n_seasons)} > {n_gs})") + patch_index = np.where(np.sum(~np.isnan(hdates_pg2), axis=1) == max(unique_n_seasons))[ + 0 + ][0] + print_onepatch_wrong_n_gs( + patch_index, + this_ds, + sdates_ymp, + hdates_ymp, + sdates_pym, + 
hdates_pym, + sdates_pym2, + hdates_pym2, + sdates_pym3, + hdates_pym3, + sdates_pg, + hdates_pg, + sdates_pg2, + hdates_pg2, + ) + raise RuntimeError( + "Can't convert time*mxharvests axes to growingseason axis: discrepancy of " + + f"{discrepancy} patch-seasons" + ) + + # Preserve units + for var_1 in this_ds_gs: + var_0 = var_1 + if var_0 not in this_ds: + var_0 += "_PERHARV" + if var_0 not in this_ds: + continue + if "units" in this_ds[var_0].attrs: + this_ds_gs[var_1].attrs["units"] = this_ds[var_0].attrs["units"] + return this_ds_gs + + +def convert_axis_time2gs(this_ds, verbose=False, my_vars=None, incl_orig=False): + """ + Convert time*mxharvests axes to growingseason axis + """ + + ( + n_patch, + n_gs, + expected_valid, + mxharvests, + hdates_ymp, + hdates_pym, + sdates_ymp, + sdates_pym, + ) = convert_axis_time2gs_setup(this_ds, verbose) + + # Find years where patch was inactive + inactive_py = np.transpose( + np.isnan(this_ds.HDATES).all(dim="mxharvests").values + & np.isnan(this_ds.SDATES_PERHARV).all(dim="mxharvests").values + ) + # Find seasons that were planted while the patch was inactive + with np.errstate(invalid="ignore"): + sown_inactive_py = inactive_py[:, :-1] & (hdates_pym[:, 1:, 0] < sdates_pym[:, 1:, 0]) + sown_inactive_py = np.concatenate((np.full((n_patch, 1), False), sown_inactive_py), axis=1) + + # "Ignore harvests from seasons sown (a) before this output began or (b) when the crop was + # inactive" + with np.errstate(invalid="ignore"): + first_season_before_first_year_p = hdates_pym[:, 0, 0] < sdates_pym[:, 0, 0] + first_season_before_first_year_py = np.full(hdates_pym.shape[:-1], fill_value=False) + first_season_before_first_year_py[:, 0] = first_season_before_first_year_p + sown_prerun_or_inactive_py = first_season_before_first_year_py | sown_inactive_py + sown_prerun_or_inactive_pym = np.concatenate( + ( + np.expand_dims(sown_prerun_or_inactive_py, axis=2), + np.full((n_patch, n_gs + 1, mxharvests - 1), False), + ), + axis=2, + ) + where_sown_prerun_or_inactive_pym = np.where(sown_prerun_or_inactive_pym) + hdates_pym[where_sown_prerun_or_inactive_pym] = np.nan + sdates_pym[where_sown_prerun_or_inactive_pym] = np.nan + if verbose: + print( + "After 'Ignore harvests from before this output began: discrepancy of " + + f"{np.sum(~np.isnan(hdates_pym)) - expected_valid} patch-seasons'" + ) + + # We need to keep some non-seasons---it's possible that "the yearY growing season" never + # happened (sowing conditions weren't met), but we still need something there so that we can + # make an array of dimension Npatch*Ngs. We do this by changing those non-seasons from NaN to + # -Inf before doing the filtering and reshaping, after which we'll convert them back to NaNs. + + # "In years with no sowing, pretend the first no-harvest is meaningful, unless that was + # intentionally ignored above." + sdates_orig_pym, hdates_pym2, sdates_pym2 = handle_years_with_no_sowing( + this_ds, mxharvests, hdates_pym, sdates_pym + ) + + # "In years with sowing that are followed by inactive years, check whether the last sowing was + # harvested before the patch was deactivated. If not, pretend the LAST [easier to implement!] + # no-harvest is meaningful." 
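
A minimal, self-contained sketch of the -Inf placeholder scheme described in the comments above, using synthetic sizes and dates invented purely for illustration (not CTSM output): non-seasons are marked -Inf so that every patch flattens to the same number of growing seasons, and the placeholders only become NaN again after the reshape.

    import numpy as np

    n_patch, n_years, mxharvests = 2, 3, 2
    hdates_pym = np.full((n_patch, n_years, mxharvests), np.nan)
    hdates_pym[0, :, 0] = [200.0, 210.0, 205.0]  # patch 0 harvested every year
    hdates_pym[1, 0, 0] = 190.0                  # patch 1 had no harvest in year 1,
    hdates_pym[1, 2, 0] = 195.0
    hdates_pym[1, 1, 0] = -np.inf                # so that non-season gets a placeholder

    pg_array = np.reshape(hdates_pym, (n_patch, -1))
    is_valid = ~np.isnan(pg_array)                # -Inf counts as "valid" here
    n_per_patch = np.sum(is_valid, axis=1)
    assert np.all(n_per_patch == n_per_patch[0])  # same season count in every patch

    out_pg = np.reshape(pg_array[is_valid], (n_patch, n_per_patch[0]))
    out_pg[np.isneginf(out_pg)] = np.nan          # placeholders back to NaN
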
+ hdates_pym3, sdates_pym3, hdates_pg, sdates_pg = handle_years_with_sowing_then_inactive( + verbose, + n_patch, + n_gs, + expected_valid, + mxharvests, + inactive_py, + sdates_orig_pym, + hdates_pym2, + sdates_pym2, + ) + + # "Ignore any harvests that were planted in the final year, because some cells will have + # incomplete growing seasons for the final year." + ( + hdates_pg2, + sdates_pg2, + is_valid, + is_fake, + discrepancy, + unique_n_seasons, + ) = ignore_harvests_planted_in_final_year( + this_ds, verbose, n_gs, expected_valid, mxharvests, hdates_pg, sdates_pg + ) + + # Create Dataset with time axis as "gs" (growing season) instead of what CLM puts out + this_ds_gs = create_dataset( + this_ds, + my_vars, + n_gs, + hdates_ymp, + hdates_pym, + sdates_ymp, + sdates_pym, + hdates_pym2, + sdates_pym2, + hdates_pym3, + sdates_pym3, + hdates_pg, + sdates_pg, + hdates_pg2, + sdates_pg2, + is_valid, + is_fake, + discrepancy, + unique_n_seasons, + ) + + if incl_orig: + return this_ds_gs, this_ds + return this_ds_gs diff --git a/python/ctsm/crop_calendars/cropcal_module.py b/python/ctsm/crop_calendars/cropcal_module.py index 4fa3cdf5aa..aa3c5d469e 100644 --- a/python/ctsm/crop_calendars/cropcal_module.py +++ b/python/ctsm/crop_calendars/cropcal_module.py @@ -1,9 +1,7 @@ """ Helper functions for various crop calendar stuff """ -# pylint: disable=too-many-lines -import warnings import sys import os import glob @@ -18,11 +16,12 @@ ) sys.path.insert(1, _CTSM_PYTHON) import ctsm.crop_calendars.cropcal_utils as utils # pylint: disable=wrong-import-position - -try: - import pandas as pd -except ModuleNotFoundError: - pass +from ctsm.crop_calendars.convert_axis_time2gs import ( # pylint: disable=wrong-import-position + convert_axis_time2gs, +) +from ctsm.crop_calendars.check_rx_obeyed import ( # pylint: disable=wrong-import-position + check_rx_obeyed, +) # Define conversion multipliers, {from: {to1, to2, ...}, ...} @@ -193,396 +192,6 @@ def open_lu_ds(filename, year_1, year_n, existing_ds, ungrid=True): return this_ds -def check_constant_vars( - this_ds, case, ignore_nan, const_growing_seasons=None, verbose=True, throw_error=True -): - """ - For variables that should stay constant, make sure they are - """ - if isinstance(case, str): - const_vars = [case] - elif isinstance(case, list): - const_vars = case - elif isinstance(case, dict): - const_vars = case["const_vars"] - else: - raise TypeError(f"case must be str or dict, not {type(case)}") - - if not const_vars: - return None - - if const_growing_seasons: - gs_0 = this_ds.gs.values[0] - gs_n = this_ds.gs.values[-1] - if const_growing_seasons.start > gs_0 or const_growing_seasons.stop < gs_n: - print( - f"❗ Only checking const_vars over {const_growing_seasons.start}-" - + f"{const_growing_seasons.stop} (run includes {gs_0}-{gs_n})" - ) - this_ds = this_ds.sel(gs=const_growing_seasons) - - any_bad = False - any_bad_before_checking_rx = False - if throw_error: - emojus = "❌" - else: - emojus = "❗" - if not isinstance(const_vars, list): - const_vars = [const_vars] - - for var in const_vars: - everything_ok = True - - if "gs" in this_ds[var].dims: - time_coord = "gs" - elif "time" in this_ds[var].dims: - time_coord = "time" - else: - raise RuntimeError(f"Which of these is the time coordinate? 
{this_ds[var].dims}") - i_time_coord = this_ds[var].dims.index(time_coord) - - this_da = this_ds[var] - ra_sp = np.moveaxis(this_da.copy().values, i_time_coord, 0) - incl_patches = [] - bad_patches = np.array([]) - str_list = [] - - # Read prescription file, if needed - rx_ds = None - if isinstance(case, dict): - if var == "GDDHARV" and "rx_gdds_file" in case: - rx_ds = import_rx_dates( - "gdd", case["rx_gdds_file"], this_ds, set_neg1_to_nan=False - ).squeeze() - - for time_1 in np.arange(this_ds.dims[time_coord] - 1): - condn = ~np.isnan(ra_sp[time_1, ...]) - if time_1 > 0: - condn = np.bitwise_and(condn, np.all(np.isnan(ra_sp[:time_1, ...]), axis=0)) - these_patches = np.where(condn)[0] - if these_patches.size == 0: - continue - these_patches = list(np.where(condn)[0]) - incl_patches += these_patches - # print(f't1 {t1}: {thesePatches}') - - t1_yr = this_ds[time_coord].values[time_1] - t1_vals = np.squeeze(this_da.isel({time_coord: time_1, "patch": these_patches}).values) - - for timestep in np.arange(time_1 + 1, this_ds.dims[time_coord]): - t_yr = this_ds[time_coord].values[timestep] - t_vals = np.squeeze( - this_da.isel({time_coord: timestep, "patch": these_patches}).values - ) - ok_p = t1_vals == t_vals - - # If allowed, ignore where either t or t1 is NaN. Should only be used for runs where - # land use varies over time. - if ignore_nan: - ok_p = np.squeeze(np.bitwise_or(ok_p, np.isnan(t1_vals + t_vals))) - - if not np.all(ok_p): - any_bad_before_checking_rx = True - bad_patches_this_time = list(np.where(np.bitwise_not(ok_p))[0]) - bad_patches = np.concatenate( - (bad_patches, np.array(these_patches)[bad_patches_this_time]) - ) - if rx_ds: - found_in_rx = np.array([False for x in bad_patches]) - vary_patches = list(np.array(these_patches)[bad_patches_this_time]) - vary_lons = this_ds.patches1d_lon.values[bad_patches_this_time] - vary_lats = this_ds.patches1d_lat.values[bad_patches_this_time] - vary_crops = this_ds.patches1d_itype_veg_str.values[bad_patches_this_time] - vary_crops_int = this_ds.patches1d_itype_veg.values[bad_patches_this_time] - - any_bad_any_crop = False - for crop_int in np.unique(vary_crops_int): - rx_var = f"gs1_{crop_int}" - vary_lons_this_crop = vary_lons[np.where(vary_crops_int == crop_int)] - vary_lats_this_crop = vary_lats[np.where(vary_crops_int == crop_int)] - these_rx_vals = np.diag( - rx_ds[rx_var] - .sel(lon=vary_lons_this_crop, lat=vary_lats_this_crop) - .values - ) - if len(these_rx_vals) != len(vary_lats_this_crop): - raise RuntimeError( - f"Expected {len(vary_lats_this_crop)} rx values; got " - + f"{len(these_rx_vals)}" - ) - if not np.any(these_rx_vals != -1): - continue - any_bad_any_crop = True - break - if not any_bad_any_crop: - continue - - # This bit is pretty inefficient, but I'm not going to optimize it until I - # actually need to use it. - for i, patch in enumerate(bad_patches_this_time): - this_patch = vary_patches[i] - this_lon = vary_lons[i] - this_lat = vary_lats[i] - this_crop = vary_crops[i] - this_crop_int = vary_crops_int[i] - - # If prescribed input had missing value (-1), it's fine for it to vary. - if rx_ds: - rx_var = f"gs1_{this_crop_int}" - if this_lon in rx_ds.lon.values and this_lat in rx_ds.lat.values: - rx_vals = rx_ds[rx_var].sel(lon=this_lon, lat=this_lat).values - n_unique = len(np.unique(rx_vals)) - if n_unique == 1: - found_in_rx[i] = True - if rx_vals == -1: - continue - elif n_unique > 1: - raise RuntimeError( - f"How does lon {this_lon} lat {this_lat} {this_crop} have " - + f"time-varying {var}?" 
- ) - else: - raise RuntimeError( - f"lon {this_lon} lat {this_lat} {this_crop} not in rx dataset?" - ) - - # Print info (or save to print later) - any_bad = True - if verbose: - this_str = ( - f" Patch {this_patch} (lon {this_lon} lat {this_lat}) " - + f"{this_crop} ({this_crop_int})" - ) - if rx_ds and not found_in_rx[i]: - this_str = this_str.replace("(lon", "* (lon") - if not np.isnan(t1_vals[patch]): - t1_val_print = int(t1_vals[patch]) - else: - t1_val_print = "NaN" - if not np.isnan(t_vals[patch]): - t_val_print = int(t_vals[patch]) - else: - t_val_print = "NaN" - if var == "SDATES": - str_list.append( - f"{this_str}: Sowing {t1_yr} jday {t1_val_print}, {t_yr} " - + f"jday {t_val_print}" - ) - else: - str_list.append( - f"{this_str}: {t1_yr} {var} {t1_val_print}, {t_yr} {var} " - + f"{t_val_print}" - ) - else: - if everything_ok: - print(f"{emojus} CLM output {var} unexpectedly vary over time:") - everything_ok = False - print(f"{var} timestep {timestep} does not match timestep {time_1}") - break - if verbose and any_bad: - print(f"{emojus} CLM output {var} unexpectedly vary over time:") - str_list.sort() - if rx_ds and np.any(~found_in_rx): - str_list = [ - "*: Not found in prescribed input file (maybe minor lon/lat mismatch)" - ] + str_list - elif not rx_ds: - str_list = ["(No rx file checked)"] + str_list - print("\n".join(str_list)) - - # Make sure every patch was checked once (or is all-NaN except possibly final season) - incl_patches = np.sort(incl_patches) - if not np.array_equal(incl_patches, np.unique(incl_patches)): - raise RuntimeError("Patch(es) checked more than once!") - incl_patches = list(incl_patches) - incl_patches += list( - np.where( - np.all( - np.isnan( - ra_sp[ - :-1, - ] - ), - axis=0, - ) - )[0] - ) - incl_patches = np.sort(incl_patches) - if not np.array_equal(incl_patches, np.unique(incl_patches)): - raise RuntimeError("Patch(es) checked but also all-NaN??") - if not np.array_equal(incl_patches, np.arange(this_ds.dims["patch"])): - for patch in np.arange(this_ds.dims["patch"]): - if patch not in incl_patches: - break - raise RuntimeError( - f"Not all patches checked! E.g., {patch}: {this_da.isel(patch=patch).values}" - ) - - if not any_bad: - if any_bad_before_checking_rx: - print( - f"✅ CLM output {var} do not vary through {this_ds.dims[time_coord]} growing " - + "seasons of output (except for patch(es) with missing rx)." - ) - else: - print( - f"✅ CLM output {var} do not vary through {this_ds.dims[time_coord]} growing " - + "seasons of output." 
- ) - - if any_bad and throw_error: - raise RuntimeError("Stopping due to failed check_constant_vars().") - - bad_patches = np.unique(bad_patches) - return [int(p) for p in bad_patches] - - -def check_rx_obeyed( - vegtype_list, rx_ds, dates_ds, which_ds, output_var, gdd_min=None, verbose=False -): - """ - Check that prescribed crop calendars were obeyed - """ - all_ok = 2 - diff_str_list = [] - gdd_tolerance = 1 - - if "GDDHARV" in output_var and verbose: - harvest_reason_da = dates_ds["HARVEST_REASON"] - unique_harvest_reasons = np.unique( - harvest_reason_da.values[np.where(~np.isnan(harvest_reason_da.values))] - ) - pct_harv_at_mature = get_pct_harv_at_mature(harvest_reason_da) - print( - f"{which_ds} harvest reasons: {unique_harvest_reasons} ({pct_harv_at_mature}% harv at " - + "maturity)" - ) - - for vegtype_str in vegtype_list: - thisveg_patches = np.where(dates_ds.patches1d_itype_veg_str == vegtype_str)[0] - if thisveg_patches.size == 0: - continue - ds_thisveg = dates_ds.isel(patch=thisveg_patches) - patch_inds_lon_thisveg = ds_thisveg.patches1d_ixy.values.astype(int) - 1 - patch_inds_lat_thisveg = ds_thisveg.patches1d_jxy.values.astype(int) - 1 - patch_lons_thisveg = ds_thisveg.patches1d_lon - patch_lats_thisveg = ds_thisveg.patches1d_lat - - vegtype_int = utils.vegtype_str2int(vegtype_str)[0] - rx_da = rx_ds[f"gs1_{vegtype_int}"] - rx_array = rx_da.values[patch_inds_lat_thisveg, patch_inds_lon_thisveg] - rx_array = np.expand_dims(rx_array, axis=1) - sim_array = ds_thisveg[output_var].values - sim_array_dims = ds_thisveg[output_var].dims - - # Ignore patches without prescribed value - with np.errstate(invalid="ignore"): - rx_array[np.where(rx_array < 0)] = np.nan - - # Account for... - if "GDDHARV" in output_var: - # ...GDD harvest threshold minimum set in PlantCrop() - if gdd_min is None: - gdd_min = DEFAULT_GDD_MIN - print( - f"gdd_min not provided when doing check_rx_obeyed() for {output_var}; using " - + f"default {gdd_min}" - ) - with np.errstate(invalid="ignore"): - rx_array[(rx_array >= 0) & (rx_array < gdd_min)] = gdd_min - - # ...harvest reason - # 0: Should never happen in any simulation - # 1: Harvesting at maturity - # 2: Harvesting at max season length (mxmat) - # 3: Crop was incorrectly planted in last time step of Dec. 31 - # 4: Today was supposed to be the planting day, but the previous crop still hasn't been - # harvested. - # 5: Harvest the day before the next sowing date this year. - # 6: Same as #5. - # 7: Harvest the day before the next sowing date (today is Dec. 31 and the sowing date - # is Jan. 
1) - harvest_reason_da = ds_thisveg["HARVEST_REASON"] - unique_harvest_reasons = np.unique( - harvest_reason_da.values[np.where(~np.isnan(harvest_reason_da.values))] - ) - pct_harv_at_mature = get_pct_harv_at_mature(harvest_reason_da) - - if np.any(sim_array != rx_array): - diff_array = sim_array - rx_array - - # Allow negative GDDHARV values when harvest occurred because sowing was scheduled for - # the next day - if output_var == "GDDHARV_PERHARV": - diff_array = np.ma.masked_array( - diff_array, - mask=(diff_array < 0) & (ds_thisveg["HARVEST_REASON_PERHARV"].values == 5), - ) - elif output_var == "GDDHARV": - with np.errstate(invalid="ignore"): - diff_lt_0 = diff_array < 0 - harv_reason_5 = ds_thisveg["HARVEST_REASON"].values == 5 - diff_array = np.ma.masked_array(diff_array, mask=diff_lt_0 & harv_reason_5) - - with np.errstate(invalid="ignore"): - abs_gt_0 = abs(diff_array) > 0 - if np.any(np.abs(diff_array[abs_gt_0]) > 0): - min_diff, min_lon, min_lat, min_gs, min_rx = get_extreme_info( - diff_array, - rx_array, - np.nanmin, - sim_array_dims, - dates_ds.gs, - patch_lons_thisveg, - patch_lats_thisveg, - ) - max_diff, max_lon, max_lat, max_gs, max_rx = get_extreme_info( - diff_array, - rx_array, - np.nanmax, - sim_array_dims, - dates_ds.gs, - patch_lons_thisveg, - patch_lats_thisveg, - ) - - diffs_eg_txt = ( - f"{vegtype_str} ({vegtype_int}): diffs range {min_diff} (lon {min_lon}, lat " - + f"{min_lat}, gs {min_gs}, rx ~{min_rx}) to {max_diff} (lon {max_lon}, lat " - + f"{max_lat}, gs {max_gs}, rx ~{max_rx})" - ) - if "GDDHARV" in output_var: - diffs_eg_txt += ( - f"; harvest reasons: {unique_harvest_reasons} ({pct_harv_at_mature}" - + "% harvested at maturity)" - ) - if "GDDHARV" in output_var and np.nanmax(abs(diff_array)) <= gdd_tolerance: - if all_ok > 0: - all_ok = 1 - diff_str_list.append(f" {diffs_eg_txt}") - else: - all_ok = 0 - if verbose: - print( - f"❌ {which_ds}: Prescribed {output_var} *not* always obeyed. E.g., " - + f"{diffs_eg_txt}" - ) - else: - break - - if all_ok == 2: - print(f"✅ {which_ds}: Prescribed {output_var} always obeyed") - elif all_ok == 1: - # print(f"🟨 {which_ds}: Prescribed {output_var} *not* always obeyed, but acceptable:") - # for x in diff_str_list: print(x) - print( - f"🟨 {which_ds}: Prescribed {output_var} *not* always obeyed, but acceptable (diffs <= " - + f"{gdd_tolerance})" - ) - elif not verbose: - print(f"❌ {which_ds}: Prescribed {output_var} *not* always obeyed. E.g., {diffs_eg_txt}") - - def check_v0_le_v1(this_ds, var_list, msg_txt=" ", both_nan_ok=False, throw_error=False): """ Make sure that, e.g., GDDACCUM_PERHARV is always <= HUI_PERHARV @@ -612,317 +221,6 @@ def check_v0_le_v1(this_ds, var_list, msg_txt=" ", both_nan_ok=False, throw_erro raise RuntimeError(msg) -def convert_axis_time2gs(this_ds, verbose=False, my_vars=None, incl_orig=False): - """ - Convert time*mxharvests axes to growingseason axis - """ - # How many non-NaN patch-seasons do we expect to have once we're done organizing things? - n_patch = this_ds.dims["patch"] - # Because some patches will be planted in the last year but not complete, we have to ignore any - # finalyear-planted seasons that do complete. - n_gs = this_ds.dims["time"] - 1 - expected_valid = n_patch * n_gs - - mxharvests = this_ds.dims["mxharvests"] - - if verbose: - print( - f"Start: discrepancy of {np.sum(~np.isnan(this_ds.HDATES.values)) - expected_valid} " - + "patch-seasons" - ) - - # Set all non-positive date values to NaN. 
These are seasons that were never harvested - # (or never started): "non-seasons." - if this_ds.HDATES.dims != ("time", "mxharvests", "patch"): - raise RuntimeError( - "This code relies on HDATES dims ('time', 'mxharvests', 'patch'), not " - + f"{this_ds.HDATES.dims}" - ) - hdates_ymp = this_ds.HDATES.copy().where(this_ds.HDATES > 0).values - hdates_pym = np.transpose(hdates_ymp.copy(), (2, 0, 1)) - sdates_ymp = this_ds.SDATES_PERHARV.copy().where(this_ds.SDATES_PERHARV > 0).values - sdates_pym = np.transpose(sdates_ymp.copy(), (2, 0, 1)) - with np.errstate(invalid="ignore"): - hdates_pym[hdates_pym <= 0] = np.nan - - # Find years where patch was inactive - inactive_py = np.transpose( - np.isnan(this_ds.HDATES).all(dim="mxharvests").values - & np.isnan(this_ds.SDATES_PERHARV).all(dim="mxharvests").values - ) - # Find seasons that were planted while the patch was inactive - with np.errstate(invalid="ignore"): - sown_inactive_py = inactive_py[:, :-1] & (hdates_pym[:, 1:, 0] < sdates_pym[:, 1:, 0]) - sown_inactive_py = np.concatenate((np.full((n_patch, 1), False), sown_inactive_py), axis=1) - - # "Ignore harvests from seasons sown (a) before this output began or (b) when the crop was - # inactive" - with np.errstate(invalid="ignore"): - first_season_before_first_year_p = hdates_pym[:, 0, 0] < sdates_pym[:, 0, 0] - first_season_before_first_year_py = np.full(hdates_pym.shape[:-1], fill_value=False) - first_season_before_first_year_py[:, 0] = first_season_before_first_year_p - sown_prerun_or_inactive_py = first_season_before_first_year_py | sown_inactive_py - sown_prerun_or_inactive_pym = np.concatenate( - ( - np.expand_dims(sown_prerun_or_inactive_py, axis=2), - np.full((n_patch, n_gs + 1, mxharvests - 1), False), - ), - axis=2, - ) - where_sown_prerun_or_inactive_pym = np.where(sown_prerun_or_inactive_pym) - hdates_pym[where_sown_prerun_or_inactive_pym] = np.nan - sdates_pym[where_sown_prerun_or_inactive_pym] = np.nan - if verbose: - print( - "After 'Ignore harvests from before this output began: discrepancy of " - + f"{np.sum(~np.isnan(hdates_pym)) - expected_valid} patch-seasons'" - ) - - # We need to keep some non-seasons---it's possible that "the yearY growing season" never - # happened (sowing conditions weren't met), but we still need something there so that we can - # make an array of dimension Npatch*Ngs. We do this by changing those non-seasons from NaN to - # -Inf before doing the filtering and reshaping, after which we'll convert them back to NaNs. - - # "In years with no sowing, pretend the first no-harvest is meaningful, unless that was - # intentionally ignored above." 
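
The no-sowing test in the hunk below (relocated to handle_years_with_no_sowing() in the new module) is easier to see in isolation. A sketch with a small, hand-invented SDATES-like array:

    import numpy as np

    # (patch, year, mxharvests); NaN means nothing was sown
    sdates_pym = np.array(
        [[[100.0, np.nan], [np.nan, np.nan]],
         [[np.nan, np.nan], [150.0, np.nan]]]
    )
    with np.errstate(invalid="ignore"):  # NaN comparisons would otherwise warn
        nosow_py = np.all(~(sdates_pym > 0), axis=2)
    print(nosow_py)
    # [[False  True]
    #  [ True False]]  -> patch 0 skipped year 1; patch 1 skipped year 0
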
- sdates_orig_ymp = this_ds.SDATES.copy().values - sdates_orig_pym = np.transpose(sdates_orig_ymp.copy(), (2, 0, 1)) - hdates_pym2 = hdates_pym.copy() - sdates_pym2 = sdates_pym.copy() - with np.errstate(invalid="ignore"): - sdates_gt_0 = sdates_orig_pym > 0 - nosow_py = np.all(~sdates_gt_0, axis=2) - nosow_py_1st = nosow_py & np.isnan(hdates_pym[:, :, 0]) - where_nosow_py_1st = np.where(nosow_py_1st) - hdates_pym2[where_nosow_py_1st[0], where_nosow_py_1st[1], 0] = -np.inf - sdates_pym2[where_nosow_py_1st[0], where_nosow_py_1st[1], 0] = -np.inf - for harvest_index in np.arange(mxharvests - 1): - if harvest_index == 0: - continue - elif harvest_index == 1: - print("Warning: Untested with mxharvests > 2") - where_nosow_py = np.where( - nosow_py - & ~np.any(np.isnan(hdates_pym[:, :, 0:harvest_index]), axis=2) - & np.isnan(hdates_pym[:, :, harvest_index]) - ) - hdates_pym2[where_nosow_py[0], where_nosow_py[1], harvest_index + 1] = -np.inf - sdates_pym2[where_nosow_py[0], where_nosow_py[1], harvest_index + 1] = -np.inf - - # "In years with sowing that are followed by inactive years, check whether the last sowing was - # harvested before the patch was deactivated. If not, pretend the LAST [easier to implement!] - # no-harvest is meaningful." - sdates_orig_masked_pym = sdates_orig_pym.copy() - with np.errstate(invalid="ignore"): - sdates_le_0 = sdates_orig_masked_pym <= 0 - sdates_orig_masked_pym[np.where(sdates_le_0)] = np.nan - with warnings.catch_warnings(): - warnings.filterwarnings(action="ignore", message="All-NaN slice encountered") - last_sdate_first_n_gs_py = np.nanmax(sdates_orig_masked_pym[:, :-1, :], axis=2) - last_hdate_first_n_gs_py = np.nanmax(hdates_pym2[:, :-1, :], axis=2) - with np.errstate(invalid="ignore"): - hdate_lt_sdate = last_hdate_first_n_gs_py < last_sdate_first_n_gs_py - last_sowing_not_harvested_sameyear_first_n_gs_py = hdate_lt_sdate | np.isnan( - last_hdate_first_n_gs_py - ) - inactive_last_n_gs_py = inactive_py[:, 1:] - last_sowing_never_harvested_first_n_gs_py = ( - last_sowing_not_harvested_sameyear_first_n_gs_py & inactive_last_n_gs_py - ) - last_sowing_never_harvested_py = np.concatenate( - (last_sowing_never_harvested_first_n_gs_py, np.full((n_patch, 1), False)), axis=1 - ) - last_sowing_never_harvested_pym = np.concatenate( - ( - np.full((n_patch, n_gs + 1, mxharvests - 1), False), - np.expand_dims(last_sowing_never_harvested_py, axis=2), - ), - axis=2, - ) - where_last_sowing_never_harvested_pym = last_sowing_never_harvested_pym - hdates_pym3 = hdates_pym2.copy() - sdates_pym3 = sdates_pym2.copy() - hdates_pym3[where_last_sowing_never_harvested_pym] = -np.inf - sdates_pym3[where_last_sowing_never_harvested_pym] = -np.inf - - # Convert to growingseason axis - def pym_to_pg(pym_array, quiet=False): - pg_array = np.reshape(pym_array, (pym_array.shape[0], -1)) - ok_pg = pg_array[~np.isnan(pg_array)] - if not quiet: - print( - f"{ok_pg.size} included; unique N seasons = " - + f"{np.unique(np.sum(~np.isnan(pg_array), axis=1))}" - ) - return pg_array - - hdates_pg = pym_to_pg(hdates_pym3.copy(), quiet=~verbose) - sdates_pg = pym_to_pg(sdates_pym3.copy(), quiet=True) - if verbose: - print( - "After 'In years with no sowing, pretend the first no-harvest is meaningful: " - + f"discrepancy of {np.sum(~np.isnan(hdates_pg)) - expected_valid} patch-seasons" - ) - - # "Ignore any harvests that were planted in the final year, because some cells will have - # incomplete growing seasons for the final year." 
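
Likewise, the final-year filter introduced by the comment above boils down to the comparison sketched here, with synthetic dates and mxharvests = 1 for brevity: a last-column harvest is dropped when its harvest day falls on or after its sowing day, meaning the season was both sown and completed in the final year.

    import numpy as np

    hdates_pg = np.array([[200.0, 210.0, 205.0],
                          [190.0, -np.inf, 30.0]])
    sdates_pg = np.array([[100.0, 110.0, 105.0],
                          [ 90.0, -np.inf, 300.0]])
    mxharvests = 1
    with np.errstate(invalid="ignore"):
        lastyear_complete = (
            hdates_pg[:, -mxharvests:] >= sdates_pg[:, -mxharvests:]
        ) | np.isinf(hdates_pg[:, -mxharvests:])
    hdates_pg[:, -mxharvests:][np.where(lastyear_complete)] = np.nan
    # Patch 0 loses its final-year season (205 >= 105); patch 1 keeps a
    # day-30 harvest that was sown the previous year on day 300.
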
- with np.errstate(invalid="ignore"): - hdates_ge_sdates = hdates_pg[:, -mxharvests:] >= sdates_pg[:, -mxharvests:] - lastyear_complete_season = hdates_ge_sdates | np.isinf(hdates_pg[:, -mxharvests:]) - - def ignore_lastyear_complete_season(pg_array, excl, mxharvests): - tmp_l = pg_array[:, :-mxharvests] - tmp_r = pg_array[:, -mxharvests:] - tmp_r[np.where(excl)] = np.nan - pg_array = np.concatenate((tmp_l, tmp_r), axis=1) - return pg_array - - hdates_pg2 = ignore_lastyear_complete_season( - hdates_pg.copy(), lastyear_complete_season, mxharvests - ) - sdates_pg2 = ignore_lastyear_complete_season( - sdates_pg.copy(), lastyear_complete_season, mxharvests - ) - is_valid = ~np.isnan(hdates_pg2) - is_fake = np.isneginf(hdates_pg2) - is_fake = np.reshape(is_fake[is_valid], (this_ds.dims["patch"], n_gs)) - discrepancy = np.sum(is_valid) - expected_valid - unique_n_seasons = np.unique(np.sum(is_valid, axis=1)) - if verbose: - print( - "After 'Ignore any harvests that were planted in the final year, because other cells " - + "will have incomplete growing seasons for the final year': discrepancy of " - + f"{discrepancy} patch-seasons" - ) - if "pandas" in sys.modules: - bincount = np.bincount(np.sum(is_valid, axis=1)) - bincount = bincount[bincount > 0] - dataframe = pd.DataFrame({"Ngs": unique_n_seasons, "Count": bincount}) - print(dataframe) - else: - print(f"unique N seasons = {unique_n_seasons}") - print(" ") - - # Create Dataset with time axis as "gs" (growing season) instead of what CLM puts out - if discrepancy == 0: - this_ds_gs = set_up_ds_with_gs_axis(this_ds) - for var in this_ds.data_vars: - if this_ds[var].dims != ("time", "mxharvests", "patch") or ( - my_vars and var not in my_vars - ): - continue - - # Set invalid values to NaN - da_yhp = this_ds[var].copy() - da_yhp = da_yhp.where(~np.isneginf(da_yhp)) - - # Remove the nans and reshape to patches*growingseasons - da_pyh = da_yhp.transpose("patch", "time", "mxharvests") - ar_pg = np.reshape(da_pyh.values, (this_ds.dims["patch"], -1)) - ar_valid_pg = np.reshape(ar_pg[is_valid], (this_ds.dims["patch"], n_gs)) - # Change -infs to nans - ar_valid_pg[is_fake] = np.nan - # Save as DataArray to new Dataset, stripping _PERHARV from variable name - newname = var.replace("_PERHARV", "") - if newname in this_ds_gs: - raise RuntimeError(f"{newname} already in dataset!") - da_pg = xr.DataArray( - data=ar_valid_pg, - coords=[this_ds_gs.coords["patch"], this_ds_gs.coords["gs"]], - name=newname, - attrs=da_yhp.attrs, - ) - this_ds_gs[newname] = da_pg - this_ds_gs[newname].attrs["units"] = this_ds[var].attrs["units"] - else: - # Print details about example bad patch(es) - if min(unique_n_seasons) < n_gs: - print(f"Too few seasons (min {min(unique_n_seasons)} < {n_gs})") - patch_index = np.where(np.sum(~np.isnan(hdates_pg2), axis=1) == min(unique_n_seasons))[ - 0 - ][0] - print_onepatch_wrong_n_gs( - patch_index, - this_ds, - sdates_ymp, - hdates_ymp, - sdates_pym, - hdates_pym, - sdates_pym2, - hdates_pym2, - sdates_pym3, - hdates_pym3, - sdates_pg, - hdates_pg, - sdates_pg2, - hdates_pg2, - ) - if max(unique_n_seasons) > n_gs: - print(f"Too many seasons (max {max(unique_n_seasons)} > {n_gs})") - patch_index = np.where(np.sum(~np.isnan(hdates_pg2), axis=1) == max(unique_n_seasons))[ - 0 - ][0] - print_onepatch_wrong_n_gs( - patch_index, - this_ds, - sdates_ymp, - hdates_ymp, - sdates_pym, - hdates_pym, - sdates_pym2, - hdates_pym2, - sdates_pym3, - hdates_pym3, - sdates_pg, - hdates_pg, - sdates_pg2, - hdates_pg2, - ) - raise RuntimeError( - "Can't 
convert time*mxharvests axes to growingseason axis: discrepancy of " - + f"{discrepancy} patch-seasons" - ) - - # Preserve units - for var_1 in this_ds_gs: - var_0 = var_1 - if var_0 not in this_ds: - var_0 += "_PERHARV" - if var_0 not in this_ds: - continue - if "units" in this_ds[var_0].attrs: - this_ds_gs[var_1].attrs["units"] = this_ds[var_0].attrs["units"] - - if incl_orig: - return this_ds_gs, this_ds - return this_ds_gs - - -def get_extreme_info(diff_array, rx_array, mxn, dims, gs_da, patches1d_lon, patches1d_lat): - """ - Get information about extreme gridcells (for debugging) - """ - if mxn == np.min: # pylint: disable=comparison-with-callable - diff_array = np.ma.masked_array(diff_array, mask=np.abs(diff_array) == 0) - themxn = mxn(diff_array) - - # Find the first patch-gs that has the mxn value - matching_indices = np.where(diff_array == themxn) - first_indices = [x[0] for x in matching_indices] - - # Get the lon, lat, and growing season of that patch-gs - patch_index = first_indices[dims.index("patch")] - this_lon = patches1d_lon.values[patch_index] - this_lat = patches1d_lat.values[patch_index] - season_index = first_indices[dims.index("gs")] - this_gs = gs_da.values[season_index] - - # Get the prescribed value for this patch-gs - this_rx = rx_array[patch_index][0] - - return round(themxn, 3), round(this_lon, 3), round(this_lat, 3), this_gs, round(this_rx) - - def get_gs_len_da(this_da): """ Get growing season lengths from a DataArray of hdate-sdate @@ -936,23 +234,6 @@ def get_gs_len_da(this_da): return this_da -def get_pct_harv_at_mature(harvest_reason_da): - """ - Get percentage of harvests that happened at maturity - """ - n_harv_at_mature = len(np.where(harvest_reason_da.values == 1)[0]) - with np.errstate(invalid="ignore"): - harv_reason_gt_0 = harvest_reason_da.values > 0 - n_harv = len(np.where(harv_reason_gt_0)[0]) - if n_harv == 0: - return np.nan - pct_harv_at_mature = n_harv_at_mature / n_harv * 100 - pct_harv_at_mature = np.format_float_positional( - pct_harv_at_mature, precision=2, unique=False, fractional=False, trim="k" - ) # Round to 2 significant digits - return pct_harv_at_mature - - def import_max_gs_length(paramfile_dir, my_clm_ver, my_clm_subver): """ Import maximum growing season length @@ -1030,6 +311,57 @@ def import_rx_dates(var_prefix, date_infile, dates_ds, set_neg1_to_nan=True): return this_ds +def check_no_negative(this_ds_in, varlist_no_negative, which_file, verbose): + """ + In import_output(), check that there are no unexpected negative values. 
+ """ + tiny_neg_ok = 1e-12 + this_ds = this_ds_in.copy() + for var in this_ds: + if not any(x in var for x in varlist_no_negative): + continue + the_min = np.nanmin(this_ds[var].values) + if the_min < 0: + if np.abs(the_min) <= tiny_neg_ok: + if verbose: + print( + f"Tiny negative value(s) in {var} (abs <= {tiny_neg_ok}) being set to 0" + + f" ({which_file})" + ) + else: + print( + f"WARNING: Unexpected negative value(s) in {var}; minimum {the_min} " + + f"({which_file})" + ) + values = this_ds[var].copy().values + with np.errstate(invalid="ignore"): + do_setto_0 = (values < 0) & (values >= -tiny_neg_ok) + values[np.where(do_setto_0)] = 0 + this_ds[var] = xr.DataArray( + values, + coords=this_ds[var].coords, + dims=this_ds[var].dims, + attrs=this_ds[var].attrs, + ) + + elif verbose: + print(f"No negative value(s) in {var}; min {the_min} ({which_file})") + return this_ds + + +def check_no_zeros(this_ds, varlist_no_zero, which_file, verbose): + """ + In import_output(), check that there are no unexpected zeros. + """ + for var in this_ds: + if not any(x in var for x in varlist_no_zero): + continue + if np.any(this_ds[var].values == 0): + print(f"WARNING: Unexpected zero(s) in {var} ({which_file})") + elif verbose: + print(f"No zero value(s) in {var} ({which_file})") + + def import_output( filename, my_vars, @@ -1077,79 +409,11 @@ def import_output( # SDATES, but it does show up in SDATES_PERHARV. # I could put the SDATES_PERHARV dates into where they "should" be, but instead I'm just going # to invalidate those "seasons." - # - # In all but the last calendar year, which patches had no sowing? - no_sowing_yp = np.all(np.isnan(this_ds.SDATES.values[:-1, :, :]), axis=1) - # In all but the first calendar year, which harvests' jdays are < their sowings' jdays? - # (Indicates sowing the previous calendar year.) - with np.errstate(invalid="ignore"): - hsdate1_gt_hdate1_yp = ( - this_ds.SDATES_PERHARV.values[1:, 0, :] > this_ds.HDATES.values[1:, 0, :] - ) - # Where both, we have the problem. - falsely_alive_yp = no_sowing_yp & hsdate1_gt_hdate1_yp - if np.any(falsely_alive_yp): - print( - f"Warning: {np.sum(falsely_alive_yp)} patch-seasons being ignored: Seemingly sown the " - + "year before harvest, but no sowings occurred that year." 
- ) - falsely_alive_yp = np.concatenate( - (np.full((1, this_ds.dims["patch"]), False), falsely_alive_yp), axis=0 - ) - falsely_alive_y1p = np.expand_dims(falsely_alive_yp, axis=1) - dummy_false_y1p = np.expand_dims(np.full_like(falsely_alive_yp, False), axis=1) - falsely_alive_yhp = np.concatenate((falsely_alive_y1p, dummy_false_y1p), axis=1) - for var in this_ds.data_vars: - if this_ds[var].dims != ("time", "mxharvests", "patch"): - continue - this_ds[var] = this_ds[var].where(~falsely_alive_yhp) - - def check_no_negative(this_ds_in, varlist_no_negative, which_file, verbose=False): - tiny_neg_ok = 1e-12 - this_ds = this_ds_in.copy() - for var in this_ds: - if not any(x in var for x in varlist_no_negative): - continue - the_min = np.nanmin(this_ds[var].values) - if the_min < 0: - if np.abs(the_min) <= tiny_neg_ok: - if verbose: - print( - f"Tiny negative value(s) in {var} (abs <= {tiny_neg_ok}) being set to 0" - + f" ({which_file})" - ) - else: - print( - f"WARNING: Unexpected negative value(s) in {var}; minimum {the_min} " - + f"({which_file})" - ) - values = this_ds[var].copy().values - with np.errstate(invalid="ignore"): - do_setto_0 = (values < 0) & (values >= -tiny_neg_ok) - values[np.where(do_setto_0)] = 0 - this_ds[var] = xr.DataArray( - values, - coords=this_ds[var].coords, - dims=this_ds[var].dims, - attrs=this_ds[var].attrs, - ) - - elif verbose: - print(f"No negative value(s) in {var}; min {the_min} ({which_file})") - return this_ds - - def check_no_zeros(this_ds, varlist_no_zero, which_file): - for var in this_ds: - if not any(x in var for x in varlist_no_zero): - continue - if np.any(this_ds[var].values == 0): - print(f"WARNING: Unexpected zero(s) in {var} ({which_file})") - elif verbose: - print(f"No zero value(s) in {var} ({which_file})") + this_ds = handle_zombie_crops(this_ds) # Check for no zero values where there shouldn't be varlist_no_zero = ["DATE", "YEAR"] - check_no_zeros(this_ds, varlist_no_zero, "original file") + check_no_zeros(this_ds, varlist_no_zero, "original file", verbose) # Convert time*mxharvests axes to growingseason axis this_ds_gs = convert_axis_time2gs(this_ds, verbose=verbose, incl_orig=False) @@ -1171,11 +435,11 @@ def check_no_zeros(this_ds, varlist_no_zero, which_file): # Avoid tiny negative values varlist_no_negative = ["GRAIN", "REASON", "GDD", "HUI", "YEAR", "DATE", "GSLEN"] - this_ds_gs = check_no_negative(this_ds_gs, varlist_no_negative, "new file", verbose=verbose) + this_ds_gs = check_no_negative(this_ds_gs, varlist_no_negative, "new file", verbose) # Check for no zero values where there shouldn't be varlist_no_zero = ["REASON", "DATE"] - check_no_zeros(this_ds_gs, varlist_no_zero, "new file") + check_no_zeros(this_ds_gs, varlist_no_zero, "new file", verbose) # Check that e.g., GDDACCUM <= HUI for var_list in [["GDDACCUM", "HUI"], ["SYEARS", "HYEARS"]]: @@ -1211,190 +475,37 @@ def check_no_zeros(this_ds, varlist_no_zero, which_file): return this_ds_gs -def print_onepatch_wrong_n_gs( - patch_index, - this_ds_orig, - sdates_ymp, - hdates_ymp, - sdates_pym, - hdates_pym, - sdates_pym2, - hdates_pym2, - sdates_pym3, - hdates_pym3, - sdates_pg, - hdates_pg, - sdates_pg2, - hdates_pg2, -): +def handle_zombie_crops(this_ds): """ - Print information about a patch (for debugging) + When doing transient runs, it's somehow possible for crops in newly-active patches to be + *already alive*. They even have a sowing date (idop)! This will of course not show up in + SDATES, but it does show up in SDATES_PERHARV. 
+ I could put the SDATES_PERHARV dates into where they "should" be, but instead I'm just going + to invalidate those "seasons." """ - - print( - f"patch {patch_index}: {this_ds_orig.patches1d_itype_veg_str.values[patch_index]}, lon " - f"{this_ds_orig.patches1d_lon.values[patch_index]} lat " - f"{this_ds_orig.patches1d_lat.values[patch_index]}" - ) - - print("Original SDATES (per sowing):") - print(this_ds_orig.SDATES.values[:, :, patch_index]) - - print("Original HDATES (per harvest):") - print(this_ds_orig.HDATES.values[:, :, patch_index]) - - if "pandas" in sys.modules: - - def print_pandas_ymp(msg, cols, arrs_tuple): - print(f"{msg} ({np.sum(~np.isnan(arrs_tuple[0]))})") - mxharvests = arrs_tuple[0].shape[1] - arrs_list2 = [] - cols2 = [] - for harvest_index in np.arange(mxharvests): - for i, array in enumerate(arrs_tuple): - arrs_list2.append(array[:, harvest_index]) - cols2.append(cols[i] + str(harvest_index)) - arrs_tuple2 = tuple(arrs_list2) - dataframe = pd.DataFrame(np.stack(arrs_tuple2, axis=1)) - dataframe.columns = cols2 - print(dataframe) - - print_pandas_ymp( - "Original", - ["sdate", "hdate"], - ( - this_ds_orig.SDATES_PERHARV.values[:, :, patch_index], - this_ds_orig.HDATES.values[:, :, patch_index], - ), - ) - - print_pandas_ymp( - "Masked", - ["sdate", "hdate"], - (sdates_ymp[:, :, patch_index], hdates_ymp[:, :, patch_index]), - ) - - print_pandas_ymp( - 'After "Ignore harvests from before this output began"', - ["sdate", "hdate"], - ( - np.transpose(sdates_pym, (1, 2, 0))[:, :, patch_index], - np.transpose(hdates_pym, (1, 2, 0))[:, :, patch_index], - ), - ) - - print_pandas_ymp( - 'After "In years with no sowing, pretend the first no-harvest is meaningful"', - ["sdate", "hdate"], - ( - np.transpose(sdates_pym2, (1, 2, 0))[:, :, patch_index], - np.transpose(hdates_pym2, (1, 2, 0))[:, :, patch_index], - ), - ) - - print_pandas_ymp( - ( - 'After "In years with sowing that are followed by inactive years, check whether the' - " last sowing was harvested before the patch was deactivated. If not, pretend the" - ' LAST no-harvest is meaningful."' - ), - ["sdate", "hdate"], - ( - np.transpose(sdates_pym3, (1, 2, 0))[:, :, patch_index], - np.transpose(hdates_pym3, (1, 2, 0))[:, :, patch_index], - ), - ) - - def print_pandas_pg(msg, cols, arrs_tuple): - print(f"{msg} ({np.sum(~np.isnan(arrs_tuple[0]))})") - arrs_list = list(arrs_tuple) - for i, array in enumerate(arrs_tuple): - arrs_list[i] = np.reshape(array, (-1)) - arrs_tuple2 = tuple(arrs_list) - dataframe = pd.DataFrame(np.stack(arrs_tuple2, axis=1)) - dataframe.columns = cols - print(dataframe) - - print_pandas_pg( - "Same, but converted to gs axis", - ["sdate", "hdate"], - (sdates_pg[patch_index, :], hdates_pg[patch_index, :]), - ) - - print_pandas_pg( - ( - 'After "Ignore any harvests that were planted in the final year, because some cells' - ' will have incomplete growing seasons for the final year"' - ), - ["sdate", "hdate"], - (sdates_pg2[patch_index, :], hdates_pg2[patch_index, :]), - ) - else: - print("Couldn't import pandas, so not displaying example bad patch ORIGINAL.") - - def print_nopandas(array_1, array_2, msg): - print(msg) - if array_1.ndim == 1: - # I don't know why these aren't side-by-side! 
- print(np.stack((array_1, array_2), axis=1)) - else: - print(np.concatenate((array_1, array_2), axis=1)) - - print_nopandas(sdates_ymp[:, :, patch_index], hdates_ymp[:, :, patch_index], "Masked:") - - print_nopandas( - np.transpose(sdates_pym, (1, 2, 0))[:, :, patch_index], - np.transpose(hdates_pym, (1, 2, 0))[:, :, patch_index], - 'After "Ignore harvests from before this output began"', - ) - - print_nopandas( - np.transpose(sdates_pym2, (1, 2, 0))[:, :, patch_index], - np.transpose(hdates_pym2, (1, 2, 0))[:, :, patch_index], - 'After "In years with no sowing, pretend the first no-harvest is meaningful"', - ) - - print_nopandas( - np.transpose(sdates_pym3, (1, 2, 0))[:, :, patch_index], - np.transpose(hdates_pym3, (1, 2, 0))[:, :, patch_index], - ( - 'After "In years with sowing that are followed by inactive years, check whether the' - " last sowing was harvested before the patch was deactivated. If not, pretend the" - ' LAST [easier to implement!] no-harvest is meaningful."' - ), + # In all but the last calendar year, which patches had no sowing? + no_sowing_yp = np.all(np.isnan(this_ds.SDATES.values[:-1, :, :]), axis=1) + # In all but the first calendar year, which harvests' jdays are < their sowings' jdays? + # (Indicates sowing the previous calendar year.) + with np.errstate(invalid="ignore"): + hsdate1_gt_hdate1_yp = ( + this_ds.SDATES_PERHARV.values[1:, 0, :] > this_ds.HDATES.values[1:, 0, :] ) - - print_nopandas( - sdates_pg[patch_index, :], hdates_pg[patch_index, :], "Same, but converted to gs axis" + # Where both, we have the problem. + falsely_alive_yp = no_sowing_yp & hsdate1_gt_hdate1_yp + if np.any(falsely_alive_yp): + print( + f"Warning: {np.sum(falsely_alive_yp)} patch-seasons being ignored: Seemingly sown the " + + "year before harvest, but no sowings occurred that year." ) - - print_nopandas( - sdates_pg2[patch_index, :], - hdates_pg2[patch_index, :], - ( - 'After "Ignore any harvests that were planted in the final year, because some cells' - ' will have incomplete growing seasons for the final year"' - ), + falsely_alive_yp = np.concatenate( + (np.full((1, this_ds.dims["patch"]), False), falsely_alive_yp), axis=0 ) - - print("\n\n") - - -def set_up_ds_with_gs_axis(ds_in): - """ - Set up empty Dataset with time axis as "gs" (growing season) instead of what CLM puts out. - - Includes all the same variables as the input dataset, minus any that had dimensions mxsowings or - mxharvests. - """ - # Get the data variables to include in the new dataset - data_vars = {} - for var in ds_in.data_vars: - if not any(x in ["mxsowings", "mxharvests"] for x in ds_in[var].dims): - data_vars[var] = ds_in[var] - # Set up the new dataset - gs_years = [t.year - 1 for t in ds_in.time.values[:-1]] - coords = ds_in.coords - coords["gs"] = gs_years - ds_out = xr.Dataset(data_vars=data_vars, coords=coords, attrs=ds_in.attrs) - return ds_out + falsely_alive_y1p = np.expand_dims(falsely_alive_yp, axis=1) + dummy_false_y1p = np.expand_dims(np.full_like(falsely_alive_yp, False), axis=1) + falsely_alive_yhp = np.concatenate((falsely_alive_y1p, dummy_false_y1p), axis=1) + for var in this_ds.data_vars: + if this_ds[var].dims != ("time", "mxharvests", "patch"): + continue + this_ds[var] = this_ds[var].where(~falsely_alive_yhp) + return this_ds From 34b3320bd0c2dd787abbf0af64625cb202704a33 Mon Sep 17 00:00:00 2001 From: Sam Rabin Date: Thu, 8 Feb 2024 16:11:43 -0700 Subject: [PATCH 61/85] pylint: Allow variable names ax and im (common in matplotlib instructions). 
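
For context, "ax" and "im" are the conventional handles in virtually every matplotlib example, so pylint's default short-name regex flags them constantly. A typical instance of the pattern:

    import matplotlib.pyplot as plt
    import numpy as np

    fig, ax = plt.subplots()               # "ax" fails the default name regex
    im = ax.imshow(np.random.rand(4, 4))   # "im" likewise
    fig.colorbar(im, ax=ax)
    plt.savefig("example.png")
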
--- python/ctsm/.pylintrc | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/python/ctsm/.pylintrc b/python/ctsm/.pylintrc index 2087913e8a..ceff04c7d8 100644 --- a/python/ctsm/.pylintrc +++ b/python/ctsm/.pylintrc @@ -436,7 +436,10 @@ good-names=i, _, # --- default list is above here, our own list is below here --- # Allow logger as a global name in each module, because this seems to follow general recommended convention: - logger + logger, +# Allow these names, which are commonly used in matplotlib instructions + ax, + im # Include a hint for the correct naming format with invalid-name. include-naming-hint=no From be18e3ea3e39dc70396c6b1fc9b1283fb11c289b Mon Sep 17 00:00:00 2001 From: Sam Rabin Date: Thu, 8 Feb 2024 16:11:57 -0700 Subject: [PATCH 62/85] Satisfy pylint for cropcal_figs_module.py. --- .../crop_calendars/cropcal_figs_module.py | 47 ++++++++++++------- 1 file changed, 31 insertions(+), 16 deletions(-) diff --git a/python/ctsm/crop_calendars/cropcal_figs_module.py b/python/ctsm/crop_calendars/cropcal_figs_module.py index 8d7f472fec..d820460175 100644 --- a/python/ctsm/crop_calendars/cropcal_figs_module.py +++ b/python/ctsm/crop_calendars/cropcal_figs_module.py @@ -1,5 +1,11 @@ +""" +Functions for making crop calendar figures +""" + import numpy as np +# It's fine if these can't be imported. The script using these will handle it. +# pylint: disable=import-error import cartopy.crs as ccrs import matplotlib.pyplot as plt import matplotlib.colors as mcolors @@ -23,6 +29,9 @@ # Cases (line and scatter plots) def cropcal_colors_cases(casename): + """ + Define colors for each case + """ case_color_dict = { "clm default": [x / 255 for x in [92, 219, 219]], "prescribed calendars": [x / 255 for x in [250, 102, 240]], @@ -32,11 +41,8 @@ def cropcal_colors_cases(casename): case_color_dict["5.0 lu"] = case_color_dict["clm default"] case_color_dict["5.2 lu"] = case_color_dict["prescribed calendars"] - case_color = None casename_for_colors = casename.lower().replace(" (0)", "").replace(" (1)", "") - if casename_for_colors in case_color_dict: - case_color = case_color_dict[casename_for_colors] - return case_color + return case_color_dict.get(casename_for_colors, None) def make_map( @@ -65,6 +71,9 @@ def make_map( vmin=None, vrange=None, ): + """ + Make map + """ if underlay is not None: if underlay_color is None: underlay_color = cropcal_colors["underlay"] @@ -147,23 +156,25 @@ def make_map( # Need to do this for subplot row labels set_ticks(-1, fontsize, "y") plt.yticks([]) - for x in ax.spines: - ax.spines[x].set_visible(False) + for spine in ax.spines: + ax.spines[spine].set_visible(False) if show_cbar: return im, cbar - else: - return im, None + return im, None def deal_with_ticklabels(cbar, cbar_max, ticklabels, ticklocations, units, im): + """ + Handle settings related to ticklabels + """ if ticklocations is not None: cbar.set_ticks(ticklocations) if units is not None and units.lower() == "month": cbar.set_ticklabels( ["Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"] ) - units == "Month" + units = "Month" elif ticklabels is not None: cbar.set_ticklabels(ticklabels) if isinstance(im, mplcol.QuadMesh): @@ -173,7 +184,7 @@ def deal_with_ticklabels(cbar, cbar_max, ticklabels, ticklocations, units, im): if cbar_max is not None and clim_max > cbar_max: if ticklabels is not None: raise RuntimeError( - "How to handle this now that you are specifying ticklocations separate from ticklabels?" 
+ "How to handle this now that ticklocations is specified separately from ticklabels?" ) ticks = cbar.get_ticks() if ticks[-2] > cbar_max: @@ -182,24 +193,28 @@ def deal_with_ticklabels(cbar, cbar_max, ticklabels, ticklocations, units, im): ) ticklabels = ticks.copy() ticklabels[-1] = cbar_max - for i, x in enumerate(ticklabels): - if x == int(x): - ticklabels[i] = str(int(x)) + for i, ticklabel in enumerate(ticklabels): + if ticklabel == int(ticklabel): + ticklabels[i] = str(int(ticklabel)) cbar.set_ticks( ticks - ) # Calling this before set_xticklabels() avoids "UserWarning: FixedFormatter should only be used together with FixedLocator" (https://stackoverflow.com/questions/63723514/userwarning-fixedformatter-should-only-be-used-together-with-fixedlocator) + ) # Calling this before set_xticklabels() avoids "UserWarning: FixedFormatter should only + # be used together with FixedLocator" (https://stackoverflow.com/questions/63723514) cbar.set_ticklabels(ticklabels) def set_ticks(lonlat_bin_width, fontsize, x_or_y): + """ + Plot tick marks + """ if x_or_y == "x": ticks = np.arange(-180, 181, lonlat_bin_width) else: ticks = np.arange(-60, 91, lonlat_bin_width) ticklabels = [str(x) for x in ticks] - for i, x in enumerate(ticks): - if x % 2: + for i, tick in enumerate(ticks): + if tick % 2: ticklabels[i] = "" if x_or_y == "x": From 5b3cad7c7e61e8c1d945b2a29dac15bf85573c4f Mon Sep 17 00:00:00 2001 From: Sam Rabin Date: Thu, 8 Feb 2024 16:17:22 -0700 Subject: [PATCH 63/85] pylint: Resolve remaining wrong-import-position complaints. --- python/ctsm/crop_calendars/cropcal_utils.py | 6 ++++-- python/ctsm/crop_calendars/regrid_ggcmi_shdates.py | 9 ++++++--- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/python/ctsm/crop_calendars/cropcal_utils.py b/python/ctsm/crop_calendars/cropcal_utils.py index 4d77d2ef66..f96efd1d99 100644 --- a/python/ctsm/crop_calendars/cropcal_utils.py +++ b/python/ctsm/crop_calendars/cropcal_utils.py @@ -1,5 +1,7 @@ -"""utility functions""" -"""copied from klindsay, https://github.com/klindsay28/CESM2_coup_carb_cycle_JAMES/blob/master/utils.py""" +""" +utility functions +copied from klindsay, https://github.com/klindsay28/CESM2_coup_carb_cycle_JAMES/blob/master/utils.py +""" import re import warnings diff --git a/python/ctsm/crop_calendars/regrid_ggcmi_shdates.py b/python/ctsm/crop_calendars/regrid_ggcmi_shdates.py index 911b2f93a1..5c2e7f8820 100644 --- a/python/ctsm/crop_calendars/regrid_ggcmi_shdates.py +++ b/python/ctsm/crop_calendars/regrid_ggcmi_shdates.py @@ -11,9 +11,12 @@ _CTSM_PYTHON = os.path.join(os.path.dirname(os.path.realpath(__file__)), os.pardir, os.pardir) sys.path.insert(1, _CTSM_PYTHON) -from ctsm.utils import abort -from ctsm.ctsm_pylib_dependent_utils import import_coord_1d, import_coord_2d -from ctsm import ctsm_logging +from ctsm.utils import abort # pylint: disable=wrong-import-position +from ctsm.ctsm_pylib_dependent_utils import ( # pylint: disable=wrong-import-position + import_coord_1d, + import_coord_2d, +) +from ctsm import ctsm_logging # pylint: disable=wrong-import-position logger = logging.getLogger(__name__) From 89c3dade60cb0192e6003399efd0ae2b103f88b1 Mon Sep 17 00:00:00 2001 From: Sam Rabin Date: Thu, 8 Feb 2024 21:14:01 -0700 Subject: [PATCH 64/85] Post-pylint fixes to get RXCROPMATURITY working again. 
--- .../crop_calendars/check_constant_vars.py | 19 ++++++++++--- python/ctsm/crop_calendars/check_rx_obeyed.py | 2 +- .../ctsm/crop_calendars/cropcal_constants.py | 26 ++++++++++++++++++ python/ctsm/crop_calendars/cropcal_module.py | 27 +++---------------- 4 files changed, 45 insertions(+), 29 deletions(-) create mode 100644 python/ctsm/crop_calendars/cropcal_constants.py diff --git a/python/ctsm/crop_calendars/check_constant_vars.py b/python/ctsm/crop_calendars/check_constant_vars.py index 92e1819803..1a5a4e62c6 100644 --- a/python/ctsm/crop_calendars/check_constant_vars.py +++ b/python/ctsm/crop_calendars/check_constant_vars.py @@ -67,6 +67,7 @@ def loop_through_bad_patches( vary_lats, vary_crops, vary_crops_int, + any_bad, ): """ Loop through and check any patches that were "bad" according to check_constant_vars(). @@ -99,7 +100,7 @@ def loop_through_bad_patches( else: raise RuntimeError(f"lon {this_lon} lat {this_lat} {this_crop} not in rx dataset?") - # Print info (or save to print later) + # Print info (or save to print later) any_bad = True if verbose: this_str = ( @@ -181,10 +182,14 @@ def check_one_constant_var_loop_through_timesteps( these_patches, t1_yr, t1_vals, + any_bad, + any_bad_before_checking_rx, + bad_patches, ): """ In check_one_constant_var(), loop through timesteps """ + found_in_rx = None for timestep in np.arange(time_1 + 1, this_ds.dims[time_coord]): t_yr = this_ds[time_coord].values[timestep] t_vals = np.squeeze(this_da.isel({time_coord: timestep, "patch": these_patches}).values) @@ -250,13 +255,14 @@ def check_one_constant_var_loop_through_timesteps( vary_lats, vary_crops, vary_crops_int, + any_bad, ) return any_bad_before_checking_rx, bad_patches, found_in_rx, any_bad def check_one_constant_var( - this_ds, case, ignore_nan, verbose, emojus, var, any_bad_before_checking_rx + this_ds, case, ignore_nan, verbose, emojus, var, any_bad, any_bad_before_checking_rx ): """ Ensure that a variable that should be constant actually is @@ -306,12 +312,17 @@ def check_one_constant_var( these_patches, t1_yr, t1_vals, + any_bad, + any_bad_before_checking_rx, + bad_patches, ) if verbose and any_bad: print(f"{emojus} CLM output {var} unexpectedly vary over time:") str_list.sort() - if rx_ds and np.any(~found_in_rx): + if found_in_rx is None: + raise RuntimeError("Somehow any_bad True but found_in_rx None") + if rx_ds and np.any(~found_in_rx): # pylint: disable=invalid-unary-operand-type str_list = [ "*: Not found in prescribed input file (maybe minor lon/lat mismatch)" ] + str_list @@ -376,7 +387,7 @@ def check_constant_vars( for var in const_vars: any_bad, any_bad_before_checking_rx, bad_patches = check_one_constant_var( - this_ds, case, ignore_nan, verbose, emojus, var, any_bad_before_checking_rx + this_ds, case, ignore_nan, verbose, emojus, var, any_bad, any_bad_before_checking_rx ) if any_bad and throw_error: diff --git a/python/ctsm/crop_calendars/check_rx_obeyed.py b/python/ctsm/crop_calendars/check_rx_obeyed.py index c1ad5cfecc..3d769d3820 100644 --- a/python/ctsm/crop_calendars/check_rx_obeyed.py +++ b/python/ctsm/crop_calendars/check_rx_obeyed.py @@ -14,7 +14,7 @@ ) sys.path.insert(1, _CTSM_PYTHON) import ctsm.crop_calendars.cropcal_utils as utils # pylint: disable=wrong-import-position -from ctsm.crop_calendars.cropcal_module import ( # pylint: disable=wrong-import-position +from ctsm.crop_calendars.cropcal_constants import ( # pylint: disable=wrong-import-position DEFAULT_GDD_MIN, ) diff --git a/python/ctsm/crop_calendars/cropcal_constants.py 
b/python/ctsm/crop_calendars/cropcal_constants.py new file mode 100644 index 0000000000..f015ac7db1 --- /dev/null +++ b/python/ctsm/crop_calendars/cropcal_constants.py @@ -0,0 +1,26 @@ +""" +Constants used in crop calendar scripts +""" + +# Define conversion multipliers, {from: {to1, to2, ...}, ...} +multiplier_dict = { + # Mass + "g": { + "Mt": 1e-12, + }, + "t": { + "Mt": 1e-6, + }, + # Volume + "m3": { + "km3": 1e-9, + }, + # Yield + "g/m2": { + "t/ha": 1e-6 * 1e4, + }, +} + +# Minimum harvest threshold allowed in PlantCrop() +# Was 50 before cropcal runs 2023-01-28 +DEFAULT_GDD_MIN = 1.0 diff --git a/python/ctsm/crop_calendars/cropcal_module.py b/python/ctsm/crop_calendars/cropcal_module.py index aa3c5d469e..91963aa269 100644 --- a/python/ctsm/crop_calendars/cropcal_module.py +++ b/python/ctsm/crop_calendars/cropcal_module.py @@ -22,30 +22,9 @@ from ctsm.crop_calendars.check_rx_obeyed import ( # pylint: disable=wrong-import-position check_rx_obeyed, ) - - -# Define conversion multipliers, {from: {to1, to2, ...}, ...} -multiplier_dict = { - # Mass - "g": { - "Mt": 1e-12, - }, - "t": { - "Mt": 1e-6, - }, - # Volume - "m3": { - "km3": 1e-9, - }, - # Yield - "g/m2": { - "t/ha": 1e-6 * 1e4, - }, -} - -# Minimum harvest threshold allowed in PlantCrop() -# Was 50 before cropcal runs 2023-01-28 -DEFAULT_GDD_MIN = 1.0 +from ctsm.crop_calendars.cropcal_constants import ( # pylint: disable=wrong-import-position + DEFAULT_GDD_MIN, +) def check_and_trim_years(year_1, year_n, ds_in): From 3e8b83504c00497ad0185abbacca92feca7b8035 Mon Sep 17 00:00:00 2001 From: Sam Rabin Date: Fri, 9 Feb 2024 15:31:55 -0700 Subject: [PATCH 65/85] pylint: Resolve remaining invalid-name complaints. --- python/ctsm/crop_calendars/cropcal_module.py | 4 +- python/ctsm/crop_calendars/cropcal_utils.py | 302 +++++++++--------- .../crop_calendars/generate_gdds_functions.py | 12 +- .../crop_calendars/regrid_ggcmi_shdates.py | 32 +- .../ctsm/test/test_unit_utils_import_coord.py | 4 +- 5 files changed, 180 insertions(+), 174 deletions(-) diff --git a/python/ctsm/crop_calendars/cropcal_module.py b/python/ctsm/crop_calendars/cropcal_module.py index 91963aa269..b3b415b77c 100644 --- a/python/ctsm/crop_calendars/cropcal_module.py +++ b/python/ctsm/crop_calendars/cropcal_module.py @@ -266,7 +266,7 @@ def import_rx_dates(var_prefix, date_infile, dates_ds, set_neg1_to_nan=True): this_var = f"{var_prefix}{j+1}_{i}" date_varlist = date_varlist + [this_var] - this_ds = utils.import_ds(date_infile, myVars=date_varlist) + this_ds = utils.import_ds(date_infile, my_vars=date_varlist) did_warn = False for var in this_ds: @@ -355,7 +355,7 @@ def import_output( Import CLM output """ # Import - this_ds = utils.import_ds(filename, myVars=my_vars, myVegtypes=my_vegtypes) + this_ds = utils.import_ds(filename, my_vars=my_vars, my_vegtypes=my_vegtypes) # Trim to years of interest (do not include extra year needed for finishing last growing season) if year_1 and year_n: diff --git a/python/ctsm/crop_calendars/cropcal_utils.py b/python/ctsm/crop_calendars/cropcal_utils.py index f96efd1d99..2f84bd6739 100644 --- a/python/ctsm/crop_calendars/cropcal_utils.py +++ b/python/ctsm/crop_calendars/cropcal_utils.py @@ -250,8 +250,8 @@ def vegtype_str2int(vegtype_str, vegtype_mainlist=None): indices = np.array([-1]) else: indices = np.full(len(vegtype_str), -1) - for v in np.unique(vegtype_str): - indices[np.where(vegtype_str == v)] = vegtype_mainlist.index(v) + for vegtype_str_2 in np.unique(vegtype_str): + indices[np.where(vegtype_str == vegtype_str_2)] = 
vegtype_mainlist.index(vegtype_str_2) if convert_to_ndarray: indices = [int(x) for x in indices] return indices @@ -331,8 +331,8 @@ def xr_flexsel(xr_object, patches1d_itype_veg=None, warn_about_seltype_interp=Tr if slice_members == []: raise TypeError("slice is all None?") this_type = int - for x in slice_members: - if x < 0 or not isinstance(x, int): + for member in slice_members: + if member < 0 or not isinstance(member, int): this_type = "values" break elif isinstance(selection, np.ndarray): @@ -341,12 +341,12 @@ def xr_flexsel(xr_object, patches1d_itype_veg=None, warn_about_seltype_interp=Tr else: is_inefficient = True this_type = None - for x in selection: - if x < 0 or x % 1 > 0: - if isinstance(x, int): + for member in selection: + if member < 0 or member % 1 > 0: + if isinstance(member, int): this_type = "values" else: - this_type = type(x) + this_type = type(member) break if this_type == None: this_type = int @@ -385,47 +385,47 @@ def xr_flexsel(xr_object, patches1d_itype_veg=None, warn_about_seltype_interp=Tr # Trim along relevant 1d axes if isinstance(xr_object, xr.Dataset) and key in ["lat", "lon"]: if selection_type == "indices": - inclCoords = xr_object[key].values[selection] + incl_coords = xr_object[key].values[selection] elif selection_type == "values": if isinstance(selection, slice): - inclCoords = xr_object.sel({key: selection}, drop=False)[key].values + incl_coords = xr_object.sel({key: selection}, drop=False)[key].values else: - inclCoords = selection + incl_coords = selection else: raise TypeError(f"selection_type {selection_type} not recognized") if key == "lat": - thisXY = "jxy" + this_xy = "jxy" elif key == "lon": - thisXY = "ixy" + this_xy = "ixy" else: raise KeyError( f"Key '{key}' not recognized: What 1d_ suffix should I use for variable" " name?" 
) - pattern = re.compile(f"1d_{thisXY}") + pattern = re.compile(f"1d_{this_xy}") matches = [x for x in list(xr_object.keys()) if pattern.search(x) != None] - for thisVar in matches: - if len(xr_object[thisVar].dims) != 1: + for var in matches: + if len(xr_object[var].dims) != 1: raise RuntimeError( - f"Expected {thisVar} to have 1 dimension, but it has" - f" {len(xr_object[thisVar].dims)}: {xr_object[thisVar].dims}" + f"Expected {var} to have 1 dimension, but it has" + f" {len(xr_object[var].dims)}: {xr_object[var].dims}" ) - thisVar_dim = xr_object[thisVar].dims[0] - # print(f"Variable {thisVar} has dimension {thisVar_dim}") - thisVar_coords = xr_object[key].values[ - xr_object[thisVar].values.astype(int) - 1 - ] - # print(f"{thisVar_dim} size before: {xr_object.sizes[thisVar_dim]}") + dim = xr_object[var].dims[0] + # print(f"Variable {var} has dimension {dim}") + coords = xr_object[key].values[xr_object[var].values.astype(int) - 1] + # print(f"{dim} size before: {xr_object.sizes[dim]}") ok_ind = [] - new_1d_thisXY = [] - for i, x in enumerate(thisVar_coords): - if x in inclCoords: + new_1d_this_xy = [] + for i, member in enumerate(coords): + if member in incl_coords: ok_ind = ok_ind + [i] - new_1d_thisXY = new_1d_thisXY + [(inclCoords == x).nonzero()[0] + 1] - xr_object = xr_object.isel({thisVar_dim: ok_ind}) - new_1d_thisXY = np.array(new_1d_thisXY).squeeze() - xr_object[thisVar].values = new_1d_thisXY - # print(f"{thisVar_dim} size after: {xr_object.sizes[thisVar_dim]}") + new_1d_this_xy = new_1d_this_xy + [ + (incl_coords == member).nonzero()[0] + 1 + ] + xr_object = xr_object.isel({dim: ok_ind}) + new_1d_this_xy = np.array(new_1d_this_xy).squeeze() + xr_object[var].values = new_1d_this_xy + # print(f"{dim} size after: {xr_object.sizes[dim]}") # Perform selection if selection_type == "indices": @@ -463,72 +463,71 @@ def get_patch_ivts(this_ds, this_pftlist): # Convert a list of strings with vegetation type names into a DataArray. Used to add vegetation type info in import_ds(). def get_vegtype_str_da(vegtype_str): nvt = len(vegtype_str) - thisName = "vegtype_str" vegtype_str_da = xr.DataArray( - vegtype_str, coords={"ivt": np.arange(0, nvt)}, dims=["ivt"], name=thisName + vegtype_str, coords={"ivt": np.arange(0, nvt)}, dims=["ivt"], name="vegtype_str" ) return vegtype_str_da # Function to drop unwanted variables in preprocessing of open_mfdataset(), making sure to NOT drop any unspecified variables that will be useful in gridding. Also adds vegetation type info in the form of a DataArray of strings. # Also renames "pft" dimension (and all like-named variables, e.g., pft1d_itype_veg_str) to be named like "patch". This can later be reversed, for compatibility with other code, using patch2pft(). 
-def mfdataset_preproc(ds, vars_to_import, vegtypes_to_import, timeSlice): +def mfdataset_preproc(ds_in, vars_to_import, vegtypes_to_import, time_slice): # Rename "pft" dimension and variables to "patch", if needed - if "pft" in ds.dims: + if "pft" in ds_in.dims: pattern = re.compile("pft.*1d") - matches = [x for x in list(ds.keys()) if pattern.search(x) != None] + matches = [x for x in list(ds_in.keys()) if pattern.search(x) != None] pft2patch_dict = {"pft": "patch"} - for m in matches: - pft2patch_dict[m] = m.replace("pft", "patch").replace("patchs", "patches") - ds = ds.rename(pft2patch_dict) + for match in matches: + pft2patch_dict[match] = match.replace("pft", "patch").replace("patchs", "patches") + ds_in = ds_in.rename(pft2patch_dict) derived_vars = [] if vars_to_import != None: # Split vars_to_import into variables that are vs. aren't already in ds - derived_vars = [v for v in vars_to_import if v not in ds] - present_vars = [v for v in vars_to_import if v in ds] + derived_vars = [v for v in vars_to_import if v not in ds_in] + present_vars = [v for v in vars_to_import if v in ds_in] vars_to_import = present_vars # Get list of dimensions present in variables in vars_to_import. - dimList = [] - for thisVar in vars_to_import: + dim_list = [] + for var in vars_to_import: # list(set(x)) returns a list of the unique items in x - dimList = list(set(dimList + list(ds.variables[thisVar].dims))) + dim_list = list(set(dim_list + list(ds_in.variables[var].dims))) # Get any _1d variables that are associated with those dimensions. These will be useful in gridding. Also, if any dimension is "pft", set up to rename it and all like-named variables to "patch" - onedVars = [] - for thisDim in dimList: - pattern = re.compile(f"{thisDim}.*1d") - matches = [x for x in list(ds.keys()) if pattern.search(x) != None] - onedVars = list(set(onedVars + matches)) + oned_vars = [] + for dim in dim_list: + pattern = re.compile(f"{dim}.*1d") + matches = [x for x in list(ds_in.keys()) if pattern.search(x) != None] + oned_vars = list(set(oned_vars + matches)) # Add dimensions and _1d variables to vars_to_import - vars_to_import = list(set(vars_to_import + list(ds.dims) + onedVars)) + vars_to_import = list(set(vars_to_import + list(ds_in.dims) + oned_vars)) # Add any _bounds variables bounds_vars = [] - for v in vars_to_import: - bounds_var = v + "_bounds" - if bounds_var in ds: + for var in vars_to_import: + bounds_var = var + "_bounds" + if bounds_var in ds_in: bounds_vars = bounds_vars + [bounds_var] vars_to_import = vars_to_import + bounds_vars # Get list of variables to drop - varlist = list(ds.variables) + varlist = list(ds_in.variables) vars_to_drop = list(np.setdiff1d(varlist, vars_to_import)) # Drop them - ds = ds.drop_vars(vars_to_drop) + ds_in = ds_in.drop_vars(vars_to_drop) # Add vegetation type info - if "patches1d_itype_veg" in list(ds): + if "patches1d_itype_veg" in list(ds_in): this_pftlist = define_pftlist() get_patch_ivts( - ds, this_pftlist + ds_in, this_pftlist ) # Includes check of whether vegtype changes over time anywhere vegtype_da = get_vegtype_str_da(this_pftlist) patches1d_itype_veg_str = vegtype_da.values[ - ds.isel(time=0).patches1d_itype_veg.values.astype(int) + ds_in.isel(time=0).patches1d_itype_veg.values.astype(int) ] npatch = len(patches1d_itype_veg_str) patches1d_itype_veg_str = xr.DataArray( @@ -537,77 +536,82 @@ def mfdataset_preproc(ds, vars_to_import, vegtypes_to_import, timeSlice): dims=["patch"], name="patches1d_itype_veg_str", ) - ds = xr.merge([ds, vegtype_da, 
patches1d_itype_veg_str]) + ds_in = xr.merge([ds_in, vegtype_da, patches1d_itype_veg_str]) # Restrict to veg. types of interest, if any if vegtypes_to_import != None: - ds = xr_flexsel(ds, vegtype=vegtypes_to_import) + ds_in = xr_flexsel(ds_in, vegtype=vegtypes_to_import) # Restrict to time slice, if any - if timeSlice: - ds = safer_timeslice(ds, timeSlice) + if time_slice: + ds_in = safer_timeslice(ds_in, time_slice) # Finish import - ds = xr.decode_cf(ds, decode_times=True) + ds_in = xr.decode_cf(ds_in, decode_times=True) # Compute derived variables - for v in derived_vars: - if v == "HYEARS" and "HDATES" in ds and ds.HDATES.dims == ("time", "mxharvests", "patch"): - yearList = np.array([np.float32(x.year - 1) for x in ds.time.values]) - hyears = ds["HDATES"].copy() + for var in derived_vars: + if ( + var == "HYEARS" + and "HDATES" in ds_in + and ds_in.HDATES.dims == ("time", "mxharvests", "patch") + ): + year_list = np.array([np.float32(x.year - 1) for x in ds_in.time.values]) + hyears = ds_in["HDATES"].copy() hyears.values = np.tile( - np.expand_dims(yearList, (1, 2)), (1, ds.dims["mxharvests"], ds.dims["patch"]) + np.expand_dims(year_list, (1, 2)), + (1, ds_in.dims["mxharvests"], ds_in.dims["patch"]), ) with np.errstate(invalid="ignore"): - is_le_zero = ~np.isnan(ds.HDATES.values) & (ds.HDATES.values <= 0) - hyears.values[is_le_zero] = ds.HDATES.values[is_le_zero] - hyears.values[np.isnan(ds.HDATES.values)] = np.nan + is_le_zero = ~np.isnan(ds_in.HDATES.values) & (ds_in.HDATES.values <= 0) + hyears.values[is_le_zero] = ds_in.HDATES.values[is_le_zero] + hyears.values[np.isnan(ds_in.HDATES.values)] = np.nan hyears.attrs["long_name"] = "DERIVED: actual crop harvest years" hyears.attrs["units"] = "year" - ds["HYEARS"] = hyears + ds_in["HYEARS"] = hyears - return ds + return ds_in # Import a dataset that can be spread over multiple files, only including specified variables and/or vegetation types and/or timesteps, concatenating by time. DOES actually read the dataset into memory, but only AFTER dropping unwanted variables and/or vegetation types. def import_ds( filelist, - myVars=None, - myVegtypes=None, - timeSlice=None, - myVars_missing_ok=[], + my_vars=None, + my_vegtypes=None, + time_slice=None, + my_vars_missing_ok=[], only_active_patches=False, rename_lsmlatlon=False, chunks=None, ): - # Convert myVegtypes here, if needed, to avoid repeating the process each time you read a file in xr.open_mfdataset(). - if myVegtypes is not None: - if not isinstance(myVegtypes, list): - myVegtypes = [myVegtypes] - if isinstance(myVegtypes[0], str): - myVegtypes = vegtype_str2int(myVegtypes) + # Convert my_vegtypes here, if needed, to avoid repeating the process each time you read a file in xr.open_mfdataset(). + if my_vegtypes is not None: + if not isinstance(my_vegtypes, list): + my_vegtypes = [my_vegtypes] + if isinstance(my_vegtypes[0], str): + my_vegtypes = vegtype_str2int(my_vegtypes) # Same for these variables. 
- if myVars != None: - if not isinstance(myVars, list): - myVars = [myVars] - if myVars_missing_ok: - if not isinstance(myVars_missing_ok, list): - myVars_missing_ok = [myVars_missing_ok] + if my_vars != None: + if not isinstance(my_vars, list): + my_vars = [my_vars] + if my_vars_missing_ok: + if not isinstance(my_vars_missing_ok, list): + my_vars_missing_ok = [my_vars_missing_ok] # Make sure lists are actually lists if not isinstance(filelist, list): filelist = [filelist] - if not isinstance(myVars_missing_ok, list): - myVars_missing_ok = [myVars_missing_ok] + if not isinstance(my_vars_missing_ok, list): + my_vars_missing_ok = [my_vars_missing_ok] # Remove files from list if they don't contain requested timesteps. - # timeSlice should be in the format slice(start,end[,step]). start or end can be None to be unbounded on one side. Note that the standard slice() documentation suggests that only elements through end-1 will be selected, but that seems not to be the case in the xarray implementation. - if timeSlice: + # time_slice should be in the format slice(start,end[,step]). start or end can be None to be unbounded on one side. Note that the standard slice() documentation suggests that only elements through end-1 will be selected, but that seems not to be the case in the xarray implementation. + if time_slice: new_filelist = [] for file in sorted(filelist): filetime = xr.open_dataset(file).time - filetime_sel = safer_timeslice(filetime, timeSlice) + filetime_sel = safer_timeslice(filetime, time_slice) include_this_file = filetime_sel.size if include_this_file: new_filelist.append(file) @@ -616,11 +620,11 @@ def import_ds( elif new_filelist: break if not new_filelist: - raise RuntimeError(f"No files found in timeSlice {timeSlice}") + raise RuntimeError(f"No files found in time_slice {time_slice}") filelist = new_filelist - # The xarray open_mfdataset() "preprocess" argument requires a function that takes exactly one variable (an xarray.Dataset object). Wrapping mfdataset_preproc() in this lambda function allows this. Could also just allow mfdataset_preproc() to access myVars and myVegtypes directly, but that's bad practice as it could lead to scoping issues. - mfdataset_preproc_closure = lambda ds: mfdataset_preproc(ds, myVars, myVegtypes, timeSlice) + # The xarray open_mfdataset() "preprocess" argument requires a function that takes exactly one variable (an xarray.Dataset object). Wrapping mfdataset_preproc() in this lambda function allows this. Could also just allow mfdataset_preproc() to access my_vars and my_vegtypes directly, but that's bad practice as it could lead to scoping issues. 
+ mfdataset_preproc_closure = lambda ds: mfdataset_preproc(ds, my_vars, my_vegtypes, time_slice) # Import if isinstance(filelist, list) and len(filelist) == 1: @@ -646,7 +650,7 @@ def import_ds( ) elif isinstance(filelist, str): this_ds = xr.open_dataset(filelist, chunks=chunks) - this_ds = mfdataset_preproc(this_ds, myVars, myVegtypes, timeSlice) + this_ds = mfdataset_preproc(this_ds, my_vars, my_vegtypes, time_slice) this_ds = this_ds.compute() # Include only active patches (or whatever) @@ -656,10 +660,10 @@ def import_ds( this_ds_active = this_ds.isel(patch=p_active) # Warn and/or error about variables that couldn't be imported or derived - if myVars: - missing_vars = [v for v in myVars if v not in this_ds] - ok_missing_vars = [v for v in missing_vars if v in myVars_missing_ok] - bad_missing_vars = [v for v in missing_vars if v not in myVars_missing_ok] + if my_vars: + missing_vars = [v for v in my_vars if v not in this_ds] + ok_missing_vars = [v for v in missing_vars if v in my_vars_missing_ok] + bad_missing_vars = [v for v in missing_vars if v not in my_vars_missing_ok] if ok_missing_vars: print( "Could not import some variables; either not present or not deriveable:" @@ -681,37 +685,37 @@ def import_ds( # Return a DataArray, with defined coordinates, for a given variable in a dataset. -def get_thisVar_da(thisVar, this_ds): +def get_thisvar_da(var, this_ds): # Make DataArray for this variable - thisvar_da = np.array(this_ds.variables[thisVar]) - theseDims = this_ds.variables[thisVar].dims - thisvar_da = xr.DataArray(thisvar_da, dims=theseDims) + thisvar_da = np.array(this_ds.variables[var]) + these_dims = this_ds.variables[var].dims + thisvar_da = xr.DataArray(thisvar_da, dims=these_dims) # Define coordinates of this variable's DataArray - dimsDict = dict() - for thisDim in theseDims: - dimsDict[thisDim] = this_ds[thisDim] - thisvar_da = thisvar_da.assign_coords(dimsDict) - thisvar_da.attrs = this_ds[thisVar].attrs + dims_dict = dict() + for dim in these_dims: + dims_dict[dim] = this_ds[dim] + thisvar_da = thisvar_da.assign_coords(dims_dict) + thisvar_da.attrs = this_ds[var].attrs return thisvar_da # Make a geographically gridded DataArray (with dimensions time, vegetation type [as string], lat, lon) of one variable within a Dataset. Optional keyword arguments will be passed to xr_flexsel() to select single steps or slices along the specified ax(ie)s. # -# fillValue: Default None means grid will be filled with NaN, unless the variable in question already has a fillValue, in which case that will be used. -def grid_one_variable(this_ds, thisVar, fillValue=None, **kwargs): +# fill_value: Default None means grid will be filled with NaN, unless the variable in question already has a _FillValue, in which case that will be used. 
+def grid_one_variable(this_ds, var, fill_value=None, **kwargs): # Get this Dataset's values for selection(s), if provided this_ds = xr_flexsel(this_ds, **kwargs) # Get DataArrays needed for gridding - thisvar_da = get_thisVar_da(thisVar, this_ds) + thisvar_da = get_thisvar_da(var, this_ds) vt_da = None if "patch" in thisvar_da.dims: spatial_unit = "patch" xy_1d_prefix = "patches" if "patches1d_itype_veg" in this_ds: - vt_da = get_thisVar_da("patches1d_itype_veg", this_ds) + vt_da = get_thisvar_da("patches1d_itype_veg", this_ds) elif "gridcell" in thisvar_da.dims: spatial_unit = "gridcell" xy_1d_prefix = "grid" @@ -719,11 +723,11 @@ def grid_one_variable(this_ds, thisVar, fillValue=None, **kwargs): raise RuntimeError( f"What variables to use for _ixy and _jxy of variable with dims {thisvar_da.dims}?" ) - ixy_da = get_thisVar_da(xy_1d_prefix + "1d_ixy", this_ds) - jxy_da = get_thisVar_da(xy_1d_prefix + "1d_jxy", this_ds) + ixy_da = get_thisvar_da(xy_1d_prefix + "1d_ixy", this_ds) + jxy_da = get_thisvar_da(xy_1d_prefix + "1d_jxy", this_ds) - if not fillValue and "_FillValue" in thisvar_da.attrs: - fillValue = thisvar_da.attrs["_FillValue"] + if not fill_value and "_FillValue" in thisvar_da.attrs: + fill_value = thisvar_da.attrs["_FillValue"] # Renumber vt_da to work as indices on new ivt dimension, if needed. ### Ensures that the unique set of vt_da values begins with 1 and @@ -743,18 +747,18 @@ def grid_one_variable(this_ds, thisVar, fillValue=None, **kwargs): new_dims = new_dims + ["lat", "lon"] # Set up empty array - n_list = [] + dim_size_list = [] for dim in new_dims: if dim == "ivt_str": - n = this_ds.sizes["ivt"] + dim_size = this_ds.sizes["ivt"] elif dim in thisvar_da.coords: - n = thisvar_da.sizes[dim] + dim_size = thisvar_da.sizes[dim] else: - n = this_ds.sizes[dim] - n_list = n_list + [n] - thisvar_gridded = np.empty(n_list) - if fillValue: - thisvar_gridded[:] = fillValue + dim_size = this_ds.sizes[dim] + dim_size_list = dim_size_list + [dim_size] + thisvar_gridded = np.empty(dim_size_list) + if fill_value: + thisvar_gridded[:] = fill_value else: thisvar_gridded[:] = np.NaN @@ -790,45 +794,45 @@ def grid_one_variable(this_ds, thisVar, fillValue=None, **kwargs): else: values = this_ds[dim].values thisvar_gridded = thisvar_gridded.assign_coords({dim: values}) - thisvar_gridded.name = thisVar + thisvar_gridded.name = var # Add FillValue attribute - if fillValue: - thisvar_gridded.attrs["_FillValue"] = fillValue + if fill_value: + thisvar_gridded.attrs["_FillValue"] = fill_value return thisvar_gridded # ctsm_pylib can't handle time slicing like Dataset.sel(time=slice("1998-01-01", "2005-12-31")) for some reason. This function tries to fall back to slicing by integers. It should work with both Datasets and DataArrays. 
-def safer_timeslice(ds, timeSlice, timeVar="time"): +def safer_timeslice(ds_in, time_slice, time_var="time"): try: - ds = ds.sel({timeVar: timeSlice}) + ds_in = ds_in.sel({time_var: time_slice}) except: # If the issue might have been slicing using strings, try to fall back to integer slicing if ( - isinstance(timeSlice.start, str) - and isinstance(timeSlice.stop, str) - and len(timeSlice.start.split("-")) == 3 - and timeSlice.start.split("-")[1:] == ["01", "01"] - and len(timeSlice.stop.split("-")) == 3 + isinstance(time_slice.start, str) + and isinstance(time_slice.stop, str) + and len(time_slice.start.split("-")) == 3 + and time_slice.start.split("-")[1:] == ["01", "01"] + and len(time_slice.stop.split("-")) == 3 and ( - timeSlice.stop.split("-")[1:] == ["12", "31"] - or timeSlice.stop.split("-")[1:] == ["01", "01"] + time_slice.stop.split("-")[1:] == ["12", "31"] + or time_slice.stop.split("-")[1:] == ["01", "01"] ) ): - fileyears = np.array([x.year for x in ds.time.values]) + fileyears = np.array([x.year for x in ds_in.time.values]) if len(np.unique(fileyears)) != len(fileyears): print("Could not fall back to integer slicing of years: Time axis not annual") raise - yStart = int(timeSlice.start.split("-")[0]) - yStop = int(timeSlice.stop.split("-")[0]) - where_in_timeSlice = np.where((fileyears >= yStart) & (fileyears <= yStop))[0] - ds = ds.isel({timeVar: where_in_timeSlice}) + y_start = int(time_slice.start.split("-")[0]) + y_stop = int(time_slice.stop.split("-")[0]) + where_in_timeslice = np.where((fileyears >= y_start) & (fileyears <= y_stop))[0] + ds_in = ds_in.isel({time_var: where_in_timeslice}) else: - print(f"Could not fall back to integer slicing for timeSlice {timeSlice}") + print(f"Could not fall back to integer slicing for time_slice {time_slice}") raise - return ds + return ds_in # Convert a longitude axis that's -180 to 180 around the international date line to one that's 0 to 360 around the prime meridian. If you pass in a Dataset or DataArray, the "lon" coordinates will be changed. Otherwise, it assumes you're passing in numeric data. 
@@ -878,9 +882,9 @@ def do_it(tmp): # Helper function to check that a list is strictly increasing -def is_strictly_increasing(L): +def is_strictly_increasing(this_list): # https://stackoverflow.com/a/4983359/2965321 - return all(x < y for x, y in zip(L, L[1:])) + return all(x < y for x, y in zip(this_list, this_list[1:])) # Ensure that longitude axis coordinates are monotonically increasing diff --git a/python/ctsm/crop_calendars/generate_gdds_functions.py b/python/ctsm/crop_calendars/generate_gdds_functions.py index 74e8fd57f4..0b8f1211b7 100644 --- a/python/ctsm/crop_calendars/generate_gdds_functions.py +++ b/python/ctsm/crop_calendars/generate_gdds_functions.py @@ -156,7 +156,7 @@ def import_rx_dates(s_or_h, date_infile, incl_patches1d_itype_veg, mxsowings, lo this_var = f"{s_or_h}date{n_sowing+1}_{i}" date_var_list = date_var_list + [this_var] - this_ds = utils.import_ds(date_infile, myVars=date_var_list) + this_ds = utils.import_ds(date_infile, my_vars=date_var_list) for var in this_ds: this_ds = this_ds.rename({var: var.replace(f"{s_or_h}date", "gs")}) @@ -272,9 +272,9 @@ def import_and_process_1yr( print(h1_filelist) dates_ds = utils.import_ds( h1_filelist, - myVars=["SDATES", "HDATES"], - myVegtypes=crops_to_read, - timeSlice=slice(f"{this_year}-01-01", f"{this_year}-12-31"), + my_vars=["SDATES", "HDATES"], + my_vegtypes=crops_to_read, + time_slice=slice(f"{this_year}-01-01", f"{this_year}-12-31"), chunks=chunks, ) @@ -541,8 +541,8 @@ def import_and_process_1yr( error(logger, f"No files found matching pattern '*h2.{this_year-1}-01-01*.nc(.base)'") h2_ds = utils.import_ds( h2_files, - myVars=my_vars, - myVegtypes=crops_to_read, + my_vars=my_vars, + my_vegtypes=crops_to_read, chunks=chunks, ) diff --git a/python/ctsm/crop_calendars/regrid_ggcmi_shdates.py b/python/ctsm/crop_calendars/regrid_ggcmi_shdates.py index 5c2e7f8820..1a16387f7d 100644 --- a/python/ctsm/crop_calendars/regrid_ggcmi_shdates.py +++ b/python/ctsm/crop_calendars/regrid_ggcmi_shdates.py @@ -117,9 +117,9 @@ def regrid_ggcmi_shdates( # Import and format latitude if "lat" in template_ds_in: - lat, Nlat = import_coord_1d(template_ds_in, "lat") + lat, n_lat = import_coord_1d(template_ds_in, "lat") elif "LATIXY" in template_ds_in: - lat, Nlat = import_coord_2d(template_ds_in, "lat", "LATIXY") + lat, n_lat = import_coord_2d(template_ds_in, "lat", "LATIXY") lat.attrs["axis"] = "Y" else: abort("No latitude variable found in regrid template file") @@ -130,14 +130,14 @@ def regrid_ggcmi_shdates( # Import and format longitude if "lon" in template_ds_in: - lon, Nlon = import_coord_1d(template_ds_in, "lon") + lon, n_lon = import_coord_1d(template_ds_in, "lon") elif "LONGXY" in template_ds_in: - lon, Nlon = import_coord_2d(template_ds_in, "lon", "LONGXY") + lon, n_lon = import_coord_2d(template_ds_in, "lon", "LONGXY") lon.attrs["axis"] = "Y" else: abort("No longitude variable found in regrid template file") template_da_out = xr.DataArray( - data=np.full((Nlat, Nlon), 0.0), + data=np.full((n_lat, n_lon), 0.0), dims={"lat": lat, "lon": lon}, name="area", ) @@ -159,36 +159,38 @@ def regrid_ggcmi_shdates( if len(input_files) == 0: abort(f"No files found matching {os.path.join(os.getcwd(), pattern)}") input_files.sort() - for f in input_files: - this_crop = f[0:6] + for file in input_files: + this_crop = file[0:6] if crop_list is not None and this_crop not in crop_list: continue logger.info(" " + this_crop) - f2 = os.path.join(regrid_output_directory, f) - f3 = f2.replace(regrid_extension, 
f"_nninterp-{regrid_resolution}{regrid_extension}") + file_2 = os.path.join(regrid_output_directory, file) + file_3 = file_2.replace( + regrid_extension, f"_nninterp-{regrid_resolution}{regrid_extension}" + ) - if os.path.exists(f3): - os.remove(f3) + if os.path.exists(file_3): + os.remove(file_3) # Sometimes cdo fails for no apparent reason. In testing this never happened more than 3x in a row. try: run_and_check( - f"module load cdo; cdo -L -remapnn,'{templatefile}' -setmisstonn '{f}' '{f3}'" + f"module load cdo; cdo -L -remapnn,'{templatefile}' -setmisstonn '{file}' '{file_3}'" ) except: try: run_and_check( - f"module load cdo; cdo -L -remapnn,'{templatefile}' -setmisstonn '{f}' '{f3}'" + f"module load cdo; cdo -L -remapnn,'{templatefile}' -setmisstonn '{file}' '{file_3}'" ) except: try: run_and_check( - f"module load cdo; cdo -L -remapnn,'{templatefile}' -setmisstonn '{f}' '{f3}'" + f"module load cdo; cdo -L -remapnn,'{templatefile}' -setmisstonn '{file}' '{file_3}'" ) except: run_and_check( - f"module load cdo; cdo -L -remapnn,'{templatefile}' -setmisstonn '{f}' '{f3}'" + f"module load cdo; cdo -L -remapnn,'{templatefile}' -setmisstonn '{file}' '{file_3}'" ) # Delete template file, which is no longer needed diff --git a/python/ctsm/test/test_unit_utils_import_coord.py b/python/ctsm/test/test_unit_utils_import_coord.py index c5607356fd..6e339a913f 100755 --- a/python/ctsm/test/test_unit_utils_import_coord.py +++ b/python/ctsm/test/test_unit_utils_import_coord.py @@ -62,8 +62,8 @@ def test_importcoord1d(self): Tests importing a 1-d lat/lon variable """ ds = xr.open_dataset(self._1d_lonlat_file) - lat, Nlat = import_coord_1d(ds, "lat") - np.testing.assert_equal(Nlat, 360) + lat, n_lat = import_coord_1d(ds, "lat") + np.testing.assert_equal(n_lat, 360) np.testing.assert_array_equal(lat.values[:4], [89.75, 89.25, 88.75, 88.25]) np.testing.assert_array_equal(lat.values[-4:], [-88.25, -88.75, -89.25, -89.75]) From 5fc5bf2e5ba80ae2dbfe78b453807227230a5ed4 Mon Sep 17 00:00:00 2001 From: Sam Rabin Date: Fri, 9 Feb 2024 15:34:23 -0700 Subject: [PATCH 66/85] pylint: Resolve no-else-return and no-else-raise. 
--- python/ctsm/crop_calendars/cropcal_utils.py | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/python/ctsm/crop_calendars/cropcal_utils.py b/python/ctsm/crop_calendars/cropcal_utils.py index 2f84bd6739..d7e7ff93f4 100644 --- a/python/ctsm/crop_calendars/cropcal_utils.py +++ b/python/ctsm/crop_calendars/cropcal_utils.py @@ -189,14 +189,13 @@ def is_this_vegtype(this_vegtype, this_filter, this_method): # Perform the comparison if this_method == "ok_contains": return any(n in this_vegtype for n in this_filter) - elif this_method == "notok_contains": + if this_method == "notok_contains": return not any(n in this_vegtype for n in this_filter) - elif this_method == "ok_exact": + if this_method == "ok_exact": return any(n == this_vegtype for n in this_filter) - elif this_method == "notok_exact": + if this_method == "notok_exact": return not any(n == this_vegtype for n in this_filter) - else: - raise ValueError(f"Unknown comparison method: '{this_method}'") + raise ValueError(f"Unknown comparison method: '{this_method}'") # Get boolean list of whether each vegetation type in list is a managed crop @@ -241,10 +240,9 @@ def vegtype_str2int(vegtype_str, vegtype_mainlist=None): raise TypeError( f"Not sure how to handle vegtype_mainlist as list of {type(vegtype_mainlist[0])}" ) - else: - raise TypeError( - f"Not sure how to handle vegtype_mainlist as type {type(vegtype_mainlist[0])}" - ) + raise TypeError( + f"Not sure how to handle vegtype_mainlist as type {type(vegtype_mainlist[0])}" + ) if vegtype_str.shape == (): indices = np.array([-1]) @@ -847,10 +845,9 @@ def check_ok(tmp, fail_silently): if msg == "": return True - elif fail_silently: + if fail_silently: return False - else: - raise ValueError(msg) + raise ValueError(msg) def do_it(tmp): tmp = tmp + 360 From cbff7d928450a48c972627fbe18599966bd56782 Mon Sep 17 00:00:00 2001 From: Sam Rabin Date: Fri, 9 Feb 2024 15:35:38 -0700 Subject: [PATCH 67/85] pylint: Resolve singleton-comparison. 
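Background on why pylint flags == None: equality dispatches to __eq__, which
numpy and xarray overload to broadcast elementwise, so comparing an array
against None with == can yield an array rather than a bool. The identity
test is None always yields a plain bool, which is what a None check means.
A small demonstration, assuming a reasonably recent numpy:

    import numpy as np

    arr = np.arange(3)
    print(arr == None)  # elementwise comparison: [False False False]
    print(arr is None)  # identity: False, a plain bool

Using the elementwise result in an if statement would raise the familiar
"truth value of an array is ambiguous" error, which is the failure mode the
is/is not form avoids.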
--- python/ctsm/crop_calendars/cropcal_utils.py | 22 ++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/python/ctsm/crop_calendars/cropcal_utils.py b/python/ctsm/crop_calendars/cropcal_utils.py index d7e7ff93f4..a67b2ed346 100644 --- a/python/ctsm/crop_calendars/cropcal_utils.py +++ b/python/ctsm/crop_calendars/cropcal_utils.py @@ -233,7 +233,7 @@ def vegtype_str2int(vegtype_str, vegtype_mainlist=None): vegtype_mainlist = vegtype_mainlist.vegtype_str.values elif isinstance(vegtype_mainlist, xr.DataArray): vegtype_mainlist = vegtype_mainlist.values - elif vegtype_mainlist == None: + elif vegtype_mainlist is None: vegtype_mainlist = define_pftlist() if not isinstance(vegtype_mainlist, list) and isinstance(vegtype_mainlist[0], str): if isinstance(vegtype_mainlist, list): @@ -320,11 +320,11 @@ def xr_flexsel(xr_object, patches1d_itype_veg=None, warn_about_seltype_interp=Tr slice_members = [] if selection == slice(0): raise ValueError("slice(0) will be empty") - if selection.start != None: + if selection.start is not None: slice_members = slice_members + [selection.start] - if selection.stop != None: + if selection.stop is not None: slice_members = slice_members + [selection.stop] - if selection.step != None: + if selection.step is not None: slice_members = slice_members + [selection.step] if slice_members == []: raise TypeError("slice is all None?") @@ -346,7 +346,7 @@ def xr_flexsel(xr_object, patches1d_itype_veg=None, warn_about_seltype_interp=Tr else: this_type = type(member) break - if this_type == None: + if this_type is None: this_type = int selection = selection.astype(int) else: @@ -401,7 +401,7 @@ def xr_flexsel(xr_object, patches1d_itype_veg=None, warn_about_seltype_interp=Tr " name?" ) pattern = re.compile(f"1d_{this_xy}") - matches = [x for x in list(xr_object.keys()) if pattern.search(x) != None] + matches = [x for x in list(xr_object.keys()) if pattern.search(x) is not None] for var in matches: if len(xr_object[var].dims) != 1: raise RuntimeError( @@ -473,14 +473,14 @@ def mfdataset_preproc(ds_in, vars_to_import, vegtypes_to_import, time_slice): # Rename "pft" dimension and variables to "patch", if needed if "pft" in ds_in.dims: pattern = re.compile("pft.*1d") - matches = [x for x in list(ds_in.keys()) if pattern.search(x) != None] + matches = [x for x in list(ds_in.keys()) if pattern.search(x) is not None] pft2patch_dict = {"pft": "patch"} for match in matches: pft2patch_dict[match] = match.replace("pft", "patch").replace("patchs", "patches") ds_in = ds_in.rename(pft2patch_dict) derived_vars = [] - if vars_to_import != None: + if vars_to_import is not None: # Split vars_to_import into variables that are vs. aren't already in ds derived_vars = [v for v in vars_to_import if v not in ds_in] present_vars = [v for v in vars_to_import if v in ds_in] @@ -496,7 +496,7 @@ def mfdataset_preproc(ds_in, vars_to_import, vegtypes_to_import, time_slice): oned_vars = [] for dim in dim_list: pattern = re.compile(f"{dim}.*1d") - matches = [x for x in list(ds_in.keys()) if pattern.search(x) != None] + matches = [x for x in list(ds_in.keys()) if pattern.search(x) is not None] oned_vars = list(set(oned_vars + matches)) # Add dimensions and _1d variables to vars_to_import @@ -537,7 +537,7 @@ def mfdataset_preproc(ds_in, vars_to_import, vegtypes_to_import, time_slice): ds_in = xr.merge([ds_in, vegtype_da, patches1d_itype_veg_str]) # Restrict to veg. 
types of interest, if any - if vegtypes_to_import != None: + if vegtypes_to_import is not None: ds_in = xr_flexsel(ds_in, vegtype=vegtypes_to_import) # Restrict to time slice, if any @@ -590,7 +590,7 @@ def import_ds( my_vegtypes = vegtype_str2int(my_vegtypes) # Same for these variables. - if my_vars != None: + if my_vars is not None: if not isinstance(my_vars, list): my_vars = [my_vars] if my_vars_missing_ok: From 08ce92963a3307022afa0221cee296099179ea24 Mon Sep 17 00:00:00 2001 From: Sam Rabin Date: Fri, 9 Feb 2024 15:37:59 -0700 Subject: [PATCH 68/85] pylint: Resolve bare-except. --- python/ctsm/crop_calendars/cropcal_utils.py | 4 ++-- python/ctsm/crop_calendars/regrid_ggcmi_shdates.py | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/python/ctsm/crop_calendars/cropcal_utils.py b/python/ctsm/crop_calendars/cropcal_utils.py index a67b2ed346..2efd3508c5 100644 --- a/python/ctsm/crop_calendars/cropcal_utils.py +++ b/python/ctsm/crop_calendars/cropcal_utils.py @@ -774,7 +774,7 @@ def grid_one_variable(this_ds, var, fill_value=None, **kwargs): fill_indices.append(Ellipsis) try: thisvar_gridded[tuple(fill_indices[: len(fill_indices)])] = thisvar_da.values - except: + except: # pylint: disable=bare-except thisvar_gridded[tuple(fill_indices[: len(fill_indices)])] = thisvar_da.values.transpose() if not np.any(np.bitwise_not(np.isnan(thisvar_gridded))): if np.all(np.isnan(thisvar_da.values)): @@ -805,7 +805,7 @@ def grid_one_variable(this_ds, var, fill_value=None, **kwargs): def safer_timeslice(ds_in, time_slice, time_var="time"): try: ds_in = ds_in.sel({time_var: time_slice}) - except: + except: # pylint: disable=bare-except # If the issue might have been slicing using strings, try to fall back to integer slicing if ( isinstance(time_slice.start, str) diff --git a/python/ctsm/crop_calendars/regrid_ggcmi_shdates.py b/python/ctsm/crop_calendars/regrid_ggcmi_shdates.py index 1a16387f7d..c8e27a89f9 100644 --- a/python/ctsm/crop_calendars/regrid_ggcmi_shdates.py +++ b/python/ctsm/crop_calendars/regrid_ggcmi_shdates.py @@ -178,17 +178,17 @@ def regrid_ggcmi_shdates( run_and_check( f"module load cdo; cdo -L -remapnn,'{templatefile}' -setmisstonn '{file}' '{file_3}'" ) - except: + except: # pylint: disable=bare-except try: run_and_check( f"module load cdo; cdo -L -remapnn,'{templatefile}' -setmisstonn '{file}' '{file_3}'" ) - except: + except: # pylint: disable=bare-except try: run_and_check( f"module load cdo; cdo -L -remapnn,'{templatefile}' -setmisstonn '{file}' '{file_3}'" ) - except: + except: # pylint: disable=bare-except run_and_check( f"module load cdo; cdo -L -remapnn,'{templatefile}' -setmisstonn '{file}' '{file_3}'" ) From 58b75f121fb9bc5528f660312eeff32395e0c30e Mon Sep 17 00:00:00 2001 From: Sam Rabin Date: Fri, 9 Feb 2024 15:39:43 -0700 Subject: [PATCH 69/85] pylint: Resolve unused-import. 
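The except: clauses kept here are intentional catch-alls (the cdo retry
ladder and the time-slice fallback), so they are annotated with
pylint: disable=bare-except rather than narrowed. For comparison, the usual
alternative when a catch-all is wanted is except Exception, which still
retries on any ordinary failure but does not swallow KeyboardInterrupt or
SystemExit the way a bare except does. A hypothetical helper (not the code
in this repo) showing that shape:

    import subprocess

    def run_with_retries(cmd, tries=4):
        """Run a shell command, retrying up to `tries` times."""
        for attempt in range(tries):
            try:
                return subprocess.run(cmd, shell=True, check=True)
            except Exception:  # pylint: disable=broad-except
                if attempt == tries - 1:
                    raise  # out of retries; re-raise the last error

A loop like this also expresses the "try up to N times" intent of the nested
try/except ladder in regrid_ggcmi_shdates.py more directly.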
--- python/ctsm/crop_calendars/cropcal_utils.py | 7 ------- 1 file changed, 7 deletions(-) diff --git a/python/ctsm/crop_calendars/cropcal_utils.py b/python/ctsm/crop_calendars/cropcal_utils.py index 2efd3508c5..0f0824d6e8 100644 --- a/python/ctsm/crop_calendars/cropcal_utils.py +++ b/python/ctsm/crop_calendars/cropcal_utils.py @@ -7,13 +7,6 @@ import warnings import importlib -with warnings.catch_warnings(): - warnings.filterwarnings(action="ignore", category=DeprecationWarning) - if importlib.find_loader("cf_units") is not None: - import cf_units as cf - if importlib.find_loader("cartopy") is not None: - from cartopy.util import add_cyclic_point -import cftime import numpy as np import xarray as xr From 3dae192c7424d857e4ed07d4a0b7a3afb5665abc Mon Sep 17 00:00:00 2001 From: Sam Rabin Date: Fri, 9 Feb 2024 15:50:16 -0700 Subject: [PATCH 70/85] pylint: Add docstrings. --- python/ctsm/crop_calendars/cropcal_utils.py | 139 +++++++++++++----- .../crop_calendars/regrid_ggcmi_shdates.py | 15 +- 2 files changed, 113 insertions(+), 41 deletions(-) diff --git a/python/ctsm/crop_calendars/cropcal_utils.py b/python/ctsm/crop_calendars/cropcal_utils.py index 0f0824d6e8..db0d1f4777 100644 --- a/python/ctsm/crop_calendars/cropcal_utils.py +++ b/python/ctsm/crop_calendars/cropcal_utils.py @@ -11,8 +11,10 @@ import xarray as xr -# List of PFTs used in CLM def define_pftlist(): + """ + Return list of PFTs used in CLM + """ pftlist = [ "not_vegetated", "needleleaf_evergreen_temperate_tree", @@ -97,8 +99,10 @@ def define_pftlist(): return pftlist -# Get CLM ivt number corresponding to a given name def ivt_str2int(ivt_str): + """ + Get CLM ivt number corresponding to a given name + """ pftlist = define_pftlist() if isinstance(ivt_str, str): ivt_int = pftlist.index(ivt_str) @@ -114,8 +118,10 @@ def ivt_str2int(ivt_str): return ivt_int -# Get CLM ivt name corresponding to a given number def ivt_int2str(ivt_int): + """ + Get CLM ivt name corresponding to a given number + """ pftlist = define_pftlist() if np.issubdtype(type(ivt_int), np.integer) or int(ivt_int) == ivt_int: ivt_str = pftlist[int(ivt_int)] @@ -133,19 +139,19 @@ def ivt_int2str(ivt_int): return ivt_str -# Does this vegetation type's name match (for a given comparison method) any member of a filtering list? -""" -Methods: +def is_this_vegtype(this_vegtype, this_filter, this_method): + """ + Does this vegetation type's name match (for a given comparison method) any member of a filtering + list? + + Methods: ok_contains: True if any member of this_filter is found in this_vegtype. notok_contains: True of no member of this_filter is found in this_vegtype. - ok_exact: True if this_vegtype matches any member of this_filter + ok_exact: True if this_vegtype matches any member of this_filter exactly. - notok_exact: True if this_vegtype does not match any member of + notok_exact: True if this_vegtype does not match any member of this_filter exactly. -""" - - -def is_this_vegtype(this_vegtype, this_filter, this_method): + """ # Make sure data type of this_vegtype is acceptable if isinstance(this_vegtype, float) and int(this_vegtype) == this_vegtype: this_vegtype = int(this_vegtype) @@ -191,33 +197,35 @@ def is_this_vegtype(this_vegtype, this_filter, this_method): raise ValueError(f"Unknown comparison method: '{this_method}'") -# Get boolean list of whether each vegetation type in list is a managed crop -""" - this_vegtypelist: The list of vegetation types whose members you want to - test. 
- this_filter: The list of strings against which you want to compare - each member of this_vegtypelist. - this_method: How you want to do the comparison. See is_this_vegtype(). -""" - - def is_each_vegtype(this_vegtypelist, this_filter, this_method): + """ + Get boolean list of whether each vegetation type in list is a managed crop + + this_vegtypelist: The list of vegetation types whose members you want to test. + this_filter: The list of strings against which you want to compare each member of + this_vegtypelist. + this_method: How you want to do the comparison. See is_this_vegtype(). + """ if isinstance(this_vegtypelist, xr.DataArray): this_vegtypelist = this_vegtypelist.values return [is_this_vegtype(x, this_filter, this_method) for x in this_vegtypelist] -# List (strings) of managed crops in CLM. def define_mgdcrop_list(): + """ + List (strings) of managed crops in CLM. + """ notcrop_list = ["tree", "grass", "shrub", "unmanaged", "not_vegetated"] defined_pftlist = define_pftlist() is_crop = is_each_vegtype(defined_pftlist, notcrop_list, "notok_contains") return [defined_pftlist[i] for i, x in enumerate(is_crop) if x] -# Convert list of vegtype strings to integer index equivalents. def vegtype_str2int(vegtype_str, vegtype_mainlist=None): + """ + Convert list of vegtype strings to integer index equivalents. + """ convert_to_ndarray = not isinstance(vegtype_str, np.ndarray) if convert_to_ndarray: vegtype_str = np.array(vegtype_str) @@ -248,9 +256,19 @@ def vegtype_str2int(vegtype_str, vegtype_mainlist=None): return indices -# Flexibly subset time(s) and/or vegetation type(s) from an xarray Dataset or DataArray. Keyword arguments like dimension=selection. Selections can be individual values or slice()s. Optimize memory usage by beginning keyword argument list with the selections that will result in the largest reduction of object size. Use dimension "vegtype" to extract patches of designated vegetation type (can be string or integer). -# Can also do dimension=function---e.g., time=np.mean will take the mean over the time dimension. def xr_flexsel(xr_object, patches1d_itype_veg=None, warn_about_seltype_interp=True, **kwargs): + """ + Flexibly subset time(s) and/or vegetation type(s) from an xarray Dataset or DataArray. + + - Keyword arguments like dimension=selection. + - Selections can be individual values or slice()s. + - Optimize memory usage by beginning keyword argument list with the selections that will result + in the largest reduction of object size. + - Use dimension "vegtype" to extract patches of designated vegetation type (can be string or + integer). + - Can also do dimension=function---e.g., time=np.mean will take the mean over the time + dimension. + """ # Setup havewarned = False delimiter = "__" @@ -438,8 +456,10 @@ def xr_flexsel(xr_object, patches1d_itype_veg=None, warn_about_seltype_interp=Tr return xr_object -# Get PFT of each patch, in both integer and string forms. def get_patch_ivts(this_ds, this_pftlist): + """ + Get PFT of each patch, in both integer and string forms. + """ # First, get all the integer values; should be time*pft or pft*time. We will eventually just take the first timestep. vegtype_int = this_ds.patches1d_itype_veg vegtype_int.values = vegtype_int.values.astype(int) @@ -451,8 +471,10 @@ def get_patch_ivts(this_ds, this_pftlist): return {"int": vegtype_int, "str": vegtype_str, "all_str": this_pftlist} -# Convert a list of strings with vegetation type names into a DataArray. Used to add vegetation type info in import_ds(). 
def get_vegtype_str_da(vegtype_str): + """ + Convert a list of strings with vegetation type names into a DataArray. Used to add vegetation type info in import_ds(). + """ nvt = len(vegtype_str) vegtype_str_da = xr.DataArray( vegtype_str, coords={"ivt": np.arange(0, nvt)}, dims=["ivt"], name="vegtype_str" @@ -460,9 +482,16 @@ def get_vegtype_str_da(vegtype_str): return vegtype_str_da -# Function to drop unwanted variables in preprocessing of open_mfdataset(), making sure to NOT drop any unspecified variables that will be useful in gridding. Also adds vegetation type info in the form of a DataArray of strings. -# Also renames "pft" dimension (and all like-named variables, e.g., pft1d_itype_veg_str) to be named like "patch". This can later be reversed, for compatibility with other code, using patch2pft(). def mfdataset_preproc(ds_in, vars_to_import, vegtypes_to_import, time_slice): + """ + Function to drop unwanted variables in preprocessing of open_mfdataset(). + + - Makes sure to NOT drop any unspecified variables that will be useful in gridding. + - Also adds vegetation type info in the form of a DataArray of strings. + - Also renames "pft" dimension (and all like-named variables, e.g., pft1d_itype_veg_str) to be + named like "patch". This can later be reversed, for compatibility with other code, using + patch2pft(). + """ # Rename "pft" dimension and variables to "patch", if needed if "pft" in ds_in.dims: pattern = re.compile("pft.*1d") @@ -564,7 +593,6 @@ def mfdataset_preproc(ds_in, vars_to_import, vegtypes_to_import, time_slice): return ds_in -# Import a dataset that can be spread over multiple files, only including specified variables and/or vegetation types and/or timesteps, concatenating by time. DOES actually read the dataset into memory, but only AFTER dropping unwanted variables and/or vegetation types. def import_ds( filelist, my_vars=None, @@ -575,6 +603,13 @@ def import_ds( rename_lsmlatlon=False, chunks=None, ): + """ + Import a dataset that can be spread over multiple files, only including specified variables + and/or vegetation types and/or timesteps, concatenating by time. + + - DOES actually read the dataset into memory, but only AFTER dropping unwanted variables and/or + vegetation types. + """ # Convert my_vegtypes here, if needed, to avoid repeating the process each time you read a file in xr.open_mfdataset(). if my_vegtypes is not None: if not isinstance(my_vegtypes, list): @@ -675,8 +710,10 @@ def import_ds( return this_ds -# Return a DataArray, with defined coordinates, for a given variable in a dataset. def get_thisvar_da(var, this_ds): + """ + Return a DataArray, with defined coordinates, for a given variable in a dataset. + """ # Make DataArray for this variable thisvar_da = np.array(this_ds.variables[var]) these_dims = this_ds.variables[var].dims @@ -692,10 +729,16 @@ def get_thisvar_da(var, this_ds): return thisvar_da -# Make a geographically gridded DataArray (with dimensions time, vegetation type [as string], lat, lon) of one variable within a Dataset. Optional keyword arguments will be passed to xr_flexsel() to select single steps or slices along the specified ax(ie)s. -# -# fill_value: Default None means grid will be filled with NaN, unless the variable in question already has a _FillValue, in which case that will be used. def grid_one_variable(this_ds, var, fill_value=None, **kwargs): + """ + Make a geographically gridded DataArray (with dimensions time, vegetation type [as string], lat, + lon) of one variable within a Dataset. 
+ + - Optional keyword arguments will be passed to xr_flexsel() to select single steps or slices + along the specified ax(ie)s. + - fill_value: Default None means grid will be filled with NaN, unless the variable in question + already has a _FillValue, in which case that will be used. + """ # Get this Dataset's values for selection(s), if provided this_ds = xr_flexsel(this_ds, **kwargs) @@ -794,8 +837,12 @@ def grid_one_variable(this_ds, var, fill_value=None, **kwargs): return thisvar_gridded -# ctsm_pylib can't handle time slicing like Dataset.sel(time=slice("1998-01-01", "2005-12-31")) for some reason. This function tries to fall back to slicing by integers. It should work with both Datasets and DataArrays. def safer_timeslice(ds_in, time_slice, time_var="time"): + """ + ctsm_pylib can't handle time slicing like Dataset.sel(time=slice("1998-01-01", "2005-12-31")) + for some reason. This function tries to fall back to slicing by integers. It should work with + both Datasets and DataArrays. + """ try: ds_in = ds_in.sel({time_var: time_slice}) except: # pylint: disable=bare-except @@ -826,8 +873,15 @@ def safer_timeslice(ds_in, time_slice, time_var="time"): return ds_in -# Convert a longitude axis that's -180 to 180 around the international date line to one that's 0 to 360 around the prime meridian. If you pass in a Dataset or DataArray, the "lon" coordinates will be changed. Otherwise, it assumes you're passing in numeric data. def lon_idl2pm(lons_in, fail_silently=False): + """ + Convert a longitude axis that's -180 to 180 around the international date line to one that's 0 + to 360 around the prime meridian. + + - If you pass in a Dataset or DataArray, the "lon" coordinates will be changed. Otherwise, it + assumes you're passing in numeric data. + """ + def check_ok(tmp, fail_silently): msg = "" @@ -871,14 +925,19 @@ def do_it(tmp): return lons_out -# Helper function to check that a list is strictly increasing def is_strictly_increasing(this_list): - # https://stackoverflow.com/a/4983359/2965321 + """ + Helper function to check that a list is strictly increasing + + https://stackoverflow.com/a/4983359/2965321 + """ return all(x < y for x, y in zip(this_list, this_list[1:])) -# Ensure that longitude axis coordinates are monotonically increasing def make_lon_increasing(xr_obj): + """ + Ensure that longitude axis coordinates are monotonically increasing + """ if not "lon" in xr_obj.dims: return xr_obj diff --git a/python/ctsm/crop_calendars/regrid_ggcmi_shdates.py b/python/ctsm/crop_calendars/regrid_ggcmi_shdates.py index c8e27a89f9..8db38ddf71 100644 --- a/python/ctsm/crop_calendars/regrid_ggcmi_shdates.py +++ b/python/ctsm/crop_calendars/regrid_ggcmi_shdates.py @@ -1,3 +1,6 @@ +""" +Regrid GGCMI sowing and harvest date files +""" from subprocess import run import os import glob @@ -40,6 +43,9 @@ def main(): def run_and_check(cmd): + """ + Run a given shell command and check its result + """ result = run( cmd, shell=True, @@ -50,8 +56,12 @@ def run_and_check(cmd): abort(f"Trouble running `{result.args}` in shell:\n{result.stdout}\n{result.stderr}") -# Functionized because these are shared by process_ggcmi_shdates def define_arguments(parser): + """ + Set up arguments shared between regrid_ggcmi_shdates and process_ggcmi_shdates + + Functionized because these are shared by process_ggcmi_shdates + """ # Required parser.add_argument( "-rr", @@ -92,6 +102,9 @@ def regrid_ggcmi_shdates( regrid_extension, crop_list, ): + """ + Regrid GGCMI sowing and harvest date files + """ 
logger.info(f"Regridding GGCMI crop calendars to {regrid_resolution}:") # Ensure we can call necessary shell script(s) From 538ab01a593b81a29c100a9601816e5bdd531305 Mon Sep 17 00:00:00 2001 From: Sam Rabin Date: Fri, 9 Feb 2024 16:12:11 -0700 Subject: [PATCH 71/85] Move xr_flexsel to its own module; functionize bits of it. --- python/ctsm/crop_calendars/cropcal_utils.py | 201 +------------ .../crop_calendars/generate_gdds_functions.py | 5 +- python/ctsm/crop_calendars/xr_flexsel.py | 263 ++++++++++++++++++ 3 files changed, 267 insertions(+), 202 deletions(-) create mode 100644 python/ctsm/crop_calendars/xr_flexsel.py diff --git a/python/ctsm/crop_calendars/cropcal_utils.py b/python/ctsm/crop_calendars/cropcal_utils.py index db0d1f4777..171e0bae56 100644 --- a/python/ctsm/crop_calendars/cropcal_utils.py +++ b/python/ctsm/crop_calendars/cropcal_utils.py @@ -9,6 +9,7 @@ import numpy as np import xarray as xr +from ctsm.crop_calendars.xr_flexsel import xr_flexsel def define_pftlist(): @@ -256,206 +257,6 @@ def vegtype_str2int(vegtype_str, vegtype_mainlist=None): return indices -def xr_flexsel(xr_object, patches1d_itype_veg=None, warn_about_seltype_interp=True, **kwargs): - """ - Flexibly subset time(s) and/or vegetation type(s) from an xarray Dataset or DataArray. - - - Keyword arguments like dimension=selection. - - Selections can be individual values or slice()s. - - Optimize memory usage by beginning keyword argument list with the selections that will result - in the largest reduction of object size. - - Use dimension "vegtype" to extract patches of designated vegetation type (can be string or - integer). - - Can also do dimension=function---e.g., time=np.mean will take the mean over the time - dimension. - """ - # Setup - havewarned = False - delimiter = "__" - - for key, selection in kwargs.items(): - if callable(selection): - # It would have been really nice to do selection(xr_object, axis=key), but numpy methods and xarray methods disagree on "axis" vs. "dimension." So instead, just do this manually. - if selection == np.mean: - try: - xr_object = xr_object.mean(dim=key) - except: - raise ValueError( - f"Failed to take mean of dimension {key}. Try doing so outside of" - " xr_flexsel()." - ) - else: - raise ValueError(f"xr_flexsel() doesn't recognize function {selection}") - - elif key == "vegtype": - # Convert to list, if needed - if not isinstance(selection, list): - selection = [selection] - - # Convert to indices, if needed - if isinstance(selection[0], str): - selection = vegtype_str2int(selection) - - # Get list of boolean(s) - if isinstance(selection[0], int): - if isinstance(patches1d_itype_veg, type(None)): - patches1d_itype_veg = xr_object.patches1d_itype_veg.values - elif isinstance(patches1d_itype_veg, xr.core.dataarray.DataArray): - patches1d_itype_veg = patches1d_itype_veg.values - is_vegtype = is_each_vegtype(patches1d_itype_veg, selection, "ok_exact") - elif isinstance(selection[0], bool): - if len(selection) != len(xr_object.patch): - raise ValueError( - "If providing boolean 'vegtype' argument to xr_flexsel(), it must be the" - f" same length as xr_object.patch ({len(selection)} vs." 
- f" {len(xr_object.patch)})" - ) - is_vegtype = selection - else: - raise TypeError(f"Not sure how to handle 'vegtype' of type {type(selection[0])}") - xr_object = xr_object.isel(patch=[i for i, x in enumerate(is_vegtype) if x]) - if "ivt" in xr_object: - xr_object = xr_object.isel( - ivt=is_each_vegtype(xr_object.ivt.values, selection, "ok_exact") - ) - - else: - # Parse selection type, if provided - if delimiter in key: - key, selection_type = key.split(delimiter) - - # Check type of selection - else: - is_inefficient = False - if isinstance(selection, slice): - slice_members = [] - if selection == slice(0): - raise ValueError("slice(0) will be empty") - if selection.start is not None: - slice_members = slice_members + [selection.start] - if selection.stop is not None: - slice_members = slice_members + [selection.stop] - if selection.step is not None: - slice_members = slice_members + [selection.step] - if slice_members == []: - raise TypeError("slice is all None?") - this_type = int - for member in slice_members: - if member < 0 or not isinstance(member, int): - this_type = "values" - break - elif isinstance(selection, np.ndarray): - if selection.dtype.kind in np.typecodes["AllInteger"]: - this_type = int - else: - is_inefficient = True - this_type = None - for member in selection: - if member < 0 or member % 1 > 0: - if isinstance(member, int): - this_type = "values" - else: - this_type = type(member) - break - if this_type is None: - this_type = int - selection = selection.astype(int) - else: - this_type = type(selection) - - warn_about_this_seltype_interp = warn_about_seltype_interp - if this_type == list and isinstance(selection[0], str): - selection_type = "values" - warn_about_this_seltype_interp = False - elif this_type == int: - selection_type = "indices" - else: - selection_type = "values" - - if warn_about_this_seltype_interp: - # Suggest suppressing selection type interpretation warnings - if not havewarned: - print( - "xr_flexsel(): Suppress all 'selection type interpretation' messages by" - " specifying warn_about_seltype_interp=False" - ) - havewarned = True - if is_inefficient: - extra = " This will also improve efficiency for large selections." - else: - extra = "" - print( - f"xr_flexsel(): Selecting {key} as {selection_type} because selection was" - f" interpreted as {this_type}. If not correct, specify selection type" - " ('indices' or 'values') in keyword like" - f" '{key}{delimiter}SELECTIONTYPE=...' instead of '{key}=...'.{extra}" - ) - - # Trim along relevant 1d axes - if isinstance(xr_object, xr.Dataset) and key in ["lat", "lon"]: - if selection_type == "indices": - incl_coords = xr_object[key].values[selection] - elif selection_type == "values": - if isinstance(selection, slice): - incl_coords = xr_object.sel({key: selection}, drop=False)[key].values - else: - incl_coords = selection - else: - raise TypeError(f"selection_type {selection_type} not recognized") - if key == "lat": - this_xy = "jxy" - elif key == "lon": - this_xy = "ixy" - else: - raise KeyError( - f"Key '{key}' not recognized: What 1d_ suffix should I use for variable" - " name?" 
- ) - pattern = re.compile(f"1d_{this_xy}") - matches = [x for x in list(xr_object.keys()) if pattern.search(x) is not None] - for var in matches: - if len(xr_object[var].dims) != 1: - raise RuntimeError( - f"Expected {var} to have 1 dimension, but it has" - f" {len(xr_object[var].dims)}: {xr_object[var].dims}" - ) - dim = xr_object[var].dims[0] - # print(f"Variable {var} has dimension {dim}") - coords = xr_object[key].values[xr_object[var].values.astype(int) - 1] - # print(f"{dim} size before: {xr_object.sizes[dim]}") - ok_ind = [] - new_1d_this_xy = [] - for i, member in enumerate(coords): - if member in incl_coords: - ok_ind = ok_ind + [i] - new_1d_this_xy = new_1d_this_xy + [ - (incl_coords == member).nonzero()[0] + 1 - ] - xr_object = xr_object.isel({dim: ok_ind}) - new_1d_this_xy = np.array(new_1d_this_xy).squeeze() - xr_object[var].values = new_1d_this_xy - # print(f"{dim} size after: {xr_object.sizes[dim]}") - - # Perform selection - if selection_type == "indices": - # Have to select like this instead of with index directly because otherwise assign_coords() will throw an error. Not sure why. - if isinstance(selection, int): - # Single integer? Turn it into a slice. - selection = slice(selection, selection + 1) - elif ( - isinstance(selection, np.ndarray) - and not selection.dtype.kind in np.typecodes["AllInteger"] - ): - selection = selection.astype(int) - xr_object = xr_object.isel({key: selection}) - elif selection_type == "values": - xr_object = xr_object.sel({key: selection}) - else: - raise TypeError(f"selection_type {selection_type} not recognized") - - return xr_object - - def get_patch_ivts(this_ds, this_pftlist): """ Get PFT of each patch, in both integer and string forms. diff --git a/python/ctsm/crop_calendars/generate_gdds_functions.py b/python/ctsm/crop_calendars/generate_gdds_functions.py index 0b8f1211b7..7c015f9dd6 100644 --- a/python/ctsm/crop_calendars/generate_gdds_functions.py +++ b/python/ctsm/crop_calendars/generate_gdds_functions.py @@ -20,6 +20,7 @@ sys.path.insert(1, _CTSM_PYTHON) import ctsm.crop_calendars.cropcal_utils as utils # pylint: disable=wrong-import-position import ctsm.crop_calendars.cropcal_module as cc # pylint: disable=wrong-import-position +from ctsm.crop_calendars.xr_flexsel import xr_flexsel # pylint: disable=wrong-import-position CAN_PLOT = True try: @@ -573,10 +574,10 @@ def import_and_process_1yr( continue vegtype_int = utils.vegtype_str2int(vegtype_str)[0] - this_crop_full_patchlist = list(utils.xr_flexsel(h2_ds, vegtype=vegtype_str).patch.values) + this_crop_full_patchlist = list(xr_flexsel(h2_ds, vegtype=vegtype_str).patch.values) # Get time series for each patch of this type - this_crop_ds = utils.xr_flexsel(h2_incl_ds, vegtype=vegtype_str) + this_crop_ds = xr_flexsel(h2_incl_ds, vegtype=vegtype_str) this_crop_gddaccum_da = this_crop_ds[clm_gdd_var] if save_figs: this_crop_gddharv_da = this_crop_ds["GDDHARV"] diff --git a/python/ctsm/crop_calendars/xr_flexsel.py b/python/ctsm/crop_calendars/xr_flexsel.py new file mode 100644 index 0000000000..1e30593946 --- /dev/null +++ b/python/ctsm/crop_calendars/xr_flexsel.py @@ -0,0 +1,263 @@ +""" +Flexibly subset time(s) and/or vegetation type(s) from an xarray Dataset or DataArray. 
+""" +import re +import numpy as np +import xarray as xr + +from ctsm.crop_calendars.cropcal_utils import vegtype_str2int, is_each_vegtype + + +def xr_flexsel(xr_object, patches1d_itype_veg=None, warn_about_seltype_interp=True, **kwargs): + """ + Flexibly subset time(s) and/or vegetation type(s) from an xarray Dataset or DataArray. + + - Keyword arguments like dimension=selection. + - Selections can be individual values or slice()s. + - Optimize memory usage by beginning keyword argument list with the selections that will result + in the largest reduction of object size. + - Use dimension "vegtype" to extract patches of designated vegetation type (can be string or + integer). + - Can also do dimension=function---e.g., time=np.mean will take the mean over the time + dimension. + """ + # Setup + havewarned = False + delimiter = "__" + + for key, selection in kwargs.items(): + if callable(selection): + xr_object = handle_callable(xr_object, key, selection) + + elif key == "vegtype": + xr_object = handle_vegtype(xr_object, patches1d_itype_veg, selection) + + else: + # Parse selection type, if provided + if delimiter in key: + key, selection_type = key.split(delimiter) + + # Check type of selection + else: + is_inefficient = False + if isinstance(selection, slice): + this_type = set_type_from_slice(selection) + elif isinstance(selection, np.ndarray): + selection, is_inefficient, this_type = set_type_from_ndarray(selection) + else: + this_type = type(selection) + + warn_about_this_seltype_interp = warn_about_seltype_interp + if this_type == list and isinstance(selection[0], str): + selection_type = "values" + warn_about_this_seltype_interp = False + elif this_type == int: + selection_type = "indices" + else: + selection_type = "values" + + if warn_about_this_seltype_interp: + do_warn_about_seltype_interp( + havewarned, delimiter, key, selection_type, is_inefficient, this_type + ) + + # Trim along relevant 1d axes + if isinstance(xr_object, xr.Dataset) and key in ["lat", "lon"]: + xr_object = trim_along_relevant_1d_axes(xr_object, selection, selection_type, key) + + # Perform selection + xr_object = perform_selection(xr_object, key, selection, selection_type) + + return xr_object + + +def perform_selection(xr_object, key, selection, selection_type): + """ + Perform selection + """ + if selection_type == "indices": + # Have to select like this instead of with index directly because otherwise assign_coords() + # will throw an error. Not sure why. + if isinstance(selection, int): + # Single integer? Turn it into a slice. 
+ selection = slice(selection, selection + 1)
+ elif (
+ isinstance(selection, np.ndarray)
+ and not selection.dtype.kind in np.typecodes["AllInteger"]
+ ):
+ selection = selection.astype(int)
+ xr_object = xr_object.isel({key: selection})
+ elif selection_type == "values":
+ xr_object = xr_object.sel({key: selection})
+ else:
+ raise TypeError(f"selection_type {selection_type} not recognized")
+ return xr_object
+
+
+def trim_along_relevant_1d_axes(xr_object, selection, selection_type, key):
+ """
+ Trim along relevant 1d axes
+ """
+ if selection_type == "indices":
+ incl_coords = xr_object[key].values[selection]
+ elif selection_type == "values":
+ if isinstance(selection, slice):
+ incl_coords = xr_object.sel({key: selection}, drop=False)[key].values
+ else:
+ incl_coords = selection
+ else:
+ raise TypeError(f"selection_type {selection_type} not recognized")
+ if key == "lat":
+ this_xy = "jxy"
+ elif key == "lon":
+ this_xy = "ixy"
+ else:
+ raise KeyError(
+ f"Key '{key}' not recognized: What 1d_ suffix should I use for variable name?"
+ )
+ pattern = re.compile(f"1d_{this_xy}")
+ matches = [x for x in list(xr_object.keys()) if pattern.search(x) is not None]
+ for var in matches:
+ if len(xr_object[var].dims) != 1:
+ raise RuntimeError(
+ f"Expected {var} to have 1 dimension, but it has"
+ f" {len(xr_object[var].dims)}: {xr_object[var].dims}"
+ )
+ dim = xr_object[var].dims[0]
+ # print(f"Variable {var} has dimension {dim}")
+ coords = xr_object[key].values[xr_object[var].values.astype(int) - 1]
+ # print(f"{dim} size before: {xr_object.sizes[dim]}")
+ ok_ind = []
+ new_1d_this_xy = []
+ for i, member in enumerate(coords):
+ if member in incl_coords:
+ ok_ind = ok_ind + [i]
+ new_1d_this_xy = new_1d_this_xy + [(incl_coords == member).nonzero()[0] + 1]
+ xr_object = xr_object.isel({dim: ok_ind})
+ new_1d_this_xy = np.array(new_1d_this_xy).squeeze()
+ xr_object[var].values = new_1d_this_xy
+ # print(f"{dim} size after: {xr_object.sizes[dim]}")
+ return xr_object
+
+
+def do_warn_about_seltype_interp(
+ havewarned, delimiter, key, selection_type, is_inefficient, this_type
+):
+ """
+ Suggest suppressing selection type interpretation warnings
+ """
+ if not havewarned:
+ print(
+ "xr_flexsel(): Suppress all 'selection type interpretation' messages by specifying"
+ + " warn_about_seltype_interp=False"
+ )
+ havewarned = True
+ if is_inefficient:
+ extra = " This will also improve efficiency for large selections."
+ else:
+ extra = ""
+ print(
+ f"xr_flexsel(): Selecting {key} as {selection_type} because selection was"
+ f" interpreted as {this_type}. If not correct, specify selection type"
+ " ('indices' or 'values') in keyword like"
+ f" '{key}{delimiter}SELECTIONTYPE=...' instead of '{key}=...'.{extra}"
+ )
+ return havewarned
+
+
+def set_type_from_ndarray(selection):
+ """
+ Sets selection type if given a Numpy array
+ """
+ is_inefficient = False
+ if selection.dtype.kind in np.typecodes["AllInteger"]:
+ this_type = int
+ else:
+ is_inefficient = True
+ this_type = None
+ for member in selection:
+ if member < 0 or member % 1 > 0:
+ if isinstance(member, int):
+ this_type = "values"
+ else:
+ this_type = type(member)
+ break
+ if this_type is None:
+ this_type = int
+ selection = selection.astype(int)
+ return selection, is_inefficient, this_type
+
+
+def set_type_from_slice(selection):
+ """
+ Sets selection type if given a slice
+ """
+ slice_members = []
+ if selection == slice(0):
+ raise ValueError("slice(0) will be empty")
+ if selection.start is not None:
+ slice_members = slice_members + [selection.start]
+ if selection.stop is not None:
+ slice_members = slice_members + [selection.stop]
+ if selection.step is not None:
+ slice_members = slice_members + [selection.step]
+ if not slice_members:
+ raise TypeError("slice is all None?")
+ this_type = int
+ for member in slice_members:
+ if member < 0 or not isinstance(member, int):
+ this_type = "values"
+ break
+ return this_type
+
+
+def handle_vegtype(xr_object, patches1d_itype_veg, selection):
+ """
+ Handle selection "vegtype"
+ """
+ # Convert to list, if needed
+ if not isinstance(selection, list):
+ selection = [selection]
+
+ # Convert to indices, if needed
+ if isinstance(selection[0], str):
+ selection = vegtype_str2int(selection)
+
+ # Get list of boolean(s)
+ if isinstance(selection[0], int):
+ if isinstance(patches1d_itype_veg, type(None)):
+ patches1d_itype_veg = xr_object.patches1d_itype_veg.values
+ elif isinstance(patches1d_itype_veg, xr.core.dataarray.DataArray):
+ patches1d_itype_veg = patches1d_itype_veg.values
+ is_vegtype = is_each_vegtype(patches1d_itype_veg, selection, "ok_exact")
+ elif isinstance(selection[0], bool):
+ if len(selection) != len(xr_object.patch):
+ raise ValueError(
+ "If providing boolean 'vegtype' argument to xr_flexsel(), it must be the"
+ f" same length as xr_object.patch ({len(selection)} vs."
+ f" {len(xr_object.patch)})"
+ )
+ is_vegtype = selection
+ else:
+ raise TypeError(f"Not sure how to handle 'vegtype' of type {type(selection[0])}")
+ xr_object = xr_object.isel(patch=[i for i, x in enumerate(is_vegtype) if x])
+ if "ivt" in xr_object:
+ xr_object = xr_object.isel(ivt=is_each_vegtype(xr_object.ivt.values, selection, "ok_exact"))
+
+ return xr_object
+
+
+def handle_callable(xr_object, key, selection):
+ """
+ Handle selection that's a callable
+ """
+ # It would have been really nice to do selection(xr_object, axis=key), but numpy methods and
+ # xarray methods disagree on "axis" vs. "dimension." So instead, just do this manually.
+ if selection == np.mean: # pylint: disable=comparison-with-callable
+ try:
+ xr_object = xr_object.mean(dim=key)
+ except:
+ raise ValueError(
+ f"Failed to take mean of dimension {key}. Try doing so outside of xr_flexsel()."
+ )
+ else:
+ raise ValueError(f"xr_flexsel() doesn't recognize function {selection}")
+ return xr_object
From f7ad444a95ff1c8be15b727ab9022101793927f1 Mon Sep 17 00:00:00 2001
From: Sam Rabin
Date: Sun, 11 Feb 2024 10:02:21 -0700
Subject: [PATCH 72/85] Resolve pylint for regrid_ggcmi_shdates.py.
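
Among other pylint fixes, this commit switches logging calls to lazy
%-interpolation, so the message string is only built if the record is
actually emitted; for example:

    logger.info("Regridding GGCMI crop calendars to %s:", regrid_resolution)

It also passes an explicit check=False to subprocess.run() (run_and_check()
inspects the return code itself), wraps overlong help strings, builds the
repeated cdo command once as cdo_cmd, and factors template lat/lon handling
out into a new get_template_da_out() helper.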
--- .../crop_calendars/regrid_ggcmi_shdates.py | 102 ++++++++++-------- 1 file changed, 58 insertions(+), 44 deletions(-) diff --git a/python/ctsm/crop_calendars/regrid_ggcmi_shdates.py b/python/ctsm/crop_calendars/regrid_ggcmi_shdates.py index 8db38ddf71..b1988aa8b5 100644 --- a/python/ctsm/crop_calendars/regrid_ggcmi_shdates.py +++ b/python/ctsm/crop_calendars/regrid_ggcmi_shdates.py @@ -6,9 +6,9 @@ import glob import argparse import sys +import logging import xarray as xr import numpy as np -import logging # -- add python/ctsm to path (needed if we want to run regrid_ggcmi_shdates stand-alone) _CTSM_PYTHON = os.path.join(os.path.dirname(os.path.realpath(__file__)), os.pardir, os.pardir) @@ -51,6 +51,7 @@ def run_and_check(cmd): shell=True, capture_output=True, text=True, + check=False, ) if result.returncode != 0: abort(f"Trouble running `{result.args}` in shell:\n{result.stdout}\n{result.stderr}") @@ -73,7 +74,11 @@ def define_arguments(parser): parser.add_argument( "-rt", "--regrid-template-file", - help="Template netCDF file to be used in regridding of inputs. This can be a CLM output file (i.e., something with 1-d lat and lon variables) or a CLM surface dataset (i.e., something with 2-d LATIXY and LONGXY variables).", + help=( + "Template netCDF file to be used in regridding of inputs. This can be a CLM output " + + "file (i.e., something with 1-d lat and lon variables) or a CLM surface dataset " + + "(i.e., something with 2-d LATIXY and LONGXY variables)." + ), type=str, required=True, ) @@ -88,7 +93,10 @@ def define_arguments(parser): parser.add_argument( "-c", "--crop-list", - help="List of GGCMI crops to process; e.g., '--crop-list mai_rf,mai_ir'. If not provided, will process all GGCMI crops.", + help=( + "List of GGCMI crops to process; e.g., '--crop-list mai_rf,mai_ir'. If not provided, " + + "will process all GGCMI crops." + ), default=None, ) return parser @@ -105,7 +113,7 @@ def regrid_ggcmi_shdates( """ Regrid GGCMI sowing and harvest date files """ - logger.info(f"Regridding GGCMI crop calendars to {regrid_resolution}:") + logger.info("Regridding GGCMI crop calendars to %s:", regrid_resolution) # Ensure we can call necessary shell script(s) for cmd in ["module load cdo; cdo"]: @@ -129,31 +137,7 @@ def regrid_ggcmi_shdates( regrid_extension = "." 
+ regrid_extension

 # Import and format latitude
- if "lat" in template_ds_in:
- lat, n_lat = import_coord_1d(template_ds_in, "lat")
- elif "LATIXY" in template_ds_in:
- lat, n_lat = import_coord_2d(template_ds_in, "lat", "LATIXY")
- lat.attrs["axis"] = "Y"
- else:
- abort("No latitude variable found in regrid template file")
-
- # Flip latitude, if needed
- if lat.values[0] < lat.values[1]:
- lat = lat.reindex(lat=list(reversed(lat["lat"])))
-
- # Import and format longitude
- if "lon" in template_ds_in:
- lon, n_lon = import_coord_1d(template_ds_in, "lon")
- elif "LONGXY" in template_ds_in:
- lon, n_lon = import_coord_2d(template_ds_in, "lon", "LONGXY")
- lon.attrs["axis"] = "Y"
- else:
- abort("No longitude variable found in regrid template file")
- template_da_out = xr.DataArray(
- data=np.full((n_lat, n_lon), 0.0),
- dims={"lat": lat, "lon": lon},
- name="area",
- )
+ lat, lon, template_da_out = get_template_da_out(template_ds_in)

 # Save template Dataset for use by cdo
 template_ds_out = xr.Dataset(
@@ -177,7 +161,7 @@
 if crop_list is not None and this_crop not in crop_list:
 continue

- logger.info(" " + this_crop)
+ logger.info(" %s", this_crop)
 file_2 = os.path.join(regrid_output_directory, file)
 file_3 = file_2.replace(
 regrid_extension, f"_nninterp-{regrid_resolution}{regrid_extension}"
@@ -186,31 +170,61 @@
 if os.path.exists(file_3):
 os.remove(file_3)

- # Sometimes cdo fails for no apparent reason. In testing this never happened more than 3x in a row.
+ # Sometimes cdo fails for no apparent reason. In testing this never happened more than 3x
+ # in a row.
+ cdo_cmd = (
+ f"module load cdo; cdo -L -remapnn,'{templatefile}' "
+ + f"-setmisstonn '{file}' '{file_3}'"
+ )
 try:
- run_and_check(
- f"module load cdo; cdo -L -remapnn,'{templatefile}' -setmisstonn '{file}' '{file_3}'"
- )
+ run_and_check(cdo_cmd)
 except: # pylint: disable=bare-except
 try:
- run_and_check(
- f"module load cdo; cdo -L -remapnn,'{templatefile}' -setmisstonn '{file}' '{file_3}'"
- )
+ run_and_check(cdo_cmd)
 except: # pylint: disable=bare-except
 try:
- run_and_check(
- f"module load cdo; cdo -L -remapnn,'{templatefile}' -setmisstonn '{file}' '{file_3}'"
- )
+ run_and_check(cdo_cmd)
 except: # pylint: disable=bare-except
- run_and_check(
- f"module load cdo; cdo -L -remapnn,'{templatefile}' -setmisstonn '{file}' '{file_3}'"
- )
+ run_and_check(cdo_cmd)

 # Delete template file, which is no longer needed
 os.remove(templatefile)
 os.chdir(previous_dir)
+
+def get_template_da_out(template_ds_in):
+ """
+ Get template output DataArray from input Dataset
+ """
+ if "lat" in template_ds_in:
+ lat, n_lat = import_coord_1d(template_ds_in, "lat")
+ elif "LATIXY" in template_ds_in:
+ lat, n_lat = import_coord_2d(template_ds_in, "lat", "LATIXY")
+ lat.attrs["axis"] = "Y"
+ else:
+ abort("No latitude variable found in regrid template file")
+
+ # Flip latitude, if needed
+ if lat.values[0] < lat.values[1]:
+ lat = lat.reindex(lat=list(reversed(lat["lat"])))
+
+ # Import and format longitude
+ if "lon" in template_ds_in:
+ lon, n_lon = import_coord_1d(template_ds_in, "lon")
+ elif "LONGXY" in template_ds_in:
+ lon, n_lon = import_coord_2d(template_ds_in, "lon", "LONGXY")
+ lon.attrs["axis"] = "X"
+ else:
+ abort("No longitude variable found in regrid template file")
+ template_da_out = xr.DataArray(
+ data=np.full((n_lat, n_lon), 0.0),
+ dims={"lat": lat, "lon": lon},
+ name="area",
+ )
+
+ return lat, lon, template_da_out
+
+
 def regrid_ggcmi_shdates_arg_process():
 """Process input
arguments @@ -222,7 +236,7 @@ def regrid_ggcmi_shdates_arg_process(): ctsm_logging.setup_logging_pre_config() parser = argparse.ArgumentParser( - description="Regrids raw sowing and harvest date files provided by GGCMI to a target CLM resolution." + description=("Regrid raw sowing/harvest date files from GGCMI to a target CLM resolution."), ) # Define arguments From 35df413cdbbe5d1af714af7ef8429a4dd946332b Mon Sep 17 00:00:00 2001 From: Sam Rabin Date: Sun, 11 Feb 2024 10:03:55 -0700 Subject: [PATCH 73/85] Move grid_one_variable() to its own module; functionize parts. --- python/ctsm/crop_calendars/cropcal_utils.py | 132 +------------ .../crop_calendars/generate_gdds_functions.py | 7 +- .../ctsm/crop_calendars/grid_one_variable.py | 179 ++++++++++++++++++ 3 files changed, 187 insertions(+), 131 deletions(-) create mode 100644 python/ctsm/crop_calendars/grid_one_variable.py diff --git a/python/ctsm/crop_calendars/cropcal_utils.py b/python/ctsm/crop_calendars/cropcal_utils.py index 171e0bae56..ebc275279c 100644 --- a/python/ctsm/crop_calendars/cropcal_utils.py +++ b/python/ctsm/crop_calendars/cropcal_utils.py @@ -511,133 +511,6 @@ def import_ds( return this_ds -def get_thisvar_da(var, this_ds): - """ - Return a DataArray, with defined coordinates, for a given variable in a dataset. - """ - # Make DataArray for this variable - thisvar_da = np.array(this_ds.variables[var]) - these_dims = this_ds.variables[var].dims - thisvar_da = xr.DataArray(thisvar_da, dims=these_dims) - - # Define coordinates of this variable's DataArray - dims_dict = dict() - for dim in these_dims: - dims_dict[dim] = this_ds[dim] - thisvar_da = thisvar_da.assign_coords(dims_dict) - thisvar_da.attrs = this_ds[var].attrs - - return thisvar_da - - -def grid_one_variable(this_ds, var, fill_value=None, **kwargs): - """ - Make a geographically gridded DataArray (with dimensions time, vegetation type [as string], lat, - lon) of one variable within a Dataset. - - - Optional keyword arguments will be passed to xr_flexsel() to select single steps or slices - along the specified ax(ie)s. - - fill_value: Default None means grid will be filled with NaN, unless the variable in question - already has a _FillValue, in which case that will be used. - """ - # Get this Dataset's values for selection(s), if provided - this_ds = xr_flexsel(this_ds, **kwargs) - - # Get DataArrays needed for gridding - thisvar_da = get_thisvar_da(var, this_ds) - vt_da = None - if "patch" in thisvar_da.dims: - spatial_unit = "patch" - xy_1d_prefix = "patches" - if "patches1d_itype_veg" in this_ds: - vt_da = get_thisvar_da("patches1d_itype_veg", this_ds) - elif "gridcell" in thisvar_da.dims: - spatial_unit = "gridcell" - xy_1d_prefix = "grid" - else: - raise RuntimeError( - f"What variables to use for _ixy and _jxy of variable with dims {thisvar_da.dims}?" - ) - ixy_da = get_thisvar_da(xy_1d_prefix + "1d_ixy", this_ds) - jxy_da = get_thisvar_da(xy_1d_prefix + "1d_jxy", this_ds) - - if not fill_value and "_FillValue" in thisvar_da.attrs: - fill_value = thisvar_da.attrs["_FillValue"] - - # Renumber vt_da to work as indices on new ivt dimension, if needed. - ### Ensures that the unique set of vt_da values begins with 1 and - ### contains no missing steps. - if "ivt" in this_ds and vt_da is not None: - vt_da.values = np.array([np.where(this_ds.ivt.values == x)[0][0] for x in vt_da.values]) - - # Get new dimension list - new_dims = list(thisvar_da.dims) - ### Remove "[spatial_unit]". 
- if spatial_unit in new_dims: - new_dims.remove(spatial_unit) - # Add "ivt_str" (vegetation type, as string). This needs to go at the end, to avoid a possible situation where you wind up with multiple Ellipsis members of fill_indices. - if "ivt" in this_ds and spatial_unit == "patch": - new_dims.append("ivt_str") - ### Add lat and lon to end of list - new_dims = new_dims + ["lat", "lon"] - - # Set up empty array - dim_size_list = [] - for dim in new_dims: - if dim == "ivt_str": - dim_size = this_ds.sizes["ivt"] - elif dim in thisvar_da.coords: - dim_size = thisvar_da.sizes[dim] - else: - dim_size = this_ds.sizes[dim] - dim_size_list = dim_size_list + [dim_size] - thisvar_gridded = np.empty(dim_size_list) - if fill_value: - thisvar_gridded[:] = fill_value - else: - thisvar_gridded[:] = np.NaN - - # Fill with this variable - fill_indices = [] - for dim in new_dims: - if dim == "lat": - fill_indices.append(jxy_da.values.astype(int) - 1) - elif dim == "lon": - fill_indices.append(ixy_da.values.astype(int) - 1) - elif dim == "ivt_str": - fill_indices.append(vt_da) - elif not fill_indices: - # I.e., if fill_indices is empty. Could also do "elif len(fill_indices)==0". - fill_indices.append(Ellipsis) - try: - thisvar_gridded[tuple(fill_indices[: len(fill_indices)])] = thisvar_da.values - except: # pylint: disable=bare-except - thisvar_gridded[tuple(fill_indices[: len(fill_indices)])] = thisvar_da.values.transpose() - if not np.any(np.bitwise_not(np.isnan(thisvar_gridded))): - if np.all(np.isnan(thisvar_da.values)): - print("Warning: This DataArray (and thus map) is all NaN") - else: - raise RuntimeError("thisvar_gridded was not filled!") - - # Assign coordinates, attributes and name - thisvar_gridded = xr.DataArray(thisvar_gridded, dims=tuple(new_dims), attrs=thisvar_da.attrs) - for dim in new_dims: - if dim == "ivt_str": - values = this_ds.vegtype_str.values - elif dim in thisvar_da.coords: - values = thisvar_da[dim] - else: - values = this_ds[dim].values - thisvar_gridded = thisvar_gridded.assign_coords({dim: values}) - thisvar_gridded.name = var - - # Add FillValue attribute - if fill_value: - thisvar_gridded.attrs["_FillValue"] = fill_value - - return thisvar_gridded - - def safer_timeslice(ds_in, time_slice, time_var="time"): """ ctsm_pylib can't handle time slicing like Dataset.sel(time=slice("1998-01-01", "2005-12-31")) @@ -648,7 +521,7 @@ def safer_timeslice(ds_in, time_slice, time_var="time"): ds_in = ds_in.sel({time_var: time_slice}) except: # pylint: disable=bare-except # If the issue might have been slicing using strings, try to fall back to integer slicing - if ( + can_try_integer_slicing = ( isinstance(time_slice.start, str) and isinstance(time_slice.stop, str) and len(time_slice.start.split("-")) == 3 @@ -658,7 +531,8 @@ def safer_timeslice(ds_in, time_slice, time_var="time"): time_slice.stop.split("-")[1:] == ["12", "31"] or time_slice.stop.split("-")[1:] == ["01", "01"] ) - ): + ) + if can_try_integer_slicing: fileyears = np.array([x.year for x in ds_in.time.values]) if len(np.unique(fileyears)) != len(fileyears): print("Could not fall back to integer slicing of years: Time axis not annual") diff --git a/python/ctsm/crop_calendars/generate_gdds_functions.py b/python/ctsm/crop_calendars/generate_gdds_functions.py index 7c015f9dd6..cb7315c00c 100644 --- a/python/ctsm/crop_calendars/generate_gdds_functions.py +++ b/python/ctsm/crop_calendars/generate_gdds_functions.py @@ -21,6 +21,9 @@ import ctsm.crop_calendars.cropcal_utils as utils # pylint: disable=wrong-import-position import 
ctsm.crop_calendars.cropcal_module as cc # pylint: disable=wrong-import-position from ctsm.crop_calendars.xr_flexsel import xr_flexsel # pylint: disable=wrong-import-position +from ctsm.crop_calendars.grid_one_variable import ( # pylint: disable=wrong-import-position + grid_one_variable, +) CAN_PLOT = True try: @@ -75,7 +78,7 @@ def check_sdates(dates_ds, sdates_rx, logger, verbose=False): """ log(logger, " Checking that input and output sdates match...") - sdates_grid = utils.grid_one_variable(dates_ds, "SDATES") + sdates_grid = grid_one_variable(dates_ds, "SDATES") all_ok = True any_found = False @@ -212,7 +215,7 @@ def yp_list_to_ds(yp_list, daily_ds, incl_vegtypes_str, dates_rx, longname_prefi # Grid this crop this_ds["tmp"] = this_da - da_gridded = utils.grid_one_variable(this_ds, "tmp", vegtype=this_crop_str) + da_gridded = grid_one_variable(this_ds, "tmp", vegtype=this_crop_str) da_gridded = da_gridded.squeeze(drop=True) # Add singleton time dimension and save to output Dataset diff --git a/python/ctsm/crop_calendars/grid_one_variable.py b/python/ctsm/crop_calendars/grid_one_variable.py new file mode 100644 index 0000000000..cb5d330032 --- /dev/null +++ b/python/ctsm/crop_calendars/grid_one_variable.py @@ -0,0 +1,179 @@ +""" +Make a geographically gridded DataArray (with dimensions time, vegetation type [as string], lat, +lon) of one variable within a Dataset. + +- Optional keyword arguments will be passed to xr_flexsel() to select single steps or slices + along the specified ax(ie)s. +- fill_value: Default None means grid will be filled with NaN, unless the variable in question + already has a _FillValue, in which case that will be used. +""" +import numpy as np +import xarray as xr +from ctsm.crop_calendars.xr_flexsel import xr_flexsel + + +def get_thisvar_da(var, this_ds): + """ + Return a DataArray, with defined coordinates, for a given variable in a dataset. + """ + # Make DataArray for this variable + thisvar_da = np.array(this_ds.variables[var]) + these_dims = this_ds.variables[var].dims + thisvar_da = xr.DataArray(thisvar_da, dims=these_dims) + + # Define coordinates of this variable's DataArray + dims_dict = dict() + for dim in these_dims: + dims_dict[dim] = this_ds[dim] + thisvar_da = thisvar_da.assign_coords(dims_dict) + thisvar_da.attrs = this_ds[var].attrs + + return thisvar_da + + +def convert_to_da(this_ds, var, fill_value, thisvar_da, new_dims, thisvar_gridded): + """ + Convert Numpy array to DataArray with coordinates, attributes and name + """ + thisvar_gridded = xr.DataArray(thisvar_gridded, dims=tuple(new_dims), attrs=thisvar_da.attrs) + for dim in new_dims: + if dim == "ivt_str": + values = this_ds.vegtype_str.values + elif dim in thisvar_da.coords: + values = thisvar_da[dim] + else: + values = this_ds[dim].values + thisvar_gridded = thisvar_gridded.assign_coords({dim: values}) + thisvar_gridded.name = var + + # Add FillValue attribute + if fill_value: + thisvar_gridded.attrs["_FillValue"] = fill_value + return thisvar_gridded + + +def grid_the_data(thisvar_da, vt_da, ixy_da, jxy_da, new_dims, thisvar_gridded): + """ + Fill lat-lon array with previously-ungridded data + """ + fill_indices = [] + for dim in new_dims: + if dim == "lat": + fill_indices.append(jxy_da.values.astype(int) - 1) + elif dim == "lon": + fill_indices.append(ixy_da.values.astype(int) - 1) + elif dim == "ivt_str": + fill_indices.append(vt_da) + elif not fill_indices: + # I.e., if fill_indices is empty. Could also do "elif len(fill_indices)==0". 
+ fill_indices.append(Ellipsis) + try: + thisvar_gridded[tuple(fill_indices[: len(fill_indices)])] = thisvar_da.values + except: # pylint: disable=bare-except + thisvar_gridded[tuple(fill_indices[: len(fill_indices)])] = thisvar_da.values.transpose() + if not np.any(np.bitwise_not(np.isnan(thisvar_gridded))): + if np.all(np.isnan(thisvar_da.values)): + print("Warning: This DataArray (and thus map) is all NaN") + else: + raise RuntimeError("thisvar_gridded was not filled!") + + +def create_filled_array(this_ds, fill_value, thisvar_da, new_dims): + """ + Create a Numpy array to be filled with gridded data + """ + dim_size_list = [] + for dim in new_dims: + if dim == "ivt_str": + dim_size = this_ds.sizes["ivt"] + elif dim in thisvar_da.coords: + dim_size = thisvar_da.sizes[dim] + else: + dim_size = this_ds.sizes[dim] + dim_size_list = dim_size_list + [dim_size] + thisvar_gridded = np.empty(dim_size_list) + if fill_value: + thisvar_gridded[:] = fill_value + else: + thisvar_gridded[:] = np.NaN + return thisvar_gridded + + +def get_ixy_jxy_das(this_ds, var): + """ + Get DataArrays needed for gridding + """ + thisvar_da = get_thisvar_da(var, this_ds) + vt_da = None + if "patch" in thisvar_da.dims: + spatial_unit = "patch" + xy_1d_prefix = "patches" + if "patches1d_itype_veg" in this_ds: + vt_da = get_thisvar_da("patches1d_itype_veg", this_ds) + elif "gridcell" in thisvar_da.dims: + spatial_unit = "gridcell" + xy_1d_prefix = "grid" + else: + raise RuntimeError( + f"What variables to use for _ixy and _jxy of variable with dims {thisvar_da.dims}?" + ) + ixy_da = get_thisvar_da(xy_1d_prefix + "1d_ixy", this_ds) + jxy_da = get_thisvar_da(xy_1d_prefix + "1d_jxy", this_ds) + return thisvar_da, vt_da, spatial_unit, ixy_da, jxy_da + + +def get_new_dim_list(this_ds, thisvar_da, spatial_unit): + """ + Get new dimension list + """ + new_dims = list(thisvar_da.dims) + ### Remove "[spatial_unit]". + if spatial_unit in new_dims: + new_dims.remove(spatial_unit) + # Add "ivt_str" (vegetation type, as string). This needs to go at the end, to avoid a possible + # situation where you wind up with multiple Ellipsis members of fill_indices. + if "ivt" in this_ds and spatial_unit == "patch": + new_dims.append("ivt_str") + ### Add lat and lon to end of list + new_dims = new_dims + ["lat", "lon"] + return new_dims + + +def grid_one_variable(this_ds, var, fill_value=None, **kwargs): + """ + Make a geographically gridded DataArray (with dimensions time, vegetation type [as string], lat, + lon) of one variable within a Dataset. + + - Optional keyword arguments will be passed to xr_flexsel() to select single steps or slices + along the specified ax(ie)s. + - fill_value: Default None means grid will be filled with NaN, unless the variable in question + already has a _FillValue, in which case that will be used. + """ + # Get this Dataset's values for selection(s), if provided + this_ds = xr_flexsel(this_ds, **kwargs) + + # Get DataArrays needed for gridding + thisvar_da, vt_da, spatial_unit, ixy_da, jxy_da = get_ixy_jxy_das(this_ds, var) + + if not fill_value and "_FillValue" in thisvar_da.attrs: + fill_value = thisvar_da.attrs["_FillValue"] + + # Renumber vt_da to work as indices on new ivt dimension, if needed. + ### Ensures that the unique set of vt_da values begins with 1 and + ### contains no missing steps. 
+ if "ivt" in this_ds and vt_da is not None: + vt_da.values = np.array([np.where(this_ds.ivt.values == x)[0][0] for x in vt_da.values]) + + # Get new dimension list + new_dims = get_new_dim_list(this_ds, thisvar_da, spatial_unit) + + # Create a Numpy array to be filled with gridded data + thisvar_gridded = create_filled_array(this_ds, fill_value, thisvar_da, new_dims) + + # Fill lat-lon array with previously-ungridded data + grid_the_data(thisvar_da, vt_da, ixy_da, jxy_da, new_dims, thisvar_gridded) + + # Convert Numpy array to DataArray with coordinates, attributes and name + thisvar_gridded = convert_to_da(this_ds, var, fill_value, thisvar_da, new_dims, thisvar_gridded) + + return thisvar_gridded From 81ac26e9e25bb4b4edb33c99cee3b12dc3ca4641 Mon Sep 17 00:00:00 2001 From: Sam Rabin Date: Sun, 11 Feb 2024 10:12:54 -0700 Subject: [PATCH 74/85] Resolve most issues with import_ds(). --- python/ctsm/crop_calendars/cropcal_utils.py | 31 +++++++++++---------- 1 file changed, 17 insertions(+), 14 deletions(-) diff --git a/python/ctsm/crop_calendars/cropcal_utils.py b/python/ctsm/crop_calendars/cropcal_utils.py index ebc275279c..fd35686fa1 100644 --- a/python/ctsm/crop_calendars/cropcal_utils.py +++ b/python/ctsm/crop_calendars/cropcal_utils.py @@ -5,7 +5,7 @@ import re import warnings -import importlib +from importlib.util import find_spec import numpy as np import xarray as xr @@ -399,8 +399,7 @@ def import_ds( my_vars=None, my_vegtypes=None, time_slice=None, - my_vars_missing_ok=[], - only_active_patches=False, + my_vars_missing_ok=None, rename_lsmlatlon=False, chunks=None, ): @@ -411,7 +410,10 @@ def import_ds( - DOES actually read the dataset into memory, but only AFTER dropping unwanted variables and/or vegetation types. """ - # Convert my_vegtypes here, if needed, to avoid repeating the process each time you read a file in xr.open_mfdataset(). + if my_vars_missing_ok is None: + my_vars_missing_ok = [] + # Convert my_vegtypes here, if needed, to avoid repeating the process each time you read a file + # in xr.open_mfdataset(). if my_vegtypes is not None: if not isinstance(my_vegtypes, list): my_vegtypes = [my_vegtypes] @@ -433,7 +435,10 @@ def import_ds( my_vars_missing_ok = [my_vars_missing_ok] # Remove files from list if they don't contain requested timesteps. - # time_slice should be in the format slice(start,end[,step]). start or end can be None to be unbounded on one side. Note that the standard slice() documentation suggests that only elements through end-1 will be selected, but that seems not to be the case in the xarray implementation. + # time_slice should be in the format slice(start,end[,step]). start or end can be None to be + # unbounded on one side. Note that the standard slice() documentation suggests that only + # elements through end-1 will be selected, but that seems not to be the case in the xarray + # implementation. if time_slice: new_filelist = [] for file in sorted(filelist): @@ -443,14 +448,18 @@ def import_ds( if include_this_file: new_filelist.append(file) - # If you found some matching files, but then you find one that doesn't, stop going through the list. + # If you found some matching files, but then you find one that doesn't, stop going + # through the list. elif new_filelist: break if not new_filelist: raise RuntimeError(f"No files found in time_slice {time_slice}") filelist = new_filelist - # The xarray open_mfdataset() "preprocess" argument requires a function that takes exactly one variable (an xarray.Dataset object). 
Wrapping mfdataset_preproc() in this lambda function allows this. Could also just allow mfdataset_preproc() to access my_vars and my_vegtypes directly, but that's bad practice as it could lead to scoping issues. + # The xarray open_mfdataset() "preprocess" argument requires a function that takes exactly one + # variable (an xarray.Dataset object). Wrapping mfdataset_preproc() in this lambda function + # allows this. Could also just allow mfdataset_preproc() to access my_vars and my_vegtypes + # directly, but that's bad practice as it could lead to scoping issues. mfdataset_preproc_closure = lambda ds: mfdataset_preproc(ds, my_vars, my_vegtypes, time_slice) # Import @@ -459,7 +468,7 @@ def import_ds( if isinstance(filelist, list): with warnings.catch_warnings(): warnings.filterwarnings(action="ignore", category=DeprecationWarning) - if importlib.find_loader("dask") is None: + if find_spec("dask") is None: raise ModuleNotFoundError( "You have asked xarray to import a list of files as a single Dataset using" " open_mfdataset(), but this requires dask, which is not available.\nFile" @@ -480,12 +489,6 @@ def import_ds( this_ds = mfdataset_preproc(this_ds, my_vars, my_vegtypes, time_slice) this_ds = this_ds.compute() - # Include only active patches (or whatever) - if only_active_patches: - is_active = this_ds.patches1d_active.values - p_active = np.where(is_active)[0] - this_ds_active = this_ds.isel(patch=p_active) - # Warn and/or error about variables that couldn't be imported or derived if my_vars: missing_vars = [v for v in my_vars if v not in this_ds] From c2899bde3417f2373c408c1f5a567dc9d2b185d4 Mon Sep 17 00:00:00 2001 From: Sam Rabin Date: Sun, 11 Feb 2024 10:27:26 -0700 Subject: [PATCH 75/85] Move import_ds() to its own module. --- python/ctsm/crop_calendars/cropcal_module.py | 7 +- python/ctsm/crop_calendars/cropcal_utils.py | 239 +--------------- .../crop_calendars/generate_gdds_functions.py | 9 +- python/ctsm/crop_calendars/import_ds.py | 267 ++++++++++++++++++ 4 files changed, 279 insertions(+), 243 deletions(-) create mode 100644 python/ctsm/crop_calendars/import_ds.py diff --git a/python/ctsm/crop_calendars/cropcal_module.py b/python/ctsm/crop_calendars/cropcal_module.py index b3b415b77c..671a6334c7 100644 --- a/python/ctsm/crop_calendars/cropcal_module.py +++ b/python/ctsm/crop_calendars/cropcal_module.py @@ -25,6 +25,9 @@ from ctsm.crop_calendars.cropcal_constants import ( # pylint: disable=wrong-import-position DEFAULT_GDD_MIN, ) +from ctsm.crop_calendars.import_ds import ( # pylint: disable=wrong-import-position + import_ds, +) def check_and_trim_years(year_1, year_n, ds_in): @@ -266,7 +269,7 @@ def import_rx_dates(var_prefix, date_infile, dates_ds, set_neg1_to_nan=True): this_var = f"{var_prefix}{j+1}_{i}" date_varlist = date_varlist + [this_var] - this_ds = utils.import_ds(date_infile, my_vars=date_varlist) + this_ds = import_ds(date_infile, my_vars=date_varlist) did_warn = False for var in this_ds: @@ -355,7 +358,7 @@ def import_output( Import CLM output """ # Import - this_ds = utils.import_ds(filename, my_vars=my_vars, my_vegtypes=my_vegtypes) + this_ds = import_ds(filename, my_vars=my_vars, my_vegtypes=my_vegtypes) # Trim to years of interest (do not include extra year needed for finishing last growing season) if year_1 and year_n: diff --git a/python/ctsm/crop_calendars/cropcal_utils.py b/python/ctsm/crop_calendars/cropcal_utils.py index fd35686fa1..e2b94070a1 100644 --- a/python/ctsm/crop_calendars/cropcal_utils.py +++ b/python/ctsm/crop_calendars/cropcal_utils.py @@ 
-2,14 +2,8 @@ utility functions copied from klindsay, https://github.com/klindsay28/CESM2_coup_carb_cycle_JAMES/blob/master/utils.py """ - -import re -import warnings -from importlib.util import find_spec - import numpy as np import xarray as xr -from ctsm.crop_calendars.xr_flexsel import xr_flexsel def define_pftlist(): @@ -274,7 +268,7 @@ def get_patch_ivts(this_ds, this_pftlist): def get_vegtype_str_da(vegtype_str): """ - Convert a list of strings with vegetation type names into a DataArray. Used to add vegetation type info in import_ds(). + Convert a list of strings with vegetation type names into a DataArray. """ nvt = len(vegtype_str) vegtype_str_da = xr.DataArray( @@ -283,237 +277,6 @@ def get_vegtype_str_da(vegtype_str): return vegtype_str_da -def mfdataset_preproc(ds_in, vars_to_import, vegtypes_to_import, time_slice): - """ - Function to drop unwanted variables in preprocessing of open_mfdataset(). - - - Makes sure to NOT drop any unspecified variables that will be useful in gridding. - - Also adds vegetation type info in the form of a DataArray of strings. - - Also renames "pft" dimension (and all like-named variables, e.g., pft1d_itype_veg_str) to be - named like "patch". This can later be reversed, for compatibility with other code, using - patch2pft(). - """ - # Rename "pft" dimension and variables to "patch", if needed - if "pft" in ds_in.dims: - pattern = re.compile("pft.*1d") - matches = [x for x in list(ds_in.keys()) if pattern.search(x) is not None] - pft2patch_dict = {"pft": "patch"} - for match in matches: - pft2patch_dict[match] = match.replace("pft", "patch").replace("patchs", "patches") - ds_in = ds_in.rename(pft2patch_dict) - - derived_vars = [] - if vars_to_import is not None: - # Split vars_to_import into variables that are vs. aren't already in ds - derived_vars = [v for v in vars_to_import if v not in ds_in] - present_vars = [v for v in vars_to_import if v in ds_in] - vars_to_import = present_vars - - # Get list of dimensions present in variables in vars_to_import. - dim_list = [] - for var in vars_to_import: - # list(set(x)) returns a list of the unique items in x - dim_list = list(set(dim_list + list(ds_in.variables[var].dims))) - - # Get any _1d variables that are associated with those dimensions. These will be useful in gridding. 
Also, if any dimension is "pft", set up to rename it and all like-named variables to "patch" - oned_vars = [] - for dim in dim_list: - pattern = re.compile(f"{dim}.*1d") - matches = [x for x in list(ds_in.keys()) if pattern.search(x) is not None] - oned_vars = list(set(oned_vars + matches)) - - # Add dimensions and _1d variables to vars_to_import - vars_to_import = list(set(vars_to_import + list(ds_in.dims) + oned_vars)) - - # Add any _bounds variables - bounds_vars = [] - for var in vars_to_import: - bounds_var = var + "_bounds" - if bounds_var in ds_in: - bounds_vars = bounds_vars + [bounds_var] - vars_to_import = vars_to_import + bounds_vars - - # Get list of variables to drop - varlist = list(ds_in.variables) - vars_to_drop = list(np.setdiff1d(varlist, vars_to_import)) - - # Drop them - ds_in = ds_in.drop_vars(vars_to_drop) - - # Add vegetation type info - if "patches1d_itype_veg" in list(ds_in): - this_pftlist = define_pftlist() - get_patch_ivts( - ds_in, this_pftlist - ) # Includes check of whether vegtype changes over time anywhere - vegtype_da = get_vegtype_str_da(this_pftlist) - patches1d_itype_veg_str = vegtype_da.values[ - ds_in.isel(time=0).patches1d_itype_veg.values.astype(int) - ] - npatch = len(patches1d_itype_veg_str) - patches1d_itype_veg_str = xr.DataArray( - patches1d_itype_veg_str, - coords={"patch": np.arange(0, npatch)}, - dims=["patch"], - name="patches1d_itype_veg_str", - ) - ds_in = xr.merge([ds_in, vegtype_da, patches1d_itype_veg_str]) - - # Restrict to veg. types of interest, if any - if vegtypes_to_import is not None: - ds_in = xr_flexsel(ds_in, vegtype=vegtypes_to_import) - - # Restrict to time slice, if any - if time_slice: - ds_in = safer_timeslice(ds_in, time_slice) - - # Finish import - ds_in = xr.decode_cf(ds_in, decode_times=True) - - # Compute derived variables - for var in derived_vars: - if ( - var == "HYEARS" - and "HDATES" in ds_in - and ds_in.HDATES.dims == ("time", "mxharvests", "patch") - ): - year_list = np.array([np.float32(x.year - 1) for x in ds_in.time.values]) - hyears = ds_in["HDATES"].copy() - hyears.values = np.tile( - np.expand_dims(year_list, (1, 2)), - (1, ds_in.dims["mxharvests"], ds_in.dims["patch"]), - ) - with np.errstate(invalid="ignore"): - is_le_zero = ~np.isnan(ds_in.HDATES.values) & (ds_in.HDATES.values <= 0) - hyears.values[is_le_zero] = ds_in.HDATES.values[is_le_zero] - hyears.values[np.isnan(ds_in.HDATES.values)] = np.nan - hyears.attrs["long_name"] = "DERIVED: actual crop harvest years" - hyears.attrs["units"] = "year" - ds_in["HYEARS"] = hyears - - return ds_in - - -def import_ds( - filelist, - my_vars=None, - my_vegtypes=None, - time_slice=None, - my_vars_missing_ok=None, - rename_lsmlatlon=False, - chunks=None, -): - """ - Import a dataset that can be spread over multiple files, only including specified variables - and/or vegetation types and/or timesteps, concatenating by time. - - - DOES actually read the dataset into memory, but only AFTER dropping unwanted variables and/or - vegetation types. - """ - if my_vars_missing_ok is None: - my_vars_missing_ok = [] - # Convert my_vegtypes here, if needed, to avoid repeating the process each time you read a file - # in xr.open_mfdataset(). - if my_vegtypes is not None: - if not isinstance(my_vegtypes, list): - my_vegtypes = [my_vegtypes] - if isinstance(my_vegtypes[0], str): - my_vegtypes = vegtype_str2int(my_vegtypes) - - # Same for these variables. 
- if my_vars is not None: - if not isinstance(my_vars, list): - my_vars = [my_vars] - if my_vars_missing_ok: - if not isinstance(my_vars_missing_ok, list): - my_vars_missing_ok = [my_vars_missing_ok] - - # Make sure lists are actually lists - if not isinstance(filelist, list): - filelist = [filelist] - if not isinstance(my_vars_missing_ok, list): - my_vars_missing_ok = [my_vars_missing_ok] - - # Remove files from list if they don't contain requested timesteps. - # time_slice should be in the format slice(start,end[,step]). start or end can be None to be - # unbounded on one side. Note that the standard slice() documentation suggests that only - # elements through end-1 will be selected, but that seems not to be the case in the xarray - # implementation. - if time_slice: - new_filelist = [] - for file in sorted(filelist): - filetime = xr.open_dataset(file).time - filetime_sel = safer_timeslice(filetime, time_slice) - include_this_file = filetime_sel.size - if include_this_file: - new_filelist.append(file) - - # If you found some matching files, but then you find one that doesn't, stop going - # through the list. - elif new_filelist: - break - if not new_filelist: - raise RuntimeError(f"No files found in time_slice {time_slice}") - filelist = new_filelist - - # The xarray open_mfdataset() "preprocess" argument requires a function that takes exactly one - # variable (an xarray.Dataset object). Wrapping mfdataset_preproc() in this lambda function - # allows this. Could also just allow mfdataset_preproc() to access my_vars and my_vegtypes - # directly, but that's bad practice as it could lead to scoping issues. - mfdataset_preproc_closure = lambda ds: mfdataset_preproc(ds, my_vars, my_vegtypes, time_slice) - - # Import - if isinstance(filelist, list) and len(filelist) == 1: - filelist = filelist[0] - if isinstance(filelist, list): - with warnings.catch_warnings(): - warnings.filterwarnings(action="ignore", category=DeprecationWarning) - if find_spec("dask") is None: - raise ModuleNotFoundError( - "You have asked xarray to import a list of files as a single Dataset using" - " open_mfdataset(), but this requires dask, which is not available.\nFile" - f" list: {filelist}" - ) - this_ds = xr.open_mfdataset( - sorted(filelist), - data_vars="minimal", - preprocess=mfdataset_preproc_closure, - compat="override", - coords="all", - concat_dim="time", - combine="nested", - chunks=chunks, - ) - elif isinstance(filelist, str): - this_ds = xr.open_dataset(filelist, chunks=chunks) - this_ds = mfdataset_preproc(this_ds, my_vars, my_vegtypes, time_slice) - this_ds = this_ds.compute() - - # Warn and/or error about variables that couldn't be imported or derived - if my_vars: - missing_vars = [v for v in my_vars if v not in this_ds] - ok_missing_vars = [v for v in missing_vars if v in my_vars_missing_ok] - bad_missing_vars = [v for v in missing_vars if v not in my_vars_missing_ok] - if ok_missing_vars: - print( - "Could not import some variables; either not present or not deriveable:" - f" {ok_missing_vars}" - ) - if bad_missing_vars: - raise RuntimeError( - "Could not import some variables; either not present or not deriveable:" - f" {bad_missing_vars}" - ) - - if rename_lsmlatlon: - if "lsmlat" in this_ds.dims: - this_ds = this_ds.rename({"lsmlat": "lat"}) - if "lsmlon" in this_ds.dims: - this_ds = this_ds.rename({"lsmlon": "lon"}) - - return this_ds - - def safer_timeslice(ds_in, time_slice, time_var="time"): """ ctsm_pylib can't handle time slicing like Dataset.sel(time=slice("1998-01-01", "2005-12-31")) diff 
--git a/python/ctsm/crop_calendars/generate_gdds_functions.py b/python/ctsm/crop_calendars/generate_gdds_functions.py index cb7315c00c..909e1f80a7 100644 --- a/python/ctsm/crop_calendars/generate_gdds_functions.py +++ b/python/ctsm/crop_calendars/generate_gdds_functions.py @@ -24,6 +24,9 @@ from ctsm.crop_calendars.grid_one_variable import ( # pylint: disable=wrong-import-position grid_one_variable, ) +from ctsm.crop_calendars.import_ds import ( # pylint: disable=wrong-import-position + import_ds, +) CAN_PLOT = True try: @@ -160,7 +163,7 @@ def import_rx_dates(s_or_h, date_infile, incl_patches1d_itype_veg, mxsowings, lo this_var = f"{s_or_h}date{n_sowing+1}_{i}" date_var_list = date_var_list + [this_var] - this_ds = utils.import_ds(date_infile, my_vars=date_var_list) + this_ds = import_ds(date_infile, my_vars=date_var_list) for var in this_ds: this_ds = this_ds.rename({var: var.replace(f"{s_or_h}date", "gs")}) @@ -274,7 +277,7 @@ def import_and_process_1yr( crops_to_read = utils.define_mgdcrop_list() print(h1_filelist) - dates_ds = utils.import_ds( + dates_ds = import_ds( h1_filelist, my_vars=["SDATES", "HDATES"], my_vegtypes=crops_to_read, @@ -543,7 +546,7 @@ def import_and_process_1yr( h2_files = glob.glob(pattern) if not h2_files: error(logger, f"No files found matching pattern '*h2.{this_year-1}-01-01*.nc(.base)'") - h2_ds = utils.import_ds( + h2_ds = import_ds( h2_files, my_vars=my_vars, my_vegtypes=crops_to_read, diff --git a/python/ctsm/crop_calendars/import_ds.py b/python/ctsm/crop_calendars/import_ds.py new file mode 100644 index 0000000000..77a22b626b --- /dev/null +++ b/python/ctsm/crop_calendars/import_ds.py @@ -0,0 +1,267 @@ +""" +Import a dataset that can be spread over multiple files, only including specified variables +and/or vegetation types and/or timesteps, concatenating by time. + +- DOES actually read the dataset into memory, but only AFTER dropping unwanted variables and/or + vegetation types. +""" +import re +import warnings +from importlib.util import find_spec +import numpy as np +import xarray as xr +import ctsm.crop_calendars.cropcal_utils as utils +from ctsm.crop_calendars.xr_flexsel import xr_flexsel + + +def compute_derived_vars(ds_in, var): + """ + Compute derived variables + """ + if ( + var == "HYEARS" + and "HDATES" in ds_in + and ds_in.HDATES.dims == ("time", "mxharvests", "patch") + ): + year_list = np.array([np.float32(x.year - 1) for x in ds_in.time.values]) + hyears = ds_in["HDATES"].copy() + hyears.values = np.tile( + np.expand_dims(year_list, (1, 2)), + (1, ds_in.dims["mxharvests"], ds_in.dims["patch"]), + ) + with np.errstate(invalid="ignore"): + is_le_zero = ~np.isnan(ds_in.HDATES.values) & (ds_in.HDATES.values <= 0) + hyears.values[is_le_zero] = ds_in.HDATES.values[is_le_zero] + hyears.values[np.isnan(ds_in.HDATES.values)] = np.nan + hyears.attrs["long_name"] = "DERIVED: actual crop harvest years" + hyears.attrs["units"] = "year" + ds_in["HYEARS"] = hyears + else: + raise RuntimeError(f"Unable to compute derived variable {var}") + return ds_in + + +def mfdataset_preproc(ds_in, vars_to_import, vegtypes_to_import, time_slice): + """ + Function to drop unwanted variables in preprocessing of open_mfdataset(). + + - Makes sure to NOT drop any unspecified variables that will be useful in gridding. + - Also adds vegetation type info in the form of a DataArray of strings. + - Also renames "pft" dimension (and all like-named variables, e.g., pft1d_itype_veg_str) to be + named like "patch". 
This can later be reversed, for compatibility with other code, using + patch2pft(). + """ + # Rename "pft" dimension and variables to "patch", if needed + if "pft" in ds_in.dims: + pattern = re.compile("pft.*1d") + matches = [x for x in list(ds_in.keys()) if pattern.search(x) is not None] + pft2patch_dict = {"pft": "patch"} + for match in matches: + pft2patch_dict[match] = match.replace("pft", "patch").replace("patchs", "patches") + ds_in = ds_in.rename(pft2patch_dict) + + derived_vars = [] + if vars_to_import is not None: + # Split vars_to_import into variables that are vs. aren't already in ds + derived_vars = [v for v in vars_to_import if v not in ds_in] + present_vars = [v for v in vars_to_import if v in ds_in] + vars_to_import = present_vars + + # Get list of dimensions present in variables in vars_to_import. + dim_list = [] + for var in vars_to_import: + # list(set(x)) returns a list of the unique items in x + dim_list = list(set(dim_list + list(ds_in.variables[var].dims))) + + # Get any _1d variables that are associated with those dimensions. These will be useful in + # gridding. Also, if any dimension is "pft", set up to rename it and all like-named + # variables to "patch" + oned_vars = [] + for dim in dim_list: + pattern = re.compile(f"{dim}.*1d") + matches = [x for x in list(ds_in.keys()) if pattern.search(x) is not None] + oned_vars = list(set(oned_vars + matches)) + + # Add dimensions and _1d variables to vars_to_import + vars_to_import = list(set(vars_to_import + list(ds_in.dims) + oned_vars)) + + # Add any _bounds variables + bounds_vars = [] + for var in vars_to_import: + bounds_var = var + "_bounds" + if bounds_var in ds_in: + bounds_vars = bounds_vars + [bounds_var] + vars_to_import = vars_to_import + bounds_vars + + # Get list of variables to drop + varlist = list(ds_in.variables) + vars_to_drop = list(np.setdiff1d(varlist, vars_to_import)) + + # Drop them + ds_in = ds_in.drop_vars(vars_to_drop) + + # Add vegetation type info + if "patches1d_itype_veg" in list(ds_in): + this_pftlist = utils.define_pftlist() + utils.get_patch_ivts( + ds_in, this_pftlist + ) # Includes check of whether vegtype changes over time anywhere + vegtype_da = utils.get_vegtype_str_da(this_pftlist) + patches1d_itype_veg_str = vegtype_da.values[ + ds_in.isel(time=0).patches1d_itype_veg.values.astype(int) + ] + npatch = len(patches1d_itype_veg_str) + patches1d_itype_veg_str = xr.DataArray( + patches1d_itype_veg_str, + coords={"patch": np.arange(0, npatch)}, + dims=["patch"], + name="patches1d_itype_veg_str", + ) + ds_in = xr.merge([ds_in, vegtype_da, patches1d_itype_veg_str]) + + # Restrict to veg. types of interest, if any + if vegtypes_to_import is not None: + ds_in = xr_flexsel(ds_in, vegtype=vegtypes_to_import) + + # Restrict to time slice, if any + if time_slice: + ds_in = utils.safer_timeslice(ds_in, time_slice) + + # Finish import + ds_in = xr.decode_cf(ds_in, decode_times=True) + + # Compute derived variables + for var in derived_vars: + ds_in = compute_derived_vars(ds_in, var) + + return ds_in + + +def process_inputs(filelist, my_vars, my_vegtypes, my_vars_missing_ok): + """ + Process inputs to import_ds() + """ + if my_vars_missing_ok is None: + my_vars_missing_ok = [] + # Convert my_vegtypes here, if needed, to avoid repeating the process each time you read a file + # in xr.open_mfdataset(). 
+ if my_vegtypes is not None: + if not isinstance(my_vegtypes, list): + my_vegtypes = [my_vegtypes] + if isinstance(my_vegtypes[0], str): + my_vegtypes = utils.vegtype_str2int(my_vegtypes) + + # Same for these variables. + if my_vars is not None: + if not isinstance(my_vars, list): + my_vars = [my_vars] + if my_vars_missing_ok: + if not isinstance(my_vars_missing_ok, list): + my_vars_missing_ok = [my_vars_missing_ok] + + # Make sure lists are actually lists + if not isinstance(filelist, list): + filelist = [filelist] + if not isinstance(my_vars_missing_ok, list): + my_vars_missing_ok = [my_vars_missing_ok] + return filelist, my_vars, my_vegtypes, my_vars_missing_ok + + +def import_ds( + filelist, + my_vars=None, + my_vegtypes=None, + time_slice=None, + my_vars_missing_ok=None, + rename_lsmlatlon=False, + chunks=None, +): + """ + Import a dataset that can be spread over multiple files, only including specified variables + and/or vegetation types and/or timesteps, concatenating by time. + + - DOES actually read the dataset into memory, but only AFTER dropping unwanted variables and/or + vegetation types. + """ + filelist, my_vars, my_vegtypes, my_vars_missing_ok = process_inputs( + filelist, my_vars, my_vegtypes, my_vars_missing_ok + ) + + # Remove files from list if they don't contain requested timesteps. + # time_slice should be in the format slice(start,end[,step]). start or end can be None to be + # unbounded on one side. Note that the standard slice() documentation suggests that only + # elements through end-1 will be selected, but that seems not to be the case in the xarray + # implementation. + if time_slice: + new_filelist = [] + for file in sorted(filelist): + filetime = xr.open_dataset(file).time + filetime_sel = utils.safer_timeslice(filetime, time_slice) + include_this_file = filetime_sel.size + if include_this_file: + new_filelist.append(file) + + # If you found some matching files, but then you find one that doesn't, stop going + # through the list. + elif new_filelist: + break + if not new_filelist: + raise RuntimeError(f"No files found in time_slice {time_slice}") + filelist = new_filelist + + # The xarray open_mfdataset() "preprocess" argument requires a function that takes exactly one + # variable (an xarray.Dataset object). Wrapping mfdataset_preproc() in this lambda function + # allows this. Could also just allow mfdataset_preproc() to access my_vars and my_vegtypes + # directly, but that's bad practice as it could lead to scoping issues. 
+    mfdataset_preproc_closure = lambda ds: mfdataset_preproc(ds, my_vars, my_vegtypes, time_slice)
+
+    # Import
+    if isinstance(filelist, list) and len(filelist) == 1:
+        filelist = filelist[0]
+    if isinstance(filelist, list):
+        with warnings.catch_warnings():
+            warnings.filterwarnings(action="ignore", category=DeprecationWarning)
+            if find_spec("dask") is None:
+                raise ModuleNotFoundError(
+                    "You have asked xarray to import a list of files as a single Dataset using"
+                    " open_mfdataset(), but this requires dask, which is not available.\nFile"
+                    f" list: {filelist}"
+                )
+            this_ds = xr.open_mfdataset(
+                sorted(filelist),
+                data_vars="minimal",
+                preprocess=mfdataset_preproc_closure,
+                compat="override",
+                coords="all",
+                concat_dim="time",
+                combine="nested",
+                chunks=chunks,
+            )
+    elif isinstance(filelist, str):
+        this_ds = xr.open_dataset(filelist, chunks=chunks)
+        this_ds = mfdataset_preproc(this_ds, my_vars, my_vegtypes, time_slice)
+        this_ds = this_ds.compute()
+
+    # Warn and/or error about variables that couldn't be imported or derived
+    if my_vars:
+        missing_vars = [v for v in my_vars if v not in this_ds]
+        ok_missing_vars = [v for v in missing_vars if v in my_vars_missing_ok]
+        bad_missing_vars = [v for v in missing_vars if v not in my_vars_missing_ok]
+        if ok_missing_vars:
+            print(
+                "Could not import some variables; either not present or not derivable:"
+                f" {ok_missing_vars}"
+            )
+        if bad_missing_vars:
+            raise RuntimeError(
+                "Could not import some variables; either not present or not derivable:"
+                f" {bad_missing_vars}"
+            )
+
+    if rename_lsmlatlon:
+        if "lsmlat" in this_ds.dims:
+            this_ds = this_ds.rename({"lsmlat": "lat"})
+        if "lsmlon" in this_ds.dims:
+            this_ds = this_ds.rename({"lsmlon": "lon"})
+
+    return this_ds

From 605bb3b2ece854ff831313fe99a53acac8784413 Mon Sep 17 00:00:00 2001
From: Sam Rabin
Date: Sun, 11 Feb 2024 10:30:37 -0700
Subject: [PATCH 76/85] Satisfy pylint for cropcal_utils.py.
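
The changes below collapse chained isinstance() calls into the single-call
tuple form that pylint's consider-merging-isinstance check (R1701) asks for,
and rewrap one over-long comment. A minimal, self-contained sketch of the
isinstance idiom (the variable here is hypothetical, not taken from the diff):

    import numpy as np

    x = np.arange(3)

    # Chained form that pylint flags (R1701, consider-merging-isinstance):
    chained = isinstance(x, list) or isinstance(x, np.ndarray)

    # Merged tuple form used throughout this patch; behavior is identical:
    merged = isinstance(x, (list, np.ndarray))

    assert chained == merged

The two forms always agree; the tuple form is simply one call and reads as a
single type check.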
--- python/ctsm/crop_calendars/cropcal_utils.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/python/ctsm/crop_calendars/cropcal_utils.py b/python/ctsm/crop_calendars/cropcal_utils.py index e2b94070a1..00ed2413d2 100644 --- a/python/ctsm/crop_calendars/cropcal_utils.py +++ b/python/ctsm/crop_calendars/cropcal_utils.py @@ -101,7 +101,7 @@ def ivt_str2int(ivt_str): pftlist = define_pftlist() if isinstance(ivt_str, str): ivt_int = pftlist.index(ivt_str) - elif isinstance(ivt_str, list) or isinstance(ivt_str, np.ndarray): + elif isinstance(ivt_str, (list, np.ndarray)): ivt_int = [ivt_str2int(x) for x in ivt_str] if isinstance(ivt_str, np.ndarray): ivt_int = np.array(ivt_int) @@ -120,7 +120,7 @@ def ivt_int2str(ivt_int): pftlist = define_pftlist() if np.issubdtype(type(ivt_int), np.integer) or int(ivt_int) == ivt_int: ivt_str = pftlist[int(ivt_int)] - elif isinstance(ivt_int, list) or isinstance(ivt_int, np.ndarray): + elif isinstance(ivt_int, (list, np.ndarray)): ivt_str = [ivt_int2str(x) for x in ivt_int] if isinstance(ivt_int, np.ndarray): ivt_str = np.array(ivt_str) @@ -150,7 +150,7 @@ def is_this_vegtype(this_vegtype, this_filter, this_method): # Make sure data type of this_vegtype is acceptable if isinstance(this_vegtype, float) and int(this_vegtype) == this_vegtype: this_vegtype = int(this_vegtype) - data_type_ok = lambda x: isinstance(x, str) or isinstance(x, int) or isinstance(x, np.int64) + data_type_ok = lambda x: isinstance(x, (int, np.int64, str)) ok_input = True if not data_type_ok(this_vegtype): if isinstance(this_vegtype, xr.core.dataarray.DataArray): @@ -255,7 +255,8 @@ def get_patch_ivts(this_ds, this_pftlist): """ Get PFT of each patch, in both integer and string forms. """ - # First, get all the integer values; should be time*pft or pft*time. We will eventually just take the first timestep. + # First, get all the integer values; should be time*pft or pft*time. We will eventually just + # take the first timestep. vegtype_int = this_ds.patches1d_itype_veg vegtype_int.values = vegtype_int.values.astype(int) From 3808b4b8391b6dad3e7c9bdfd8b936ad972f424e Mon Sep 17 00:00:00 2001 From: Sam Rabin Date: Sun, 11 Feb 2024 10:33:18 -0700 Subject: [PATCH 77/85] Ignore raise-missing-from in xr_flexsel.py. --- python/ctsm/crop_calendars/xr_flexsel.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/ctsm/crop_calendars/xr_flexsel.py b/python/ctsm/crop_calendars/xr_flexsel.py index 1e30593946..d51d925985 100644 --- a/python/ctsm/crop_calendars/xr_flexsel.py +++ b/python/ctsm/crop_calendars/xr_flexsel.py @@ -254,7 +254,7 @@ def handle_callable(xr_object, key, selection): if selection == np.mean: # pylint: disable=comparison-with-callable try: xr_object = xr_object.mean(dim=key) - except: + except: # pylint: disable=raise-missing-from raise ValueError( f"Failed to take mean of dimension {key}. Try doing so outside of xr_flexsel()." ) From a90049d260de22ff3b7fbe5c3bf71423fc6e3b31 Mon Sep 17 00:00:00 2001 From: Sam Rabin Date: Sun, 11 Feb 2024 10:50:37 -0700 Subject: [PATCH 78/85] Remove unneeded '_CTSM_PYTHON =' bits. 
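
The deleted blocks all follow one pattern, sketched below: compute the repo's
python/ directory relative to the current file and prepend it to sys.path
before importing ctsm modules. Inside the package this is redundant, because
these modules are already imported through their full ctsm.crop_calendars.*
paths; the pattern survives only where it is genuinely needed, e.g. in
check_rxboth_run.py for the RXCROPMATURITY RUN phase (see the next patch).
A sketch of the removed boilerplate, assuming a module three directories
below the repo root:

    import os
    import sys

    # Manually locate <repo>/python relative to this file and put it on
    # sys.path so that "import ctsm..." works even as a standalone script.
    _CTSM_PYTHON = os.path.join(
        os.path.dirname(os.path.realpath(__file__)), os.pardir, os.pardir, os.pardir, "python"
    )
    sys.path.insert(1, _CTSM_PYTHON)

    # With the package already importable, a plain absolute import suffices:
    from ctsm.crop_calendars.import_ds import import_ds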
--- .../crop_calendars/check_constant_vars.py | 14 +--------- python/ctsm/crop_calendars/check_rx_obeyed.py | 15 ++--------- .../crop_calendars/convert_axis_time2gs.py | 9 ------- python/ctsm/crop_calendars/cropcal_module.py | 26 ++++--------------- .../crop_calendars/generate_gdds_functions.py | 22 ++++------------ 5 files changed, 13 insertions(+), 73 deletions(-) diff --git a/python/ctsm/crop_calendars/check_constant_vars.py b/python/ctsm/crop_calendars/check_constant_vars.py index 1a5a4e62c6..aa25a412fe 100644 --- a/python/ctsm/crop_calendars/check_constant_vars.py +++ b/python/ctsm/crop_calendars/check_constant_vars.py @@ -2,20 +2,8 @@ For variables that should stay constant, make sure they are """ -import sys -import os import numpy as np - -# Import the CTSM Python utilities. -# sys.path.insert() is necessary for RXCROPMATURITY to work. The fact that it's calling this script -# in the RUN phase seems to require the python/ directory to be manually added to path. -_CTSM_PYTHON = os.path.join( - os.path.dirname(os.path.realpath(__file__)), os.pardir, os.pardir, os.pardir, "python" -) -sys.path.insert(1, _CTSM_PYTHON) -from ctsm.crop_calendars.cropcal_module import ( # pylint: disable=wrong-import-position - import_rx_dates, -) +from ctsm.crop_calendars.cropcal_module import import_rx_dates def check_one_constant_var_setup(this_ds, case, var): diff --git a/python/ctsm/crop_calendars/check_rx_obeyed.py b/python/ctsm/crop_calendars/check_rx_obeyed.py index 3d769d3820..99b8d80bde 100644 --- a/python/ctsm/crop_calendars/check_rx_obeyed.py +++ b/python/ctsm/crop_calendars/check_rx_obeyed.py @@ -2,21 +2,10 @@ Check that prescribed crop calendars were obeyed """ -import sys -import os import numpy as np -# Import the CTSM Python utilities. -# sys.path.insert() is necessary for RXCROPMATURITY to work. The fact that it's calling this script -# in the RUN phase seems to require the python/ directory to be manually added to path. -_CTSM_PYTHON = os.path.join( - os.path.dirname(os.path.realpath(__file__)), os.pardir, os.pardir, os.pardir, "python" -) -sys.path.insert(1, _CTSM_PYTHON) -import ctsm.crop_calendars.cropcal_utils as utils # pylint: disable=wrong-import-position -from ctsm.crop_calendars.cropcal_constants import ( # pylint: disable=wrong-import-position - DEFAULT_GDD_MIN, -) +import ctsm.crop_calendars.cropcal_utils as utils +from ctsm.crop_calendars.cropcal_constants import DEFAULT_GDD_MIN def get_pct_harv_at_mature(harvest_reason_da): diff --git a/python/ctsm/crop_calendars/convert_axis_time2gs.py b/python/ctsm/crop_calendars/convert_axis_time2gs.py index f311d39e05..d48514370d 100644 --- a/python/ctsm/crop_calendars/convert_axis_time2gs.py +++ b/python/ctsm/crop_calendars/convert_axis_time2gs.py @@ -3,18 +3,9 @@ """ import warnings import sys -import os import numpy as np import xarray as xr -# Import the CTSM Python utilities. -# sys.path.insert() is necessary for RXCROPMATURITY to work. The fact that it's calling this script -# in the RUN phase seems to require the python/ directory to be manually added to path. 
-_CTSM_PYTHON = os.path.join( - os.path.dirname(os.path.realpath(__file__)), os.pardir, os.pardir, os.pardir, "python" -) -sys.path.insert(1, _CTSM_PYTHON) - try: import pandas as pd except ModuleNotFoundError: diff --git a/python/ctsm/crop_calendars/cropcal_module.py b/python/ctsm/crop_calendars/cropcal_module.py index 671a6334c7..3fe6942f94 100644 --- a/python/ctsm/crop_calendars/cropcal_module.py +++ b/python/ctsm/crop_calendars/cropcal_module.py @@ -2,32 +2,16 @@ Helper functions for various crop calendar stuff """ -import sys import os import glob import numpy as np import xarray as xr -# Import the CTSM Python utilities. -# sys.path.insert() is necessary for RXCROPMATURITY to work. The fact that it's calling this script -# in the RUN phase seems to require the python/ directory to be manually added to path. -_CTSM_PYTHON = os.path.join( - os.path.dirname(os.path.realpath(__file__)), os.pardir, os.pardir, os.pardir, "python" -) -sys.path.insert(1, _CTSM_PYTHON) -import ctsm.crop_calendars.cropcal_utils as utils # pylint: disable=wrong-import-position -from ctsm.crop_calendars.convert_axis_time2gs import ( # pylint: disable=wrong-import-position - convert_axis_time2gs, -) -from ctsm.crop_calendars.check_rx_obeyed import ( # pylint: disable=wrong-import-position - check_rx_obeyed, -) -from ctsm.crop_calendars.cropcal_constants import ( # pylint: disable=wrong-import-position - DEFAULT_GDD_MIN, -) -from ctsm.crop_calendars.import_ds import ( # pylint: disable=wrong-import-position - import_ds, -) +import ctsm.crop_calendars.cropcal_utils as utils +from ctsm.crop_calendars.convert_axis_time2gs import convert_axis_time2gs +from ctsm.crop_calendars.check_rx_obeyed import check_rx_obeyed +from ctsm.crop_calendars.cropcal_constants import DEFAULT_GDD_MIN +from ctsm.crop_calendars.import_ds import import_ds def check_and_trim_years(year_1, year_n, ds_in): diff --git a/python/ctsm/crop_calendars/generate_gdds_functions.py b/python/ctsm/crop_calendars/generate_gdds_functions.py index 909e1f80a7..8af2fdc049 100644 --- a/python/ctsm/crop_calendars/generate_gdds_functions.py +++ b/python/ctsm/crop_calendars/generate_gdds_functions.py @@ -4,29 +4,17 @@ # pylint: disable=too-many-lines,too-many-statements import warnings import os -import sys import glob import datetime as dt from importlib import util as importlib_util import numpy as np import xarray as xr -# Import the CTSM Python utilities. -# sys.path.insert() is necessary for RXCROPMATURITY to work. The fact that it's calling this script -# in the RUN phase seems to require the python/ directory to be manually added to path. 
-_CTSM_PYTHON = os.path.join( - os.path.dirname(os.path.realpath(__file__)), os.pardir, os.pardir, os.pardir, "python" -) -sys.path.insert(1, _CTSM_PYTHON) -import ctsm.crop_calendars.cropcal_utils as utils # pylint: disable=wrong-import-position -import ctsm.crop_calendars.cropcal_module as cc # pylint: disable=wrong-import-position -from ctsm.crop_calendars.xr_flexsel import xr_flexsel # pylint: disable=wrong-import-position -from ctsm.crop_calendars.grid_one_variable import ( # pylint: disable=wrong-import-position - grid_one_variable, -) -from ctsm.crop_calendars.import_ds import ( # pylint: disable=wrong-import-position - import_ds, -) +import ctsm.crop_calendars.cropcal_utils as utils +import ctsm.crop_calendars.cropcal_module as cc +from ctsm.crop_calendars.xr_flexsel import xr_flexsel +from ctsm.crop_calendars.grid_one_variable import grid_one_variable +from ctsm.crop_calendars.import_ds import import_ds CAN_PLOT = True try: From 657cc5c45fc5ca65e84746c15d0a70097c29ce20 Mon Sep 17 00:00:00 2001 From: Sam Rabin Date: Sun, 11 Feb 2024 15:10:55 -0700 Subject: [PATCH 79/85] Fix imports in check_rxboth_run.py. --- python/ctsm/crop_calendars/check_rxboth_run.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/ctsm/crop_calendars/check_rxboth_run.py b/python/ctsm/crop_calendars/check_rxboth_run.py index 126ef98bbc..ae4decde30 100644 --- a/python/ctsm/crop_calendars/check_rxboth_run.py +++ b/python/ctsm/crop_calendars/check_rxboth_run.py @@ -6,7 +6,6 @@ import glob import os import numpy as np -import cropcal_module as cc # pylint: disable=import-error # Import the CTSM Python utilities. # sys.path.insert() is necessary for RXCROPMATURITY to work. The fact that it's calling this script @@ -15,6 +14,7 @@ os.path.dirname(os.path.realpath(__file__)), os.pardir, os.pardir, os.pardir, "python" ) sys.path.insert(1, _CTSM_PYTHON) +import ctsm.crop_calendars.cropcal_module as cc # pylint: disable=wrong-import-position from ctsm.crop_calendars.check_rx_obeyed import ( # pylint: disable=wrong-import-position check_rx_obeyed, ) From b671ed77779d2296d62564ee2b7eeb8655354b81 Mon Sep 17 00:00:00 2001 From: Sam Rabin Date: Tue, 13 Feb 2024 16:24:17 -0700 Subject: [PATCH 80/85] FSURDATMODIFYCTSM test (aux_clm/clm_pymods) moved from cheyenne_intel to derecho_gnu. derecho_intel doesn't currently work with debug mode on. --- cime_config/testdefs/testlist_clm.xml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cime_config/testdefs/testlist_clm.xml b/cime_config/testdefs/testlist_clm.xml index 4761a2111f..d8ef6357cd 100644 --- a/cime_config/testdefs/testlist_clm.xml +++ b/cime_config/testdefs/testlist_clm.xml @@ -3308,8 +3308,8 @@ - - + + From 7401dd989e6ce7299d09e93fdf9bd95a227be0c0 Mon Sep 17 00:00:00 2001 From: Samuel Levis Date: Thu, 15 Feb 2024 12:42:07 -0700 Subject: [PATCH 81/85] Resolve issue #2366 and PR #2355 --- src/biogeophys/CanopyFluxesMod.F90 | 3 ++- src/biogeophys/TemperatureType.F90 | 4 ++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/src/biogeophys/CanopyFluxesMod.F90 b/src/biogeophys/CanopyFluxesMod.F90 index f152e761eb..58334a70c0 100644 --- a/src/biogeophys/CanopyFluxesMod.F90 +++ b/src/biogeophys/CanopyFluxesMod.F90 @@ -1605,7 +1605,8 @@ subroutine CanopyFluxes(bounds, num_exposedvegp, filter_exposedvegp, if (t_veg(p) > tfrz ) then ! above freezing, update accumulation in liqcan if ((qflx_evap_veg(p)-qflx_tran_veg(p))*dtime > liqcan(p)) then ! all liq evap ! In this case, all liqcan will evap. 
Take remainder from snocan - snocan(p)=snocan(p)+liqcan(p)+(qflx_tran_veg(p)-qflx_evap_veg(p))*dtime + snocan(p) = max(0._r8, & + snocan(p) + liqcan(p) + (qflx_tran_veg(p) - qflx_evap_veg(p)) * dtime) end if liqcan(p) = max(0._r8,liqcan(p)+(qflx_tran_veg(p)-qflx_evap_veg(p))*dtime) diff --git a/src/biogeophys/TemperatureType.F90 b/src/biogeophys/TemperatureType.F90 index 21445caaae..ab310650c8 100644 --- a/src/biogeophys/TemperatureType.F90 +++ b/src/biogeophys/TemperatureType.F90 @@ -732,7 +732,7 @@ subroutine InitCold(this, bounds, & end if else if (col%itype(c) == icol_road_perv .or. col%itype(c) == icol_road_imperv) then - this%t_soisno_col(c,1:nlevgrnd) = 272._r8 + this%t_soisno_col(c,1:nlevgrnd) = 274._r8 else if (col%itype(c) == icol_sunwall .or. col%itype(c) == icol_shadewall & .or. col%itype(c) == icol_roof) then ! Set sunwall, shadewall, roof to fairly high temperature to avoid initialization @@ -741,7 +741,7 @@ subroutine InitCold(this, bounds, & end if end if else - this%t_soisno_col(c,1:nlevgrnd) = 274._r8 + this%t_soisno_col(c,1:nlevgrnd) = 272._r8 if (use_excess_ice .and. (lun%itype(l) == istsoil .or. lun%itype(l) == istcrop)) then this%t_soisno_col(c,1:nlevgrnd) = SHR_CONST_TKFRZ - 5.0_r8 !needs to be below freezing to properly initiate excess ice end if From 1c9ef971a72e4afbc562ad850bf88f4e2d4230fb Mon Sep 17 00:00:00 2001 From: Samuel Levis Date: Thu, 15 Feb 2024 16:47:18 -0700 Subject: [PATCH 82/85] Revert changes from #2355 due to error in FATES --- src/biogeophys/TemperatureType.F90 | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/biogeophys/TemperatureType.F90 b/src/biogeophys/TemperatureType.F90 index ab310650c8..21445caaae 100644 --- a/src/biogeophys/TemperatureType.F90 +++ b/src/biogeophys/TemperatureType.F90 @@ -732,7 +732,7 @@ subroutine InitCold(this, bounds, & end if else if (col%itype(c) == icol_road_perv .or. col%itype(c) == icol_road_imperv) then - this%t_soisno_col(c,1:nlevgrnd) = 274._r8 + this%t_soisno_col(c,1:nlevgrnd) = 272._r8 else if (col%itype(c) == icol_sunwall .or. col%itype(c) == icol_shadewall & .or. col%itype(c) == icol_roof) then ! Set sunwall, shadewall, roof to fairly high temperature to avoid initialization @@ -741,7 +741,7 @@ subroutine InitCold(this, bounds, & end if end if else - this%t_soisno_col(c,1:nlevgrnd) = 272._r8 + this%t_soisno_col(c,1:nlevgrnd) = 274._r8 if (use_excess_ice .and. (lun%itype(l) == istsoil .or. lun%itype(l) == istcrop)) then this%t_soisno_col(c,1:nlevgrnd) = SHR_CONST_TKFRZ - 5.0_r8 !needs to be below freezing to properly initiate excess ice end if From 06290f823c0a5ccbb3bb898183621b190a88cd7f Mon Sep 17 00:00:00 2001 From: Sam Rabin Date: Fri, 16 Feb 2024 10:55:07 -0700 Subject: [PATCH 83/85] Hillslope tests now use v1.3 surface dataset. 
--- cime_config/testdefs/testmods_dirs/clm/Hillslope/user_nl_clm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cime_config/testdefs/testmods_dirs/clm/Hillslope/user_nl_clm b/cime_config/testdefs/testmods_dirs/clm/Hillslope/user_nl_clm index e6d726c860..9d74da1c94 100644 --- a/cime_config/testdefs/testmods_dirs/clm/Hillslope/user_nl_clm +++ b/cime_config/testdefs/testmods_dirs/clm/Hillslope/user_nl_clm @@ -6,6 +6,6 @@ hillslope_transmissivity_method = 'LayerSum' hillslope_pft_distribution_method = 'PftLowlandUpland' hillslope_soil_profile_method = 'Uniform' -fsurdat = '$DIN_LOC_ROOT/lnd/clm2/testdata/surfdata_10x15_78pfts_simyr2000_synthetic_cosphill_1.2.nc' +fsurdat = '$DIN_LOC_ROOT/lnd/clm2/testdata/surfdata_10x15_78pfts_simyr2000_synthetic_cosphill_1.3.nc' use_ssre = .false. \ No newline at end of file From 3ba613da4250bab14af7cf2d7800c4096fde6f6f Mon Sep 17 00:00:00 2001 From: Samuel Levis Date: Fri, 16 Feb 2024 13:57:54 -0700 Subject: [PATCH 84/85] Updated ChangeLog/ChangeSum --- doc/ChangeLog | 81 +++++++++++++++++++++++++++++++++++++++++++++++++++ doc/ChangeSum | 1 + 2 files changed, 82 insertions(+) diff --git a/doc/ChangeLog b/doc/ChangeLog index 084516e23e..fcdf87d053 100644 --- a/doc/ChangeLog +++ b/doc/ChangeLog @@ -1,4 +1,85 @@ =============================================================== +Tag name: ctsm5.1.dev168 +Originator(s): slevis (Samuel Levis,UCAR/TSS,303-665-1310) +Date: Fri 16 Feb 2024 01:27:41 PM MST +One-line Summary: Remove a source of negative snocan in CanopyFluxesMod + +Purpose and description of changes +---------------------------------- + +In ctsm5.2 testing, this test +LWISO_Ld10.f10_f10_mg37.I2000Clm50BgcCrop.derecho_gnu.clm-coldStart +complained of a tiny negative ice1_grc tracer not matching the bulk +value. My troubleshooting led me to more than tiny negative snocan +originating in a line of code that this PR now changes to prevent +negative values. + +Significant changes to scientifically-supported configurations +-------------------------------------------------------------- + +Does this tag change answers significantly for any of the following physics configurations? +(Details of any changes will be given in the "Answer changes" section below.) + + [Put an [X] in the box for any configuration with significant answer changes.] + +[ ] clm5_1 + +[ ] clm5_0 + +[ ] ctsm5_0-nwp + +[ ] clm4_5 + + +Bugs fixed +---------- +CTSM issues fixed (include CTSM Issue #): +Fixes #2366 + +Notes of particular relevance for developers: +--------------------------------------------- +Caveats for developers (e.g., code that is duplicated that requires double maintenance): + It was suggested at the ctsm software meeting yesterday that, in addition to + including "max(0._r8," in this line of code, that I reorder the code + by bringing "liqcan(p) =" before "snocan(p) =". I have decided against this + because the existing order repeats in a following paragraph of code right + after this one. It's likely that the group's suggestion would have worked, but + I did not want to delay this PR for a longer evaluation because CTSM5.2 is + waiting for this merge, in order to proceed with next steps. 
+ + +Testing summary: +---------------- + + regular tests (aux_clm: https://github.com/ESCOMP/CTSM/wiki/System-Testing-Guide#pre-merge-system-testing): + + derecho ----- OK + izumi ------- OK + + +Answer changes +-------------- + +Changes answers relative to baseline: YES + + Summarize any changes to answers, i.e., + - what code configurations: all + - what platforms/compilers: all + - nature of change: roundoff + A short test, e.g. + SMS_Ln9.ne30pg2_ne30pg2_mg17.I1850Clm50Sp.derecho_intel.clm-clm50cam6LndTuningMode + has these maximum differences: +RMS H2OCAN 4.7359E-19 NORMALIZED 4.0163E-18 +RMS SNOCAN 4.4873E-19 NORMALIZED 9.1036E-18 + while the differences grow in longer tests. + +Other details +------------- +Pull Requests that document the changes (include PR ids): + https://github.com/ESCOMP/ctsm/pull/2371 + +=============================================================== +=============================================================== Tag name: ctsm5.1.dev167 Originator(s): samrabin (Sam Rabin, UCAR/TSS, samrabin@ucar.edu) Date: Thu 08 Feb 2024 01:56:05 PM MST diff --git a/doc/ChangeSum b/doc/ChangeSum index d644cff144..56a460ea85 100644 --- a/doc/ChangeSum +++ b/doc/ChangeSum @@ -1,5 +1,6 @@ Tag Who Date Summary ============================================================================================================================ + ctsm5.1.dev168 slevis 02/16/2024 Remove a source of negative snocan in CanopyFluxesMod ctsm5.1.dev167 samrabin 02/08/2024 Delete _FillValue and history from parameter files ctsm5.1.dev166 multiple 01/24/2024 BFB merge tag ctsm5.1.dev165 slevis 01/19/2024 Turn Meier2022, tillage, residue removal on for ctsm5.1, fix #2212 From 9b43482202383badada23dd865bdd8ccdc27aba7 Mon Sep 17 00:00:00 2001 From: Sam Rabin Date: Thu, 22 Feb 2024 09:42:06 -0700 Subject: [PATCH 85/85] Update ChangeLog and ChangeSum. --- doc/ChangeLog | 71 +++++++++++++++++++++++++++++++++++++++++++++++++++ doc/ChangeSum | 1 + 2 files changed, 72 insertions(+) diff --git a/doc/ChangeLog b/doc/ChangeLog index fcdf87d053..d26715f699 100644 --- a/doc/ChangeLog +++ b/doc/ChangeLog @@ -1,4 +1,75 @@ =============================================================== +Tag name: ctsm5.1.dev169 +Originator(s): samrabin (Sam Rabin, UCAR/TSS, samrabin@ucar.edu) +Date: Thu 22 Feb 2024 09:42:57 AM MST +One-line Summary: Merge b4b-dev + +Purpose and description of changes +---------------------------------- + +Brings in 3 PRs from b4b-dev to master: +- Do not crash "make all" even if pylint isn't clean (ESCOMP/CTSM#2353; Sam Rabin) +- Resolve pylint issues (ESCOMP/CTSM#2354; Sam Rabin) +- Move FSURDATMODIFYCTSM test to Derecho (ESCOMP/CTSM#2364; Sam Rabin) + +Significant changes to scientifically-supported configurations +-------------------------------------------------------------- + +Does this tag change answers significantly for any of the following physics configurations? +(Details of any changes will be given in the "Answer changes" section below.) 
+ +[ ] clm5_1 + +[ ] clm5_0 + +[ ] ctsm5_0-nwp + +[ ] clm4_5 + + +Bugs fixed +---------- + +CTSM issues fixed: +- Fixes ESCOMP/CTSM#2255: make lint is not clean in ctsm5.1.dev152 +- Fixes ESCOMP/CTSM#2316: "make all" doesn't run black if lint fails +- FIXES ESCOMP/CTSM#2362: FSURDATMODIFYCTSM test should be moved to Derecho or Izumi + + +Notes of particular relevance for developers: +--------------------------------------------- + +Changes to tests or testing: +- FSURDATMODIFYCTSM test changed from derecho_intel (didn't work in debug mode) to derecho_gnu. I.e., from + FSURDATMODIFYCTSM_D_Mmpi-serial_Ld1.5x5_amazon.I2000Clm50SpRs.derecho_intel + to + FSURDATMODIFYCTSM_D_Mmpi-serial_Ld1.5x5_amazon.I2000Clm50SpRs.derecho_gnu + + +Testing summary: +---------------- + + [PASS means all tests PASS; OK means tests PASS other than expected fails.] + + regular tests (aux_clm: https://github.com/ESCOMP/CTSM/wiki/System-Testing-Guide#pre-merge-system-testing): + + derecho ----- OK + izumi ------- OK + + any other testing (give details below): + - "make all" in python/ is clean. + + +Other details +------------- + +Pull Requests that document the changes (include PR ids): +- ESCOMP/CTSM#2353: Do not crash "make all" even if pylint isn't clean (https://github.com/ESCOMP/CTSM/pull/2353) +- ESCOMP/CTSM#2354: Resolve pylint issues (https://github.com/ESCOMP/CTSM/pull/2354) +- ESCOMP/CTSM#2364: Move FSURDATMODIFYCTSM test to Derecho (https://github.com/ESCOMP/CTSM/pull/2364) + +=============================================================== +=============================================================== Tag name: ctsm5.1.dev168 Originator(s): slevis (Samuel Levis,UCAR/TSS,303-665-1310) Date: Fri 16 Feb 2024 01:27:41 PM MST diff --git a/doc/ChangeSum b/doc/ChangeSum index 56a460ea85..18ae34626f 100644 --- a/doc/ChangeSum +++ b/doc/ChangeSum @@ -1,5 +1,6 @@ Tag Who Date Summary ============================================================================================================================ + ctsm5.1.dev169 samrabin 02/22/2024 Merge b4b-dev ctsm5.1.dev168 slevis 02/16/2024 Remove a source of negative snocan in CanopyFluxesMod ctsm5.1.dev167 samrabin 02/08/2024 Delete _FillValue and history from parameter files ctsm5.1.dev166 multiple 01/24/2024 BFB merge tag
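
For reference, the ctsm5.1.dev168 fix summarized above adds a clamp in
CanopyFluxesMod.F90 that keeps canopy snow from going negative when all
canopy liquid evaporates within a timestep. A Python sketch of that logic
(the model code is Fortran; the function name and sample numbers here are
illustrative, not from the model):

    def update_canopy_water(snocan, liqcan, qflx_evap_veg, qflx_tran_veg, dtime):
        # Net liquid evaporated from the canopy this timestep (kg/m2).
        evap_liq = (qflx_evap_veg - qflx_tran_veg) * dtime
        if evap_liq > liqcan:
            # All canopy liquid evaporates; draw the remainder from canopy
            # snow, flooring at zero so snocan can no longer go negative.
            snocan = max(0.0, snocan + liqcan - evap_liq)
        liqcan = max(0.0, liqcan - evap_liq)
        return snocan, liqcan

    # A tiny liquid store with a slightly larger evaporative demand used to
    # leave snocan at -1e-19; with the clamp it ends at exactly 0.0.
    print(update_canopy_water(0.0, 1e-19, 2e-19, 0.0, 1.0))  # -> (0.0, 0.0)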