Dev-ife: Updates to required dependencies, bug fix for rwl readers an…

…d writers (#56) * xdate works for overall series correlation * Added code for creating bins and dividing series into segments * Cleaning up and commenting related to xdate * series_corr works but is inefficient * WIP changes * Added comments, updated working jupyter notebook * Changes since start of fall semester * variance stabilization produces accurate values * Unit tests for readers, summary, stats and tbrm * Added unit tests for detrend and chron * Added tests for chron_stabilized, series_corr and writers * Fixed merge conflicts * v0.1 release * Fixing the rwl reader and writer problem * Create pypi_release.yml * push for workflow (#54) * Update pypi_release.yml * Update pyproject.toml * xdate works for overall series correlation * series_corr works but is inefficient * WIP changes * Added comments, updated working jupyter notebook * Changes since start of fall semester * variance stabilization produces accurate values * Unit tests for readers, summary, stats and tbrm * Added tests for chron_stabilized, series_corr and writers * v0.1 release * Attempt at fixing dependency issues --------- Co-authored-by: Ifeoluwa Ale <[email protected]> Co-authored-by: cosimichele <[email protected]> Co-authored-by: Ifeoluwa Ale <[email protected]> Co-authored-by: Michele Cosi <[email protected]>
OpenDendro · Nov 30, 2023 · 796042d · 796042d
1 parent a8c3374
commit 796042d
Show file tree

Hide file tree

Showing 17 changed files with 650 additions and 69 deletions.
diff --git a/.gitignore b/.gitignore
@@ -1,8 +1,3 @@
-# Scripts for testing
-src/test_*.py
-src/*.txt
-src/misc.py
-
 # IDE stuff
 .DS_Store
 .vscode/

diff --git a/dplpy/__init__.py b/dplpy/__init__.py
@@ -2,7 +2,7 @@
 
 __author__ = "Tyson Lee Swetnam"
 __email__ = "[email protected]"
-__version__ = "0.1"
+__version__ = "0.1.testing"
 
 __copyright__ = """
    dplPy for tree ring width time series analyses

diff --git a/dplpy/detrend.py b/dplpy/detrend.py
@@ -42,14 +42,13 @@
 
 def detrend(data: pd.DataFrame | pd.Series, fit="spline", method="residual", plot=True, period=None):
     if isinstance(data, pd.DataFrame):
-
         res = pd.DataFrame(index=pd.Index(data.index))
         to_add = [res]
         for column in data.columns:
             to_add.append(detrend_series(data[column], column, fit, method, plot, period=None))
         output_df = pd.concat(to_add, axis=1)
         return output_df.rename_axis(data.index.name)
-      
+
     elif isinstance(data, pd.Series):
         return detrend_series(data, data.name, fit, method, plot)
     else:
@@ -90,7 +89,6 @@ def detrend_series(data, series_name, fit, method, plot, period=None):
     if plot:
         fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(7,3))
 
-
         axes[0].plot(x, y, "k-", x, yi, "r-", linewidth=2)
         axes[0].set_xlabel('Year')
         axes[0].set_ylabel('Length')

diff --git a/dplpy/dplpy.py b/dplpy/dplpy.py
@@ -206,6 +206,14 @@ def write_from_parser(args):
 def series_corr_from_parser(args):
     series_corr(input=args.input)
 
+def series_corr_from_parser(args):
+    series_corr(input=args.input)
+
+def common_interval_from_parser(args):
+    common_interval(input=args.input)
+
+def rbar_from_parser(args):
+    rbar(input=args.input)
 # creates whitespace
 print("")
 

diff --git a/dplpy/new.ipynb b/dplpy/new.ipynb
diff --git a/dplpy/rbar.py b/dplpy/rbar.py
@@ -73,7 +73,6 @@ def common_interval(data):
 # rbar returns a list of constants to multiply with each mean value generated for a range of years from a mean value chronology.
 # Can use osborn, frank and 67spline methods to generate rbar values.
 # Will be updated in the future to prioritize number of series, number of years or both. Currently attempts to do both.
-
 def get_running_rbar(data, min_seg_ratio, method="osborn", corr_type="pearson"):
     # how we deal with nans will depend on method chosen for finding rbar. 
     # drop all series with nans for osborn, but drop only if they are not up to fraction of seg_length for frank

diff --git a/dplpy/readers.py b/dplpy/readers.py
@@ -125,29 +125,42 @@ def read_rwl(lines):
     last_date = -sys.maxsize
 
     for line in lines:
-        line = line.rstrip("\n").split()
+        line = line.rstrip("\n")
+
+        if line[7] != '-' and line[6] != '-':
+            series_id = line[:8].strip()
+            iyr = int(line[8:12])
+        elif line[7] == '-':
+            series_id = line[:7].strip()
+            iyr = int(line[7:12]) 
+        elif line[6] == '-':
+            series_id = line[:6].strip()
+            iyr = int(line[6:12])
 
-        series_id = line[0]
         if series_id not in rwl_data:
             rwl_data[series_id] = {}
+
+        dataline = [line[i:i+6] for i in range(12, len(line), 6) if line[i:i+6].strip()]
 
         # keep track of the first and last date in the dataset
-        line_start = int(line[1])
+        line_start = int(iyr)
         first_date = min(first_date, line_start)
-        last_date = max(last_date, (line_start+len(line)-3))
+        last_date = max(last_date, (line_start+len(dataline)-1))
 
         # will implement some standardization here so that all data read is consistent, and all data written in rwl
         # can be written to one of the two popular precisions.
-        for i in range(2, len(line)):
+        for i in range(0, len(dataline)):
             try:
-                if line[i] == "999":
+                if dataline[i].strip() == "999":
                     rwl_data[series_id]["div"] = 100
                     continue
-                elif line[i] == "-9999":
+                elif dataline[i].strip() == "-9999":
                     rwl_data[series_id]["div"] = 1000
                     continue
-                data = float(int(line[i]))
+                data = float(int(dataline[i]))
             except ValueError as valerr: # Stops reader, escalates to give the user an error when unexpected formatting is detected.
+                print("Error:", valerr)
+                print("See line:", line)
                 return None, None, None
-            rwl_data[series_id][line_start+i-2] = data
-    return rwl_data, first_date, last_date
+            rwl_data[series_id][line_start+i] = data
+    return rwl_data, first_date, last_date
diff --git a/dplpy/series_corr.py b/dplpy/series_corr.py
@@ -1,4 +1,3 @@
-
 __copyright__ = """
    dplPy for tree ring width time series analyses
    Copyright (C) 2023  OpenDendro
@@ -39,12 +38,12 @@
 from autoreg import ar_func_series
 from chron import chron
 from xdate import get_ar_lag, correlate, compare_segment, get_bins, get_crit
+
 import pandas as pd
 import numpy as np
 import scipy
 import matplotlib.pyplot as plt
 
-
 # Analyzes the crossdating of one series compared to the master chronology
 def series_corr(data: pd.DataFrame, series_name: str, prewhiten=True, corr="Spearman", seg_length=50, bin_floor=100, p_val=0.05):
     # Check types of inputs
@@ -90,7 +89,6 @@ def series_corr(data: pd.DataFrame, series_name: str, prewhiten=True, corr="Spea
     start, end = get_rel_range(data_first, data_last, ser_first, ser_last, bin_floor, seg_length)
 
     plt.style.use('seaborn-v0_8-darkgrid')
-
     wid = max((end - start)//30, 1)
     hei = 10
     base_corr = get_crit(p_val)

diff --git a/dplpy/writers.py b/dplpy/writers.py
@@ -108,28 +108,40 @@ def write_rwl(data, file):
         start = data[series].first_valid_index()
         end = data[series].last_valid_index()
         i = start
-        file.write(series.rjust(6) + "\t")
-        file.write(str(i).rjust(4) + "\t")
+
+        if i < 0:
+            file.write(series.ljust(7))
+            file.write(str(i).rjust(5))
+        else:
+            file.write(series.ljust(8))
+            file.write(str(i).rjust(4))
         while i <= end:
             if np.isnan(data[series][i]):
-                file.write(str(-9999))
+                file.write(str(-9999).rjust(6))
                 file.write("\n")
                 while i <= end and np.isnan(data[series][i]):
                     i += 1
                 if i <= end:
-                    file.write(series.rjust(6) + "\t")
-                    file.write(str(i).rjust(4) + "\t")
+                    if i < 0:
+                        file.write(series.ljust(7))
+                        file.write(str(i).rjust(5))
+                    else:
+                        file.write(series.ljust(8))
+                        file.write(str(i).rjust(4))
                 continue
 
-            file.write((f"{data[series][i]:.3f}").lstrip('0').replace('.', '').rjust(4, '0') + "\t")
+            file.write((f"{data[series][i]:.3f}").lstrip('0').replace('.', '').rjust(4, '0').rjust(6))
             i += 1
             if i % 10 == 0:
                 file.write("\n")
-                file.write(series.rjust(6) + "\t")
-                file.write(str(i).rjust(4) + "\t")
-
-
-        file.write(str(-9999))
+                if i < 0:
+                    file.write(series.ljust(7))
+                    file.write(str(i).rjust(5))
+                else:
+                    file.write(series.ljust(8))
+                    file.write(str(i).rjust(4))
+
+        file.write(str(-9999).rjust(6))
         file.write("\n")
 
 

diff --git a/dplpy/xdate.py b/dplpy/xdate.py
@@ -53,7 +53,6 @@ def xdate(data: pd.DataFrame, prewhiten=True, corr="Spearman", slide_period=50,
         errorMsg = "Expected dataframe input, got " + str(type(data)) + " instead."
         raise TypeError(errorMsg)
 
-
     # Identify first and last valid indexes, for separating into bins.
     bins, bin_data = get_bins(data.first_valid_index(), data.last_valid_index(), bin_floor, slide_period)
 

diff --git a/pyproject.toml b/pyproject.toml
@@ -13,14 +13,19 @@ version="0.1"
 authors=[
     {name='Tyson Lee Swetnam', email='[email protected]'},
     {name='Ifeoluwa Ale', email='[email protected]'},
+    {name='Kevin Anchukaitis', email='[email protected]'},
 ]
 license={file='LICENSE'}
-requires-python = '>=3.11'
+requires-python = '>=3.10'
 dependencies=[
-    "pandas>=2.1.1", 
-    "numpy>=1.26.0", 
+    "numpy>=1.22.4,<2; python_version<'3.11'",
+    "numpy>=1.23.2,<2; python_version=='3.11'",
+    "numpy>=1.26.0,<2; python_version>='3.12'",
+    "pandas>=2.0.0",
+
     "csaps==1.1.0",
-    "matplotlib>=3.8.1",
-    "statsmodels>=0.14.0",
+    "matplotlib>=3.8.0",
+    "statsmodels>=0.13.5; python_version>='3.10'",
+    "statsmodels>=0.14.0; python_version>='3.12'",
     "scipy>=1.11.3"
 ]