Modification to readers plus small changes to dplpy and pyproject (#55)

* Mostly changes to the readers for RWL files:
  1. Changed readers to handle series IDs of different lengths and the potential for negative (B.C.) years.
  2. Removed the directory change in __init__ so that the default path is where the notebook or script is located.
* Update __init__.py
* Update readers.py

---------

Co-authored-by: Michele Cosi <[email protected]>
OpenDendro · Nov 30, 2023 · 68b9aa1
1 parent 796042d
commit 68b9aa1
Show file tree

Hide file tree

Showing 13 changed files with 73 additions and 73 deletions.
diff --git a/.gitignore b/.gitignore
@@ -1,3 +1,8 @@
+# Scripts for testing
+src/test_*.py
+src/*.txt
+src/misc.py
+
 # IDE stuff
 .DS_Store
 .vscode/

diff --git a/dplpy/__init__.py b/dplpy/__init__.py
@@ -2,7 +2,7 @@
 
 __author__ = "Tyson Lee Swetnam"
 __email__ = "[email protected]"
-__version__ = "0.1.testing"
+__version__ = "0.1"
 
 __copyright__ = """
    dplPy for tree ring width time series analyses
@@ -28,13 +28,11 @@
 import os
 import sys
 
-os.chdir(os.path.dirname(os.path.realpath(__file__)))
 lpath = os.path.dirname(os.path.realpath(__file__))
 sys.path.append(lpath)
 
 del os, sys
 
-
 _hard_dependencies = ("pandas", "numpy", "scipy", "csaps", "matplotlib", "statsmodels")
 _missing_dependencies = []
 

diff --git a/dplpy/detrend.py b/dplpy/detrend.py
@@ -42,13 +42,14 @@
 
 def detrend(data: pd.DataFrame | pd.Series, fit="spline", method="residual", plot=True, period=None):
     if isinstance(data, pd.DataFrame):
+
         res = pd.DataFrame(index=pd.Index(data.index))
         to_add = [res]
         for column in data.columns:
             to_add.append(detrend_series(data[column], column, fit, method, plot, period=None))
         output_df = pd.concat(to_add, axis=1)
         return output_df.rename_axis(data.index.name)
-
+      
     elif isinstance(data, pd.Series):
         return detrend_series(data, data.name, fit, method, plot)
     else:
@@ -89,6 +90,7 @@ def detrend_series(data, series_name, fit, method, plot, period=None):
     if plot:
         fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(7,3))
 
+
         axes[0].plot(x, y, "k-", x, yi, "r-", linewidth=2)
         axes[0].set_xlabel('Year')
         axes[0].set_ylabel('Length')

diff --git a/dplpy/dplpy.py b/dplpy/dplpy.py
@@ -206,14 +206,6 @@ def write_from_parser(args):
 def series_corr_from_parser(args):
     series_corr(input=args.input)
 
-def series_corr_from_parser(args):
-    series_corr(input=args.input)
-
-def common_interval_from_parser(args):
-    common_interval(input=args.input)
-
-def rbar_from_parser(args):
-    rbar(input=args.input)
 # creates whitespace
 print("")
 

diff --git a/dplpy/new.ipynb b/dplpy/new.ipynb
diff --git a/dplpy/readers.py b/dplpy/readers.py
@@ -60,8 +60,8 @@ def readers(filename: str, skip_lines=0, header=False):
     else:
         errorMsg = """
 
-Unable to read file, please check that you're using a supported type
-Accepted file types are .csv and .rwl
+Unable to read the file, please check that you're using a supported type
+Accepted file types are currently .csv and .rwl
 
 Example usages:
 >>> import dplpy as dpl
@@ -81,7 +81,7 @@ def readers(filename: str, skip_lines=0, header=False):
     series_data.set_index('Year', inplace = True, drop = True)
 
     # Display message to show that reading was successful
-    print("\nSUCCESS!\nFile read as:", FORMAT, "file\n")
+    print("File read successfully as:", FORMAT, "file\n")
 
     # Display names of all the series found
     print("Series names:")
@@ -95,7 +95,7 @@ def process_rwl_pandas(filename, skip_lines, header):
 
     with open(filename, "r") as rwl_file:
         file_lines = rwl_file.readlines()[skip_lines:]
-
+            
     rwl_data, first_date, last_date = read_rwl(file_lines)
     if rwl_data is None:
         return None
@@ -126,7 +126,7 @@ def read_rwl(lines):
 
     for line in lines:
         line = line.rstrip("\n")
-
+        
         if line[7] != '-' and line[6] != '-':
             series_id = line[:8].strip()
             iyr = int(line[8:12])
@@ -139,13 +139,13 @@ def read_rwl(lines):
 
         if series_id not in rwl_data:
             rwl_data[series_id] = {}
-
+        
         dataline = [line[i:i+6] for i in range(12, len(line), 6) if line[i:i+6].strip()]
-            
+
         # keep track of the first and last date in the dataset
         line_start = int(iyr)
         first_date = min(first_date, line_start)
-        last_date = max(last_date, (line_start+len(dataline)-1))
+        last_date = max(last_date, (line_start+len(dataline)-1)) 
 
         # will implement some standardization here so that all data read is consistent, and all data written in rwl
         # can be written to one of the two popular precisions.
@@ -159,8 +159,6 @@ def read_rwl(lines):
                     continue
                 data = float(int(dataline[i]))
             except ValueError as valerr: # Stops reader, escalates to give the user an error when unexpected formatting is detected.
-                print("Error:", valerr)
-                print("See line:", line)
                 return None, None, None
             rwl_data[series_id][line_start+i] = data
-    return rwl_data, first_date, last_date
+    return rwl_data, first_date, last_date
diff --git a/dplpy/series_corr.py b/dplpy/series_corr.py
@@ -38,7 +38,6 @@
 from autoreg import ar_func_series
 from chron import chron
 from xdate import get_ar_lag, correlate, compare_segment, get_bins, get_crit
-
 import pandas as pd
 import numpy as np
 import scipy

diff --git a/dplpy/writers.py b/dplpy/writers.py
@@ -108,40 +108,28 @@ def write_rwl(data, file):
         start = data[series].first_valid_index()
         end = data[series].last_valid_index()
         i = start
-
-        if i < 0:
-            file.write(series.ljust(7))
-            file.write(str(i).rjust(5))
-        else:
-            file.write(series.ljust(8))
-            file.write(str(i).rjust(4))
+        file.write(series.rjust(6) + "\t")
+        file.write(str(i).rjust(4) + "\t")
         while i <= end:
             if np.isnan(data[series][i]):
-                file.write(str(-9999).rjust(6))
+                file.write(str(-9999))
                 file.write("\n")
                 while i <= end and np.isnan(data[series][i]):
                     i += 1
                 if i <= end:
-                    if i < 0:
-                        file.write(series.ljust(7))
-                        file.write(str(i).rjust(5))
-                    else:
-                        file.write(series.ljust(8))
-                        file.write(str(i).rjust(4))
+                    file.write(series.rjust(6) + "\t")
+                    file.write(str(i).rjust(4) + "\t")
                 continue
 
-            file.write((f"{data[series][i]:.3f}").lstrip('0').replace('.', '').rjust(4, '0').rjust(6))
+            file.write((f"{data[series][i]:.3f}").lstrip('0').replace('.', '').rjust(4, '0') + "\t")
             i += 1
             if i % 10 == 0:
                 file.write("\n")
-                if i < 0:
-                    file.write(series.ljust(7))
-                    file.write(str(i).rjust(5))
-                else:
-                    file.write(series.ljust(8))
-                    file.write(str(i).rjust(4))
-
-        file.write(str(-9999).rjust(6))
+                file.write(series.rjust(6) + "\t")
+                file.write(str(i).rjust(4) + "\t")
+
+
+        file.write(str(-9999))
         file.write("\n")
 
 

diff --git a/pyproject.toml b/pyproject.toml
@@ -7,7 +7,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name="dplpy"
-description="dendrochronology library for Python"
+description="Dendrochronology Program Library for Python"
 readme="README.md"
 version="0.1"
 authors=[
@@ -16,16 +16,12 @@ authors=[
     {name='Kevin Anchukaitis', email='[email protected]'},
 ]
 license={file='LICENSE'}
-requires-python = '>=3.10'
+requires-python = '>=3.11'
 dependencies=[
-    "numpy>=1.22.4,<2; python_version<'3.11'",
-    "numpy>=1.23.2,<2; python_version=='3.11'",
-    "numpy>=1.26.0,<2; python_version>='3.12'",
-    "pandas>=2.0.0",
-
+    "pandas>=2.1.1", 
+    "numpy>=1.26.0", 
     "csaps==1.1.0",
-    "matplotlib>=3.8.0",
-    "statsmodels>=0.13.5; python_version>='3.10'",
-    "statsmodels>=0.14.0; python_version>='3.12'",
+    "matplotlib>=3.8.1",
+    "statsmodels>=0.14.0",
     "scipy>=1.11.3"
 ]
diff --git a/tests/integs/test_integ_chron.py b/tests/integs/test_integ_chron.py
@@ -1,7 +1,10 @@
 import pytest
 import dplpy as dpl
+import pandas as pd
+import os
 
 def test_chron_no_prewhiten_no_biweight():
+    print(os.getcwd())
     data = dpl.readers("../tests/data/csv/ca533.csv")
 
     res = dpl.chron(data, biweight=False, prewhiten=False, plot=False)

diff --git a/tests/integs/test_integ_readers_and_writers.py b/tests/integs/test_integ_readers_and_writers.py
@@ -50,13 +50,3 @@ def test_read_and_write_long_rwl(tmp_path):
 
     pd.testing.assert_frame_equal(ca667, ca667_alt)
 
-def test_read_and_write_weird_rwl(tmp_path):
-    wwr = dpl.readers("../tests/data/rwl/wwr.rwl")
-
-    write_path = os.path.join(tmp_path, "test_write")
-
-    dpl.write(wwr, write_path, "rwl")
-
-    wwr_alt = dpl.readers(write_path + ".rwl")
-
-    pd.testing.assert_frame_equal(wwr, wwr_alt)
diff --git a/tests/unit/test_readers.py b/tests/unit/test_readers.py
@@ -60,15 +60,15 @@ def mock_open_output(file_path, open_type):
     )
 
     if file_path == "valid_rwl_correct_format.rwl":
-        wrapper.write("SeriesA 1       10    30    50    70   999\n")
-        wrapper.write("SeriesB 1      200   400   600   800 -9999\n")
+        wrapper.write("SeriesA  1   10 30 50 70 999\n")
+        wrapper.write("SeriesB  1   200 400 600 800 -9999\n")
         wrapper.seek(0,0)
     elif file_path == "valid_rwl_with_headers.rwl":
         wrapper.write("Header line 1\n")
         wrapper.write("Header line 2\n")
         wrapper.write("Header line 3\n")
-        wrapper.write("SeriesA 1       10    30    50    70   999\n")
-        wrapper.write("SeriesB 1      200   400   600   800 -9999\n")
+        wrapper.write("SeriesA  1   10 30 50 70 999\n")
+        wrapper.write("SeriesB  1   200 400 600 800 -9999\n")
         wrapper.seek(0,0)
     else:
         raise OSError("File not found")

diff --git a/tests/unit/test_writers.py b/tests/unit/test_writers.py
@@ -71,8 +71,8 @@ def test_write_rwl(tmpdir):
 
     dpl.write(input_df, file.strpath[:-4], "rwl")
 
-    expected_rwl_lines = ['SeriesA    1  0100  0300  0500  0700 -9999\n',
-                          'SeriesB    1  0200  0400  0600  0800 -9999\n']
+    expected_rwl_lines = ['SeriesA\t   1\t0100\t0300\t0500\t0700\t-9999\n',
+                            'SeriesB\t   1\t0200\t0400\t0600\t0800\t-9999\n']
 
     assert expected_rwl_lines == file.readlines()