Skip to content

Commit

Permalink
Modification to readers plus small changes to dplpy and pyproject (#55)
Browse files Browse the repository at this point in the history
* Mostly changes to the readers for RWL files

1. Changed the readers to handle series IDs of different lengths and the possibility of negative (B.C.) years

2. Removed the directory change (os.chdir) in __init__ so that the default working directory is the path where the notebook or script is located

* Update __init__.py

* Update readers.py

---------

Co-authored-by: Michele Cosi <[email protected]>
  • Loading branch information
kanchukaitis and CosiMichele committed Nov 30, 2023
1 parent 796042d commit 68b9aa1
Show file tree
Hide file tree
Showing 13 changed files with 73 additions and 73 deletions.
5 changes: 5 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
# Scripts for testing
src/test_*.py
src/*.txt
src/misc.py

# IDE stuff
.DS_Store
.vscode/
Expand Down
4 changes: 1 addition & 3 deletions dplpy/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

__author__ = "Tyson Lee Swetnam"
__email__ = "[email protected]"
__version__ = "0.1.testing"
__version__ = "0.1"

__copyright__ = """
dplPy for tree ring width time series analyses
Expand All @@ -28,13 +28,11 @@
import os
import sys

os.chdir(os.path.dirname(os.path.realpath(__file__)))
lpath = os.path.dirname(os.path.realpath(__file__))
sys.path.append(lpath)

del os, sys


_hard_dependencies = ("pandas", "numpy", "scipy", "csaps", "matplotlib", "statsmodels")
_missing_dependencies = []

Expand Down
4 changes: 3 additions & 1 deletion dplpy/detrend.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,13 +42,14 @@

def detrend(data: pd.DataFrame | pd.Series, fit="spline", method="residual", plot=True, period=None):
if isinstance(data, pd.DataFrame):

res = pd.DataFrame(index=pd.Index(data.index))
to_add = [res]
for column in data.columns:
to_add.append(detrend_series(data[column], column, fit, method, plot, period=None))
output_df = pd.concat(to_add, axis=1)
return output_df.rename_axis(data.index.name)

elif isinstance(data, pd.Series):
return detrend_series(data, data.name, fit, method, plot)
else:
Expand Down Expand Up @@ -89,6 +90,7 @@ def detrend_series(data, series_name, fit, method, plot, period=None):
if plot:
fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(7,3))


axes[0].plot(x, y, "k-", x, yi, "r-", linewidth=2)
axes[0].set_xlabel('Year')
axes[0].set_ylabel('Length')
Expand Down
8 changes: 0 additions & 8 deletions dplpy/dplpy.py
Original file line number Diff line number Diff line change
Expand Up @@ -206,14 +206,6 @@ def write_from_parser(args):
def series_corr_from_parser(args):
series_corr(input=args.input)

def series_corr_from_parser(args):
series_corr(input=args.input)

def common_interval_from_parser(args):
common_interval(input=args.input)

def rbar_from_parser(args):
rbar(input=args.input)
# creates whitespace
print("")

Expand Down
29 changes: 29 additions & 0 deletions dplpy/new.ipynb

Large diffs are not rendered by default.

20 changes: 9 additions & 11 deletions dplpy/readers.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,8 +60,8 @@ def readers(filename: str, skip_lines=0, header=False):
else:
errorMsg = """
Unable to read file, please check that you're using a supported type
Accepted file types are .csv and .rwl
Unable to read the file, please check that you're using a supported type
Accepted file types are currently .csv and .rwl
Example usages:
>>> import dplpy as dpl
Expand All @@ -81,7 +81,7 @@ def readers(filename: str, skip_lines=0, header=False):
series_data.set_index('Year', inplace = True, drop = True)

# Display message to show that reading was successful
print("\nSUCCESS!\nFile read as:", FORMAT, "file\n")
print("File read successfully as:", FORMAT, "file\n")

# Display names of all the series found
print("Series names:")
Expand All @@ -95,7 +95,7 @@ def process_rwl_pandas(filename, skip_lines, header):

with open(filename, "r") as rwl_file:
file_lines = rwl_file.readlines()[skip_lines:]

rwl_data, first_date, last_date = read_rwl(file_lines)
if rwl_data is None:
return None
Expand Down Expand Up @@ -126,7 +126,7 @@ def read_rwl(lines):

for line in lines:
line = line.rstrip("\n")

if line[7] != '-' and line[6] != '-':
series_id = line[:8].strip()
iyr = int(line[8:12])
Expand All @@ -139,13 +139,13 @@ def read_rwl(lines):

if series_id not in rwl_data:
rwl_data[series_id] = {}

dataline = [line[i:i+6] for i in range(12, len(line), 6) if line[i:i+6].strip()]

# keep track of the first and last date in the dataset
line_start = int(iyr)
first_date = min(first_date, line_start)
last_date = max(last_date, (line_start+len(dataline)-1))
last_date = max(last_date, (line_start+len(dataline)-1))

# will implement some standardization here so that all data read is consistent, and all data written in rwl
# can be written to one of the two popular precisions.
Expand All @@ -159,8 +159,6 @@ def read_rwl(lines):
continue
data = float(int(dataline[i]))
except ValueError as valerr: # Stops reader, escalates to give the user an error when unexpected formatting is detected.
print("Error:", valerr)
print("See line:", line)
return None, None, None
rwl_data[series_id][line_start+i] = data
return rwl_data, first_date, last_date
return rwl_data, first_date, last_date
1 change: 0 additions & 1 deletion dplpy/series_corr.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,6 @@
from autoreg import ar_func_series
from chron import chron
from xdate import get_ar_lag, correlate, compare_segment, get_bins, get_crit

import pandas as pd
import numpy as np
import scipy
Expand Down
34 changes: 11 additions & 23 deletions dplpy/writers.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,40 +108,28 @@ def write_rwl(data, file):
start = data[series].first_valid_index()
end = data[series].last_valid_index()
i = start

if i < 0:
file.write(series.ljust(7))
file.write(str(i).rjust(5))
else:
file.write(series.ljust(8))
file.write(str(i).rjust(4))
file.write(series.rjust(6) + "\t")
file.write(str(i).rjust(4) + "\t")
while i <= end:
if np.isnan(data[series][i]):
file.write(str(-9999).rjust(6))
file.write(str(-9999))
file.write("\n")
while i <= end and np.isnan(data[series][i]):
i += 1
if i <= end:
if i < 0:
file.write(series.ljust(7))
file.write(str(i).rjust(5))
else:
file.write(series.ljust(8))
file.write(str(i).rjust(4))
file.write(series.rjust(6) + "\t")
file.write(str(i).rjust(4) + "\t")
continue

file.write((f"{data[series][i]:.3f}").lstrip('0').replace('.', '').rjust(4, '0').rjust(6))
file.write((f"{data[series][i]:.3f}").lstrip('0').replace('.', '').rjust(4, '0') + "\t")
i += 1
if i % 10 == 0:
file.write("\n")
if i < 0:
file.write(series.ljust(7))
file.write(str(i).rjust(5))
else:
file.write(series.ljust(8))
file.write(str(i).rjust(4))

file.write(str(-9999).rjust(6))
file.write(series.rjust(6) + "\t")
file.write(str(i).rjust(4) + "\t")


file.write(str(-9999))
file.write("\n")


Expand Down
16 changes: 6 additions & 10 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ build-backend = "setuptools.build_meta"

[project]
name="dplpy"
description="dendrochronology library for Python"
description="Dendrochronology Program Library for Python"
readme="README.md"
version="0.1"
authors=[
Expand All @@ -16,16 +16,12 @@ authors=[
{name='Kevin Anchukaitis', email='[email protected]'},
]
license={file='LICENSE'}
requires-python = '>=3.10'
requires-python = '>=3.11'
dependencies=[
"numpy>=1.22.4,<2; python_version<'3.11'",
"numpy>=1.23.2,<2; python_version=='3.11'",
"numpy>=1.26.0,<2; python_version>='3.12'",
"pandas>=2.0.0",

"pandas>=2.1.1",
"numpy>=1.26.0",
"csaps==1.1.0",
"matplotlib>=3.8.0",
"statsmodels>=0.13.5; python_version>='3.10'",
"statsmodels>=0.14.0; python_version>='3.12'",
"matplotlib>=3.8.1",
"statsmodels>=0.14.0",
"scipy>=1.11.3"
]
3 changes: 3 additions & 0 deletions tests/integs/test_integ_chron.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
import pytest
import dplpy as dpl
import pandas as pd
import os

def test_chron_no_prewhiten_no_biweight():
print(os.getcwd())
data = dpl.readers("../tests/data/csv/ca533.csv")

res = dpl.chron(data, biweight=False, prewhiten=False, plot=False)
Expand Down
10 changes: 0 additions & 10 deletions tests/integs/test_integ_readers_and_writers.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,13 +50,3 @@ def test_read_and_write_long_rwl(tmp_path):

pd.testing.assert_frame_equal(ca667, ca667_alt)

def test_read_and_write_weird_rwl(tmp_path):
wwr = dpl.readers("../tests/data/rwl/wwr.rwl")

write_path = os.path.join(tmp_path, "test_write")

dpl.write(wwr, write_path, "rwl")

wwr_alt = dpl.readers(write_path + ".rwl")

pd.testing.assert_frame_equal(wwr, wwr_alt)
8 changes: 4 additions & 4 deletions tests/unit/test_readers.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,15 +60,15 @@ def mock_open_output(file_path, open_type):
)

if file_path == "valid_rwl_correct_format.rwl":
wrapper.write("SeriesA 1 10 30 50 70 999\n")
wrapper.write("SeriesB 1 200 400 600 800 -9999\n")
wrapper.write("SeriesA 1 10 30 50 70 999\n")
wrapper.write("SeriesB 1 200 400 600 800 -9999\n")
wrapper.seek(0,0)
elif file_path == "valid_rwl_with_headers.rwl":
wrapper.write("Header line 1\n")
wrapper.write("Header line 2\n")
wrapper.write("Header line 3\n")
wrapper.write("SeriesA 1 10 30 50 70 999\n")
wrapper.write("SeriesB 1 200 400 600 800 -9999\n")
wrapper.write("SeriesA 1 10 30 50 70 999\n")
wrapper.write("SeriesB 1 200 400 600 800 -9999\n")
wrapper.seek(0,0)
else:
raise OSError("File not found")
Expand Down
4 changes: 2 additions & 2 deletions tests/unit/test_writers.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,8 +71,8 @@ def test_write_rwl(tmpdir):

dpl.write(input_df, file.strpath[:-4], "rwl")

expected_rwl_lines = ['SeriesA 1 0100 0300 0500 0700 -9999\n',
'SeriesB 1 0200 0400 0600 0800 -9999\n']
expected_rwl_lines = ['SeriesA\t 1\t0100\t0300\t0500\t0700\t-9999\n',
'SeriesB\t 1\t0200\t0400\t0600\t0800\t-9999\n']

assert expected_rwl_lines == file.readlines()

Expand Down

0 comments on commit 68b9aa1

Please sign in to comment.