Skip to content

Commit

Permalink
Dev ife (#64)
Browse files Browse the repository at this point in the history
* xdate works for overall series correlation

* Added code for creating bins and dividing series into segments

* Cleaning up and commenting related to xdate

* series_corr works but is inefficient

* WIP changes

* Added comments, updated working jupyter notebook

* Changes since start of fall semester

* variance stabilization produces accurate values

* Unit tests for readers, summary, stats and tbrm

* Added unit tests for detrend and chron

* Added tests for chron_stabilized, series_corr and writers

* Fixed merge conflicts

* v0.1 release

* Fixing the rwl reader and writer problem

* Create pypi_release.yml

* push for workflow (#54)

* Update pypi_release.yml

* Update pyproject.toml

* xdate works for overall series correlation

* series_corr works but is inefficient

* WIP changes

* Added comments, updated working jupyter notebook

* Changes since start of fall semester

* variance stabilization produces accurate values

* Unit tests for readers, summary, stats and tbrm

* Added tests for chron_stabilized, series_corr and writers

* v0.1 release

* Attempt at fixing dependency issues

* Create pypi_release.yml

* Update pypi_release.yml

* Modification to readers plus small changes to dplpy and pyproject (#55)

* mostly change to readers for RWL files

1. change readers to handle series IDs of different lengths and the potential for negative (B.C.) years

2. removed directory change in __init__ so that default is path where the notebook or script is

* Update __init__.py

* Update readers.py

---------

Co-authored-by: Michele Cosi <[email protected]>

* Revert "Modification to readers plus small changes to dplpy and pyproject (#55)"

This reverts commit 68b9aa1.

* Update pyproject.toml (#57)

* Update update.txt

* GitHub workflow for unit testing

* Added GitHub workflow to run integration tests

* Fixed issue that caused the integration tests to fail in the previous commit

* Renamed the write function in writers.py to writers

* Fixed deprecation warning for the invalid \d escape sequence in regex patterns

---------

Co-authored-by: Ifeoluwa Ale <[email protected]>
Co-authored-by: cosimichele <[email protected]>
Co-authored-by: Ifeoluwa Ale <[email protected]>
Co-authored-by: Michele Cosi <[email protected]>
Co-authored-by: Kevin Anchukaitis <[email protected]>
  • Loading branch information
6 people authored Dec 28, 2023
1 parent 9b6bf02 commit 2cdb217
Show file tree
Hide file tree
Showing 16 changed files with 95 additions and 45 deletions.
7 changes: 4 additions & 3 deletions .github/workflows/pypi_release.yml
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
# Build the package and publish it to PyPI after tests pass.
name: Publish to PyPI
on:
push:
workflow_run:
workflows: ["run_tests"]
types:
- completed
branches:
- main
tags:
- "*"

jobs:
publish:
Expand Down
31 changes: 31 additions & 0 deletions .github/workflows/run_tests.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
name: Python package

on: [push]

jobs:
build:

runs-on: ubuntu-latest
strategy:
matrix:
python-version: ["3.10", "3.11", "3.12"] # Goal is to run tests for different python versions

steps:
- uses: actions/checkout@v4
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v4
with:
python-version: ${{ matrix.python-version }}
- name: Display Python version
run: python -c "import sys; print(sys.version)"
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install -r ${{ matrix.python-version }}-requirements.txt
pip install pytest pytest-cov
- name: Run unit tests
run: |
pytest tests/unit --cov=dplpy
- name: Run integ tests
run: |
pytest tests/integs -rA
6 changes: 6 additions & 0 deletions 3.10-requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
numpy==1.22.4
statsmodels==0.13.5
matplotlib==3.8.0
csaps==1.1.0
pandas==2.0.0
scipy==1.11.3
6 changes: 6 additions & 0 deletions 3.11-requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
numpy==1.23.2
statsmodels==0.13.5
matplotlib==3.8.0
csaps==1.1.0
pandas==2.0.0
scipy==1.11.3
6 changes: 6 additions & 0 deletions 3.12-requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
numpy==1.26.0
statsmodels==0.14.0
matplotlib==3.8.0
csaps==1.1.0
pandas==2.0.0
scipy==1.11.3
4 changes: 2 additions & 2 deletions dplpy/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@
from chron_stabilized import chron_stabilized
from xdate import xdate, xdate_plot
from series_corr import series_corr
from writers import write
from writers import writers

__all__ = [
readers,
Expand All @@ -77,5 +77,5 @@
xdate,
xdate_plot,
series_corr,
write
writers
]
2 changes: 1 addition & 1 deletion dplpy/writers.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@
It also allows you to append files that are missing metadata and write them back out
Accepted file types are CSV, RWL, CRN (in dev) and TXT (in dev)
"""
def write(data, label, format):
def writers(data, label, format):
if not isinstance(data, pd.DataFrame):
raise TypeError("Expected input data to be pandas dataframe, not " + str(type(data)))

Expand Down
4 changes: 2 additions & 2 deletions dplpy/xdate.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,8 +102,8 @@ def xdate(data: pd.DataFrame, prewhiten=True, corr="Spearman", slide_period=50,
# evaluation of current series vs chronology of others by segments of years (the bins created earlier)
for range in bins:
# print(range) # useful for debugging but not necessary once operational
start = int(re.split("(?<=\d)-", range)[0])
end = int(re.split("(?<=\d)-", range)[1])
start = int(re.split("(?<=\\d)-", range)[0])
end = int(re.split("(?<=\\d)-", range)[1])
if start >= removed.first_valid_index() and end <= removed.last_valid_index():
segment = removed.loc[start:end]

Expand Down
8 changes: 4 additions & 4 deletions tests/integs/test_integ_chron.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,27 +2,27 @@
import dplpy as dpl

def test_chron_no_prewhiten_no_biweight():
data = dpl.readers("../tests/data/csv/ca533.csv")
data = dpl.readers("./tests/data/csv/ca533.csv")

res = dpl.chron(data, biweight=False, prewhiten=False, plot=False)
# TODO: assert contents of res


def test_chron_prewhiten_no_biweight():
data = dpl.readers("../tests/data/csv/ca533.csv")
data = dpl.readers("./tests/data/csv/ca533.csv")

res = dpl.chron(data, biweight=True, prewhiten=False, plot=False)
# TODO: assert contents of res

def test_chron_prewhiten_with_biweight():
data = dpl.readers("../tests/data/csv/ca533.csv")
data = dpl.readers("./tests/data/csv/ca533.csv")

res = dpl.chron(data, biweight=True, prewhiten=True, plot=False)
# TODO: assert contents of res


# def test_chron_prewhiten_biweight_plot():
# data = dpl.readers("../integs/data/csv/ca533.csv")
# data = dpl.readers("./integs/data/csv/ca533.csv")

# res = dpl.chron(data, biweight=True, prewhiten=True, plot=True)
# # TODO: assert contents of res
Expand Down
6 changes: 3 additions & 3 deletions tests/integs/test_integ_detrend.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import os

def test_detrend_all_fits_residual():
data = dpl.readers("../tests/data/csv/ca533.csv")
data = dpl.readers("./tests/data/csv/ca533.csv")

spline_data = dpl.detrend(data, fit="spline", method="residual", plot=False)
modnegex_data = dpl.detrend(data, fit="ModNegEx", method="residual", plot=False)
Expand All @@ -15,7 +15,7 @@ def test_detrend_all_fits_residual():
# TODO: assert detrended data for correctness

def test_detrend_all_fits_difference():
data = dpl.readers("../tests/data/csv/ca533.csv")
data = dpl.readers("./tests/data/csv/ca533.csv")

spline_data = dpl.detrend(data, fit="spline", method="difference", plot=False)
modnegex_data = dpl.detrend(data, fit="ModNegEx", method="difference", plot=False)
Expand All @@ -27,7 +27,7 @@ def test_detrend_all_fits_difference():

# Commented out because plots block execution in vscode. WIP
# def test_detrend_all_fits_plot():
# data = dpl.readers("../integs/data/csv/ca533.csv")
# data = dpl.readers("./integs/data/csv/ca533.csv")

# spline_data = dpl.detrend(data, fit="spline", method="difference", plot=True)
# modnegex_data = dpl.detrend(data, fit="ModNegEx", method="difference", plot=True)
Expand Down
12 changes: 6 additions & 6 deletions tests/integs/test_integ_plots.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,23 +6,23 @@

# #TODO: assert content of plots somehow
# def test_seg_plots():
# ca533 = dpl.readers("../integs/data/csv/ca533.csv")
# ca667 = dpl.readers("../integs/data/rwl/ca667.rwl", header=True)
# ca533 = dpl.readers("./integs/data/csv/ca533.csv")
# ca667 = dpl.readers("./integs/data/rwl/ca667.rwl", header=True)

# dpl.plot(ca533, "seg")
# dpl.plot(ca667, "seg")


# def test_spag_plots():
# ca533 = dpl.readers("../integs/data/csv/ca533.csv")
# ca667 = dpl.readers("../integs/data/rwl/ca667.rwl", header=True)
# ca533 = dpl.readers("./integs/data/csv/ca533.csv")
# ca667 = dpl.readers("./integs/data/rwl/ca667.rwl", header=True)

# dpl.plot(ca533, "spag")
# dpl.plot(ca667, "spag")

# def test_line_plots():
# ca533 = dpl.readers("../integs/data/csv/ca533.csv")
# ca667 = dpl.readers("../integs/data/rwl/ca667.rwl", header=True)
# ca533 = dpl.readers("./integs/data/csv/ca533.csv")
# ca667 = dpl.readers("./integs/data/rwl/ca667.rwl", header=True)

# dpl.plot(ca533, "line")
# dpl.plot(ca667, "line")
Expand Down
20 changes: 10 additions & 10 deletions tests/integs/test_integ_readers_and_writers.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,58 +4,58 @@
import os

def test_read_and_write_csv(tmp_path):
ca533 = dpl.readers("../tests/data/csv/ca533.csv")
ca533 = dpl.readers("./tests/data/csv/ca533.csv")

write_path = os.path.join(tmp_path,"test_write")

dpl.write(ca533, write_path, "csv")
dpl.writers(ca533, write_path, "csv")

ca533_alt = dpl.readers(write_path + ".csv")

pd.testing.assert_frame_equal(ca533, ca533_alt)


def test_read_and_write_rwl_no_headers(tmp_path):
viet001 = dpl.readers("../tests/data/rwl/viet001.rwl")
viet001 = dpl.readers("./tests/data/rwl/viet001.rwl")

write_path = os.path.join(tmp_path, "test_write")

dpl.write(viet001, write_path, "rwl")
dpl.writers(viet001, write_path, "rwl")

viet001_alt = dpl.readers(write_path + ".rwl")

pd.testing.assert_frame_equal(viet001, viet001_alt)


def test_read_and_write_rwl_with_headers(tmp_path):
th001 = dpl.readers("../tests/data/rwl/th001.rwl", header=True)
th001 = dpl.readers("./tests/data/rwl/th001.rwl", header=True)

write_path = os.path.join(tmp_path, "test_write")

dpl.write(th001, write_path, "rwl")
dpl.writers(th001, write_path, "rwl")

th001_alt = dpl.readers(write_path + ".rwl")

pd.testing.assert_frame_equal(th001, th001_alt)


def test_read_and_write_long_rwl(tmp_path):
ca667 = dpl.readers("../tests/data/rwl/ca667.rwl", header=True)
ca667 = dpl.readers("./tests/data/rwl/ca667.rwl", header=True)

write_path = os.path.join(tmp_path, "test_write")

dpl.write(ca667, write_path, "rwl")
dpl.writers(ca667, write_path, "rwl")

ca667_alt = dpl.readers(write_path + ".rwl")

pd.testing.assert_frame_equal(ca667, ca667_alt)

def test_read_and_write_weird_rwl(tmp_path):
wwr = dpl.readers("../tests/data/rwl/wwr.rwl")
wwr = dpl.readers("./tests/data/rwl/wwr.rwl")

write_path = os.path.join(tmp_path, "test_write")

dpl.write(wwr, write_path, "rwl")
dpl.writers(wwr, write_path, "rwl")

wwr_alt = dpl.readers(write_path + ".rwl")

Expand Down
2 changes: 1 addition & 1 deletion tests/integs/test_integ_statistics.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import os

def test_summary_methods():
data = dpl.readers("../tests/data/csv/ca533.csv")
data = dpl.readers("./tests/data/csv/ca533.csv")

dpl.summary(data)
dpl.report(data)
Expand Down
10 changes: 5 additions & 5 deletions tests/integs/test_integ_xdate.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,32 +4,32 @@
import os

def test_xdate_diff_bins():
ca533 = dpl.readers("../tests/data/csv/ca533.csv")
ca533 = dpl.readers("./tests/data/csv/ca533.csv")

ca533_bindata_1 = dpl.xdate(ca533, bin_floor=0)
ca533_bindata_2 = dpl.xdate(ca533, bin_floor=10)
ca533_bindata_3 = dpl.xdate(ca533, bin_floor=100)

def test_xdate_diff_slide_periods():
ca533 = dpl.readers("../tests/data/csv/ca533.csv")
ca533 = dpl.readers("./tests/data/csv/ca533.csv")

ca533_bindata_1 = dpl.xdate(ca533, slide_period=30)
ca533_bindata_2 = dpl.xdate(ca533, slide_period=50)
ca533_bindata_3 = dpl.xdate(ca533, slide_period=80)

def test_xdate_diff_corrs():
ca667 = dpl.readers("../tests/data/rwl/ca667.rwl", header=True)
ca667 = dpl.readers("./tests/data/rwl/ca667.rwl", header=True)

ca667_bindata_1 = dpl.xdate(ca667, corr="Spearman")
ca667_bindata_2 = dpl.xdate(ca667, corr="Pearson")

def test_xdate_not_prewhitened():
ca667 = dpl.readers("../tests/data/rwl/ca667.rwl", header=True)
ca667 = dpl.readers("./tests/data/rwl/ca667.rwl", header=True)

ca667_bindata = dpl.xdate(ca667, prewhiten=False)

# Commented out because plots block execution in vscode. WIP
# def test_xdate_plot():
# co021 = dpl.readers("../integs/data/rwl/co021.rwl")
# co021 = dpl.readers("./integs/data/rwl/co021.rwl")

# dpl.xdate_plot(co021)
6 changes: 3 additions & 3 deletions tests/integs/test_variance_stab.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,20 +3,20 @@
import pandas as pd

def test_chron_stab_no_biweight():
data = dpl.readers("../tests/data/csv/ca533.csv")
data = dpl.readers("./tests/data/csv/ca533.csv")

res = dpl.chron_stabilized(data, biweight=False)
# TODO: assert contents of res


def test_chron_stab_with_biweight():
data = dpl.readers("../tests/data/csv/ca533.csv")
data = dpl.readers("./tests/data/csv/ca533.csv")

res = dpl.chron_stabilized(data)
# TODO: assert contents of res

def test_chron_stab_with_running_rbar():
data = dpl.readers("../tests/data/csv/ca533.csv")
data = dpl.readers("./tests/data/csv/ca533.csv")

res = dpl.chron_stabilized(data, running_rbar=True)
# TODO: assert contents of res
10 changes: 5 additions & 5 deletions tests/unit/test_writers.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ def test_write_invalid_type_data():
"Year": [1, 2, 3, 4]})

with pytest.raises(TypeError) as errorMsg:
dpl.write(input_df['SeriesA'], "label", "ext")
dpl.writers(input_df['SeriesA'], "label", "ext")
expected_msg = "Expected input data to be pandas dataframe, not <class 'pandas.core.series.Series'>"
assert expected_msg == str(errorMsg.value)

Expand All @@ -27,7 +27,7 @@ def test_write_invalid_type_label():
"Year": [1, 2, 3, 4]})

with pytest.raises(TypeError) as errorMsg:
dpl.write(input_df, 1, "ext")
dpl.writers(input_df, 1, "ext")
expected_msg = "Expected label to be of type str, not <class 'int'>"
assert expected_msg == str(errorMsg.value)

Expand All @@ -38,7 +38,7 @@ def test_write_invalid_type_format():
"Year": [1, 2, 3, 4]})

with pytest.raises(TypeError) as errorMsg:
dpl.write(input_df, "label", 1)
dpl.writers(input_df, "label", 1)
expected_msg = "Expected format to be of type str, not <class 'int'>"
assert expected_msg == str(errorMsg.value)

Expand All @@ -50,7 +50,7 @@ def test_write_csv(tmpdir):

file = tmpdir.join('output.csv')

dpl.write(input_df, file.strpath[:-4], "csv")
dpl.writers(input_df, file.strpath[:-4], "csv")

expected_csv_lines = ['"Year","SeriesA","SeriesB"\n',
'1,0.1,0.2\n',
Expand All @@ -69,7 +69,7 @@ def test_write_rwl(tmpdir):

file = tmpdir.join('output.rwl')

dpl.write(input_df, file.strpath[:-4], "rwl")
dpl.writers(input_df, file.strpath[:-4], "rwl")

expected_rwl_lines = ['SeriesA 1 0100 0300 0500 0700 -9999\n',
'SeriesB 1 0200 0400 0600 0800 -9999\n']
Expand Down

0 comments on commit 2cdb217

Please sign in to comment.