From 044aa42324a27bdbcca0f0d0ca841285108a017a Mon Sep 17 00:00:00 2001 From: MartinSchobben Date: Wed, 14 Aug 2024 16:55:40 +0200 Subject: [PATCH] add dev tools and unit tests --- environment.yml | 9 +++ pyproject.toml | 3 + setup.cfg | 34 +++++++++ setup.py | 3 + src/clean-nb.py | 73 -------------------- src/eo_datascience/__init__.py | 4 ++ src/eo_datascience/_version.py | 2 + src/eo_datascience/clean_nb.py | 60 ++++++++++++++++ src/{ => eo_datascience}/render_sfinx_toc.py | 0 tests/mock.ipynb | 29 ++++++++ tests/test_quarto_nb_conversions.py | 10 +++ 11 files changed, 154 insertions(+), 73 deletions(-) create mode 100644 environment.yml create mode 100644 pyproject.toml create mode 100644 setup.cfg create mode 100644 setup.py delete mode 100644 src/clean-nb.py create mode 100644 src/eo_datascience/__init__.py create mode 100644 src/eo_datascience/_version.py create mode 100644 src/eo_datascience/clean_nb.py rename src/{ => eo_datascience}/render_sfinx_toc.py (100%) create mode 100644 tests/mock.ipynb create mode 100644 tests/test_quarto_nb_conversions.py diff --git a/environment.yml b/environment.yml new file mode 100644 index 0000000..3728b1f --- /dev/null +++ b/environment.yml @@ -0,0 +1,9 @@ +name: eo-datascience +channels: + - conda-forge +dependencies: + - python=3.10 + - pip + - mamba + - jupyter + - nbformat \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..7fd26b9 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,3 @@ +[build-system] +requires = ["setuptools"] +build-backend = "setuptools.build_meta" \ No newline at end of file diff --git a/setup.cfg b/setup.cfg new file mode 100644 index 0000000..5a13289 --- /dev/null +++ b/setup.cfg @@ -0,0 +1,34 @@ +[metadata] +name = eo_datascience +version = attr: eo_datascience.__version__ +description = Examples of TUWien Jupyter notebooks for education +author = TU Wien GEO MRS group +author_email = martin.schobben@geo.tuwien.ac.at +long_description = file: 
README.md +url = https://github.com/TUW-GEO/eo-datascience +platforms = any +classifiers = + Intended Audience :: Science/Research + Topic :: Scientific/Engineering + Topic :: Scientific/Engineering :: GIS + Topic :: Software Development :: Libraries + Programming Language :: Python :: 3 + Programming Language :: Python :: 3.8 + Programming Language :: Python :: 3.9 + Operating System :: POSIX + Natural Language :: English + +[options] +package_dir = + = src +packages = find: +install_requires = + nbformat + +[options.packages.find] +where = src + +[options.extras_require] +test = + pytest + pytest-cov diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..6068493 --- /dev/null +++ b/setup.py @@ -0,0 +1,3 @@ +from setuptools import setup + +setup() diff --git a/src/clean-nb.py b/src/clean-nb.py deleted file mode 100644 index dff6e1e..0000000 --- a/src/clean-nb.py +++ /dev/null @@ -1,73 +0,0 @@ -import os -import nbformat -from pathlib import Path -from bs4 import BeautifulSoup - - -def clean_up_frontmatter(): - # Define the path to the notebooks - root = Path('./notebooks').resolve() - nb_paths = [root / file for file in os.listdir(root) if file.endswith('.ipynb')] - - # Iterate over the notebooks - for nb_path in nb_paths: - # Load the notebook - nb = nbformat.read(nb_path, as_version=4) - if nb.cells[0].source.startswith('---'): - #Load frontmatter - fm = nb.cells[0].source.split('\n') - - # Extract the title and the subtitle - title, subtitle = '', '' - for line in fm: - if line.startswith('title'): - title = line.split(': ')[1] - if line.startswith('subtitle'): - subtitle = line.split(': ')[1] - - # Update the cell - nb.cells[0].source = f'# {title}\n{subtitle}\n' - - # Save the notebook - nbformat.write(nb, nb_path) - -def clean_up_references(): - # Load the references.html file - html_file_path = Path('_book/chapters/references.html') - with open(html_file_path, 'r', encoding='utf-8') as file: - html_content = file.read() - - # Parse the HTML 
"""Notebook clean-up helpers.

Two rewrites are offered for the ``*.ipynb`` files of a directory:

* ``clean_up_frontmatter`` turns a ``---`` delimited front matter cell into
  a Markdown heading.
* ``convert_refs`` turns ``[@key]`` citations into ``{cite}`key``` roles.
"""

import re
from pathlib import Path

# A ``[@key]`` citation.  Group 1 captures everything between ``[@`` and the
# first closing bracket, so unrelated brackets (Markdown links, list
# indexing, ...) never match.
_CITATION = re.compile(r"\[@([^\]]+)\]")


def _parse_frontmatter(source):
    """Return ``(title, subtitle)`` found in a front matter block.

    *source* is the text of a cell whose first line is the opening ``---``.
    Only lines up to the closing ``---`` are inspected.  A key must match
    exactly and start in the first column, so ``title-block-banner`` or an
    indented (nested) ``title`` is not mistaken for the document title.
    Each line is split at the first colon only, which keeps values that
    contain ``": "`` intact.  Missing keys yield empty strings; values are
    used verbatim apart from surrounding whitespace.
    """
    title, subtitle = "", ""
    # Skip the opening delimiter, stop at the closing one.
    for line in source.split("\n")[1:]:
        if line.strip() == "---":
            break
        key, sep, value = line.partition(":")
        if not sep:
            continue
        key = key.rstrip()
        if key == "title":
            title = value.strip()
        elif key == "subtitle":
            subtitle = value.strip()
    return title, subtitle


def _convert_citations(text):
    """Rewrite each ``[@key]`` in *text* as ``{cite}`key```; keep the rest."""
    return _CITATION.sub(r"{cite}`\1`", text)


def clean_up_frontmatter(dir="./notebooks", save=False):
    """Replace the front matter cell of notebooks with a Markdown heading.

    Every ``*.ipynb`` file directly inside *dir* is read.  When the source
    of its first cell starts with ``---``, that cell is rewritten to a
    level-one heading holding the ``title`` entry, followed by the
    ``subtitle`` entry on the next line.  Notebooks without cells, or whose
    first cell is not front matter, are left unchanged.

    Args:
        dir: Directory that contains the notebooks.  (The name shadows the
            builtin; it is kept because callers may pass it by keyword.)
        save: If true, each notebook is written back to its own path.  If
            false, nothing is written and the first notebook (in sorted
            path order) is returned right after processing; the remaining
            notebooks are not visited.

    Returns:
        The processed notebook node when ``save`` is false and at least one
        notebook exists, otherwise ``None``.
    """
    # Imported here so that ``find_ipynb`` and the text helpers remain
    # importable without the notebook tooling installed.
    import nbformat

    for nb_path in find_ipynb(dir):
        nb = nbformat.read(nb_path, as_version=4)

        # ``nb.cells`` may be empty; guard before indexing.
        if nb.cells and nb.cells[0].source.startswith("---"):
            title, subtitle = _parse_frontmatter(nb.cells[0].source)
            nb.cells[0].source = f"# {title}\n{subtitle}\n"

        if not save:
            return nb
        nbformat.write(nb, nb_path)
    return None


def convert_refs(dir="./notebooks", save=True):
    """Convert ``[@key]`` citations in notebooks to ``{cite}`key``` roles.

    The first cell of each notebook is skipped.  Only Markdown cells are
    rewritten, and only the brackets that belong to a ``[@...]`` citation
    are touched, so links such as ``[text](url)`` and code such as ``x[0]``
    stay intact.

    Args:
        dir: Directory that contains the notebooks.  (The name shadows the
            builtin; it is kept because callers may pass it by keyword.)
        save: If true, each notebook is written back to its own path.  If
            false, nothing is written and the first notebook (in sorted
            path order) is returned right after processing; the remaining
            notebooks are not visited.

    Returns:
        The processed notebook node when ``save`` is false and at least one
        notebook exists, otherwise ``None``.
    """
    # Imported here so that ``find_ipynb`` and the text helpers remain
    # importable without the notebook tooling installed.
    import nbformat

    for nb_path in find_ipynb(dir):
        nb = nbformat.read(nb_path, as_version=4)

        for cell in nb.cells[1:]:
            if cell.cell_type == "markdown":
                cell.source = _convert_citations(cell.source)

        if not save:
            return nb
        nbformat.write(nb, nb_path)
    return None


def find_ipynb(dir):
    """Return the notebooks directly inside *dir* as sorted absolute paths.

    Sorting makes the result independent of the directory listing order of
    the file system, so "the first notebook" is well defined.

    Args:
        dir: Directory to search (not recursive).

    Returns:
        A list of ``pathlib.Path`` objects whose names end in ``.ipynb``.
    """
    root = Path(dir).resolve()
    return sorted(
        entry for entry in root.iterdir() if entry.name.endswith(".ipynb")
    )


def main():
    """Rewrite the front matter of all notebooks in ``./notebooks`` in place."""
    # ``save`` defaults to False; without it the script would discard its
    # result and leave every notebook untouched.
    clean_up_frontmatter(save=True)


if __name__ == "__main__":
    main()
"""Tests for the notebook conversions in ``eo_datascience.clean_nb``."""

import nbformat
from pathlib import Path
import pytest
from eo_datascience.clean_nb import clean_up_frontmatter, convert_refs

# Directory of this test module; it also holds the ``mock.ipynb`` fixture.
# Anchoring on ``__file__`` keeps the tests independent of the directory
# pytest is started from.
TESTS_DIR = Path(__file__).resolve().parent


def test_remove_front_matter():
    """The front matter cell becomes a heading with title and subtitle."""
    nb = clean_up_frontmatter(TESTS_DIR, save=False)
    assert nb["cells"][0]["source"] == "# This a mock Jupyter file\nWe use it for testing\n"


def test_conversion_of_refs():
    """A ``[@key]`` citation becomes a ``{cite}`` role; trailing text is kept."""
    nb = convert_refs(TESTS_DIR, save=False)
    assert nb["cells"][1]["source"] == r'{cite}`ref1` '