From 044aa42324a27bdbcca0f0d0ca841285108a017a Mon Sep 17 00:00:00 2001
From: MartinSchobben <schobbenmartin@gmail.com>
Date: Wed, 14 Aug 2024 16:55:40 +0200
Subject: [PATCH] add dev tools and unit tests

---
 environment.yml                              |  9 +++
 pyproject.toml                               |  3 +
 setup.cfg                                    | 34 +++++++++
 setup.py                                     |  3 +
 src/clean-nb.py                              | 73 --------------------
 src/eo_datascience/__init__.py               |  4 ++
 src/eo_datascience/_version.py               |  2 +
 src/eo_datascience/clean_nb.py               | 60 ++++++++++++++++
 src/{ => eo_datascience}/render_sfinx_toc.py |  0
 tests/mock.ipynb                             | 29 ++++++++
 tests/test_quarto_nb_conversions.py          | 10 +++
 11 files changed, 154 insertions(+), 73 deletions(-)
 create mode 100644 environment.yml
 create mode 100644 pyproject.toml
 create mode 100644 setup.cfg
 create mode 100644 setup.py
 delete mode 100644 src/clean-nb.py
 create mode 100644 src/eo_datascience/__init__.py
 create mode 100644 src/eo_datascience/_version.py
 create mode 100644 src/eo_datascience/clean_nb.py
 rename src/{ => eo_datascience}/render_sfinx_toc.py (100%)
 create mode 100644 tests/mock.ipynb
 create mode 100644 tests/test_quarto_nb_conversions.py

diff --git a/environment.yml b/environment.yml
new file mode 100644
index 0000000..3728b1f
--- /dev/null
+++ b/environment.yml
@@ -0,0 +1,9 @@
+name: eo-datascience
+channels:
+  - conda-forge
+dependencies:
+  - python=3.10
+  - pip
+  - mamba
+  - jupyter
+  - nbformat
\ No newline at end of file
diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644
index 0000000..7fd26b9
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1,3 @@
+[build-system]
+requires = ["setuptools"]
+build-backend = "setuptools.build_meta"
\ No newline at end of file
diff --git a/setup.cfg b/setup.cfg
new file mode 100644
index 0000000..5a13289
--- /dev/null
+++ b/setup.cfg
@@ -0,0 +1,34 @@
+[metadata]
+name = eo_datascience
+version = attr: eo_datascience.__version__
+description = Examples of TUWien Jupyter notebooks for education
+author = TU Wien GEO MRS group
+author_email = martin.schobben@geo.tuwien.ac.at
+long_description = file: README.md
+url = https://github.com/TUW-GEO/eo-datascience
+platforms = any
+classifiers =
+    Intended Audience :: Science/Research
+    Topic :: Scientific/Engineering
+    Topic :: Scientific/Engineering :: GIS
+    Topic :: Software Development :: Libraries
+    Programming Language :: Python :: 3
+    Programming Language :: Python :: 3.8
+    Programming Language :: Python :: 3.9
+    Operating System :: POSIX
+    Natural Language :: English
+
+[options]
+package_dir =
+    = src
+packages = find:
+install_requires =
+    nbformat
+
+[options.packages.find]
+where = src
+
+[options.extras_require]
+test =
+    pytest
+    pytest-cov
diff --git a/setup.py b/setup.py
new file mode 100644
index 0000000..6068493
--- /dev/null
+++ b/setup.py
@@ -0,0 +1,3 @@
+from setuptools import setup
+
+setup()
diff --git a/src/clean-nb.py b/src/clean-nb.py
deleted file mode 100644
index dff6e1e..0000000
--- a/src/clean-nb.py
+++ /dev/null
@@ -1,73 +0,0 @@
-import os
-import nbformat
-from pathlib import Path
-from bs4 import BeautifulSoup
-
-
-def clean_up_frontmatter():
-    # Define the path to the notebooks
-    root = Path('./notebooks').resolve()
-    nb_paths = [root / file for file in os.listdir(root) if file.endswith('.ipynb')]
-
-    # Iterate over the notebooks
-    for nb_path in nb_paths:
-        # Load the notebook
-        nb = nbformat.read(nb_path, as_version=4)
-        if nb.cells[0].source.startswith('---'):
-            #Load frontmatter
-            fm = nb.cells[0].source.split('\n')
-
-            # Extract the title and the subtitle
-            title, subtitle = '', ''
-            for line in fm:
-                if line.startswith('title'):
-                    title = line.split(': ')[1]
-                if line.startswith('subtitle'):
-                    subtitle = line.split(': ')[1]
-            
-            # Update the cell
-            nb.cells[0].source = f'# {title}\n{subtitle}\n'
-            
-            # Save the notebook
-            nbformat.write(nb, nb_path)
-
-def clean_up_references():
-    # Load the references.html file
-    html_file_path = Path('_book/chapters/references.html')
-    with open(html_file_path, 'r', encoding='utf-8') as file:
-        html_content = file.read()
-
-    # Parse the HTML content
-    soup = BeautifulSoup(html_content)
-    references_div = soup.find('div', {'id': 'refs', 'class': 'references csl-bib-body hanging-indent'})
-
-    # Format the references as string
-    references_list = []
-    for ref in references_div.get_text().split('\n\n\n'):
-        ref = ref.replace('\n\n', '')
-        ref = ref.replace('\n', ' ')
-        references_list.append(ref)
-
-    # Indent the references
-    #ref_list = ['\t' + ref for ref in references_list]
-
-    # Merge the references into a single string
-    output_str = '\n\n'.join(references_list)
-
-    # Load the References notebook
-    ref_nb_path = Path('./notebooks/references.ipynb').resolve()
-    nb = nbformat.read(ref_nb_path, as_version=4)
-
-    # Update the cell
-    nb.cells[0].source = f'# References\n\n{output_str}'
-
-    # Save the notebook
-    nbformat.write(nb, ref_nb_path)
-
-def main():
-    clean_up_frontmatter()
-    clean_up_references()
-
-
-if __name__ == '__main__':
-    main()
\ No newline at end of file
diff --git a/src/eo_datascience/__init__.py b/src/eo_datascience/__init__.py
new file mode 100644
index 0000000..09ba5cd
--- /dev/null
+++ b/src/eo_datascience/__init__.py
@@ -0,0 +1,4 @@
+from eo_datascience._version import __commit__
+from eo_datascience._version import __version__
+
+name = "eo_datascience"
\ No newline at end of file
diff --git a/src/eo_datascience/_version.py b/src/eo_datascience/_version.py
new file mode 100644
index 0000000..c55373b
--- /dev/null
+++ b/src/eo_datascience/_version.py
@@ -0,0 +1,2 @@
+__version__ = "v0.0.0"
+__commit__ = "0000000"
diff --git a/src/eo_datascience/clean_nb.py b/src/eo_datascience/clean_nb.py
new file mode 100644
index 0000000..1a36827
--- /dev/null
+++ b/src/eo_datascience/clean_nb.py
@@ -0,0 +1,60 @@
+import os
+import nbformat
+from pathlib import Path
+
+def clean_up_frontmatter(dir = './notebooks', save=False):
+    # Collect the paths of all notebooks in the directory
+    nb_paths = find_ipynb(dir)
+
+    # Iterate over the notebooks
+    for nb_path in nb_paths:
+        # Load the notebook
+        nb = nbformat.read(nb_path, as_version=4)
+        if nb.cells[0].source.startswith('---'):
+            # Split the front matter into lines
+            fm = nb.cells[0].source.split('\n')
+
+            # Extract the title and the subtitle
+            title, subtitle = '', ''
+            for line in fm:
+                if line.startswith('title'):
+                    title = line.split(': ')[1]
+                if line.startswith('subtitle'):
+                    subtitle = line.split(': ')[1]
+            
+            # Update the cell
+            nb.cells[0].source = f'# {title}\n{subtitle}\n'
+            
+            # Save the notebook
+            if save:
+                nbformat.write(nb, nb_path)
+            else:
+                return nb
+
+def convert_refs(dir="./notebooks", save=True):
+    nb_paths = find_ipynb(dir)
+    
+    # Iterate over the notebooks
+    for nb_path in nb_paths:
+        # Load the notebook
+        nb = nbformat.read(nb_path, as_version=4)
+        for i in range(len(nb.cells)):
+            if i != 0:
+                nb.cells[i].source = nb.cells[i].source.replace(r"[@", r"{cite}`").replace(r"]", r"`")
+        
+        # Save the notebook
+        if save:
+            nbformat.write(nb, nb_path)
+        else:
+            return nb
+
+def find_ipynb(dir):
+    root = Path(dir).resolve()
+    nb_paths = [root / file for file in os.listdir(root) if file.endswith('.ipynb')]
+    return nb_paths
+
+def main():
+    clean_up_frontmatter()
+
+if __name__ == '__main__':
+    main()
\ No newline at end of file
diff --git a/src/render_sfinx_toc.py b/src/eo_datascience/render_sfinx_toc.py
similarity index 100%
rename from src/render_sfinx_toc.py
rename to src/eo_datascience/render_sfinx_toc.py
diff --git a/tests/mock.ipynb b/tests/mock.ipynb
new file mode 100644
index 0000000..34c2609
--- /dev/null
+++ b/tests/mock.ipynb
@@ -0,0 +1,29 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "---\n",
+    "title: This a mock Jupyter file\n",
+    "subtitle: We use it for testing\n",
+    "author: anonymous\n",
+    "---"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "[@ref1] "
+   ]
+  }
+ ],
+ "metadata": {
+  "language_info": {
+   "name": "python"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/tests/test_quarto_nb_conversions.py b/tests/test_quarto_nb_conversions.py
new file mode 100644
index 0000000..fcbeae9
--- /dev/null
+++ b/tests/test_quarto_nb_conversions.py
@@ -0,0 +1,10 @@
+import nbformat
+from pathlib import Path
+import pytest
+from eo_datascience.clean_nb import clean_up_frontmatter, convert_refs
+
+def test_remove_front_matter():
+    assert clean_up_frontmatter("./tests", False)["cells"][0]["source"] == "# This a mock Jupyter file\nWe use it for testing\n"
+
+def test_conversion_of_refs():
+    assert convert_refs("./tests", False)["cells"][1]["source"] == r'{cite}`ref1` '
\ No newline at end of file