Skip to content

Commit

Permalink
added filter for the notebooks
Browse files Browse the repository at this point in the history
  • Loading branch information
npikall committed Aug 1, 2024
1 parent 3a5f1e1 commit 0db369e
Show file tree
Hide file tree
Showing 8 changed files with 1,037 additions and 981 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/publish.yaml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
on:
workflow_dispatch:
push:
branches: main
branches: [main, dev]

name: Quarto Publish

Expand Down
1 change: 1 addition & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ post-render:
for i in $(QMD); do quarto convert $$i; done
- mv chapters/*.ipynb notebooks/ >/dev/null 2>&1
- for f in chapters/*.quarto_ipynb ; do mv -- "$f" "${f%.quarto_ipynb}.ipynb" >/dev/null 2>&1; done
python assets/filters/clean-nb.py
cp Makefile notebooks/

data:
Expand Down
73 changes: 73 additions & 0 deletions assets/filters/clean-nb.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
import os
import nbformat
from pathlib import Path
from bs4 import BeautifulSoup


def clean_up_frontmatter():
    """Replace each notebook's Quarto YAML frontmatter with a markdown title.

    Scans ``./notebooks`` for ``*.ipynb`` files. When a notebook's first cell
    starts with ``---`` (Quarto frontmatter), the title and subtitle are
    extracted and the cell is rewritten as ``# {title}\\n{subtitle}\\n``.
    Notebooks without frontmatter are left untouched.
    """
    # Collect the notebooks to process (sorted for deterministic order)
    root = Path('./notebooks').resolve()
    nb_paths = sorted(root.glob('*.ipynb'))

    for nb_path in nb_paths:
        nb = nbformat.read(nb_path, as_version=4)
        if not nb.cells or not nb.cells[0].source.startswith('---'):
            continue

        # Extract title/subtitle from the frontmatter lines.
        # Split on the FIRST ': ' only, so values containing colons
        # (e.g. "title: Intro: Basics") are kept intact; a bare "title:"
        # line yields an empty string instead of raising IndexError.
        title, subtitle = '', ''
        for line in nb.cells[0].source.split('\n'):
            key, sep, value = line.partition(': ')
            if not sep:
                continue
            if key == 'title':
                title = value
            elif key == 'subtitle':
                subtitle = value

        # Rewrite the frontmatter cell as a plain markdown heading
        nb.cells[0].source = f'# {title}\n{subtitle}\n'

        # Save the notebook in place
        nbformat.write(nb, nb_path)

def clean_up_references():
    """Copy the rendered bibliography into the references notebook.

    Reads the Quarto-rendered ``_book/chapters/references.html``, extracts the
    ``#refs`` div, flattens each entry onto a single line, and writes the
    result into the first cell of ``./notebooks/references.ipynb``.

    Raises:
        ValueError: if the references div cannot be found in the HTML.
    """
    # Load the rendered HTML produced by Quarto
    html_file_path = Path('_book/chapters/references.html')
    html_content = html_file_path.read_text(encoding='utf-8')

    # Parse with an explicit parser: without one, bs4 guesses whichever
    # parser is installed, which is environment-dependent and warns.
    soup = BeautifulSoup(html_content, 'html.parser')
    references_div = soup.find('div', {'id': 'refs', 'class': 'references csl-bib-body hanging-indent'})
    if references_div is None:
        # Fail loudly instead of an opaque AttributeError below
        raise ValueError(f'No references div found in {html_file_path}')

    # Entries are separated by blank lines; collapse each entry onto one line
    references_list = []
    for ref in references_div.get_text().split('\n\n\n'):
        ref = ref.replace('\n\n', '')
        ref = ref.replace('\n', ' ')
        references_list.append(ref)

    # Merge the references into a single markdown string
    output_str = '\n\n'.join(references_list)

    # Load the references notebook and overwrite its first cell
    ref_nb_path = Path('./notebooks/references.ipynb').resolve()
    nb = nbformat.read(ref_nb_path, as_version=4)
    nb.cells[0].source = f'# References\n\n{output_str}'

    # Save the notebook in place
    nbformat.write(nb, ref_nb_path)

def main():
    """Run every notebook post-processing step in order."""
    steps = (clean_up_frontmatter, clean_up_references)
    for step in steps:
        step()


if __name__ == '__main__':
    main()
6 changes: 3 additions & 3 deletions chapters/01_classification.qmd
Original file line number Diff line number Diff line change
Expand Up @@ -116,8 +116,7 @@ ds_odc = odc.stac.load(
)
# actually load it
with ProgressBar():
ds_odc.load()
ds_odc.load()
```

## Data Visualization
Expand Down Expand Up @@ -192,7 +191,7 @@ def normalized_difference(a, b):
return (a - b*1.) / (a + b)
ndvi = normalized_difference(ds_odc.nir, ds_odc.red)
ndvi.median(dim="time").plot.imshow(cmap='cmc.cork').axes.set_title('NDVI')
ndvi.median(dim="time").plot.imshow(cmap='cmc.cork', vmin=-1, vmax=1).axes.set_title('NDVI')
plt.show()
```

Expand Down Expand Up @@ -435,5 +434,6 @@ for p in ax.patches:
ax.annotate(f'{p.get_height():.1f}%', (p.get_x() + p.get_width() / 2., p.get_height()),
ha='center', va='center', xytext=(0, 9), textcoords='offset points')
```

## Conclusion
In this chapter, we utilized machine learning to classify satellite imagery into forested and non-forested areas, comparing Naive Bayes and Random Forest classifiers. The Random Forest classifier generally outperformed Naive Bayes, with fewer errors in classification, although it misclassified the Danube River as forested, while Naive Bayes incorrectly identified cropland as forest. The analysis, supported by the bar chart, revealed that about 18% of the scene was classified as forest, 66% as non-forest, and the remainder included ambiguous categories. This comparison highlights the strengths and limitations of each classifier, underscoring the need for careful selection and evaluation of classification methods.
Loading

0 comments on commit 0db369e

Please sign in to comment.