Skip to content

Commit

Permalink
added filter for the notebooks
Browse files Browse the repository at this point in the history
  • Loading branch information
npikall committed Aug 1, 2024
1 parent 3a5f1e1 commit 0db369e
Show file tree
Hide file tree
Showing 8 changed files with 1,037 additions and 981 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/publish.yaml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
on:
workflow_dispatch:
push:
branches: main
branches: [main, dev]

name: Quarto Publish

Expand Down
1 change: 1 addition & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ post-render:
for i in $(QMD); do quarto convert $$i; done
- mv chapters/*.ipynb notebooks/ >/dev/null 2>&1
- for f in chapters/*.quarto_ipynb ; do mv -- "$f" "${f%.quarto_ipynb}.ipynb" >/dev/null 2>&1; done
python assets/filters/clean-nb.py
cp Makefile notebooks/

data:
Expand Down
73 changes: 73 additions & 0 deletions assets/filters/clean-nb.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
import os
import nbformat
from pathlib import Path
from bs4 import BeautifulSoup


def clean_up_frontmatter():
    """Replace each notebook's Quarto YAML frontmatter with a markdown title.

    Scans ``./notebooks`` for ``*.ipynb`` files. When a notebook's first cell
    starts with ``---`` (Quarto frontmatter), the title and subtitle are
    extracted and the cell is rewritten as ``# {title}\\n{subtitle}\\n``.
    Notebooks without frontmatter are left untouched.
    """
    # Collect the notebooks to process (sorted for deterministic order)
    root = Path('./notebooks').resolve()
    nb_paths = sorted(root.glob('*.ipynb'))

    for nb_path in nb_paths:
        nb = nbformat.read(nb_path, as_version=4)
        if not nb.cells or not nb.cells[0].source.startswith('---'):
            continue

        # Extract title/subtitle from the frontmatter lines.
        # Split on the FIRST ': ' only, so values containing colons
        # (e.g. "title: Intro: Basics") are kept intact; a bare "title:"
        # line yields an empty string instead of raising IndexError.
        title, subtitle = '', ''
        for line in nb.cells[0].source.split('\n'):
            key, sep, value = line.partition(': ')
            if not sep:
                continue
            if key == 'title':
                title = value
            elif key == 'subtitle':
                subtitle = value

        # Rewrite the frontmatter cell as a plain markdown heading
        nb.cells[0].source = f'# {title}\n{subtitle}\n'

        # Save the notebook in place
        nbformat.write(nb, nb_path)

def clean_up_references():
    """Copy the rendered bibliography into the references notebook.

    Reads the Quarto-rendered ``_book/chapters/references.html``, extracts the
    ``#refs`` div, flattens each entry onto a single line, and writes the
    result into the first cell of ``./notebooks/references.ipynb``.

    Raises:
        ValueError: if the references div cannot be found in the HTML.
    """
    # Load the rendered HTML produced by Quarto
    html_file_path = Path('_book/chapters/references.html')
    html_content = html_file_path.read_text(encoding='utf-8')

    # Parse with an explicit parser: without one, bs4 guesses whichever
    # parser is installed, which is environment-dependent and warns.
    soup = BeautifulSoup(html_content, 'html.parser')
    references_div = soup.find('div', {'id': 'refs', 'class': 'references csl-bib-body hanging-indent'})
    if references_div is None:
        # Fail loudly instead of an opaque AttributeError below
        raise ValueError(f'No references div found in {html_file_path}')

    # Entries are separated by blank lines; collapse each entry onto one line
    references_list = []
    for ref in references_div.get_text().split('\n\n\n'):
        ref = ref.replace('\n\n', '')
        ref = ref.replace('\n', ' ')
        references_list.append(ref)

    # Merge the references into a single markdown string
    output_str = '\n\n'.join(references_list)

    # Load the references notebook and overwrite its first cell
    ref_nb_path = Path('./notebooks/references.ipynb').resolve()
    nb = nbformat.read(ref_nb_path, as_version=4)
    nb.cells[0].source = f'# References\n\n{output_str}'

    # Save the notebook in place
    nbformat.write(nb, ref_nb_path)

def main():
    """Run every notebook post-processing step in order."""
    steps = (clean_up_frontmatter, clean_up_references)
    for step in steps:
        step()


if __name__ == '__main__':
    main()
6 changes: 3 additions & 3 deletions chapters/01_classification.qmd
Original file line number Diff line number Diff line change
Expand Up @@ -116,8 +116,7 @@ ds_odc = odc.stac.load(
)
# actually load it
with ProgressBar():
ds_odc.load()
ds_odc.load()
```

## Data Visualization
Expand Down Expand Up @@ -192,7 +191,7 @@ def normalized_difference(a, b):
return (a - b*1.) / (a + b)
ndvi = normalized_difference(ds_odc.nir, ds_odc.red)
ndvi.median(dim="time").plot.imshow(cmap='cmc.cork').axes.set_title('NDVI')
ndvi.median(dim="time").plot.imshow(cmap='cmc.cork', vmin=-1, vmax=1).axes.set_title('NDVI')
plt.show()
```

Expand Down Expand Up @@ -435,5 +434,6 @@ for p in ax.patches:
ax.annotate(f'{p.get_height():.1f}%', (p.get_x() + p.get_width() / 2., p.get_height()),
ha='center', va='center', xytext=(0, 9), textcoords='offset points')
```

## Conclusion
In this chapter, we utilized machine learning to classify satellite imagery into forested and non-forested areas, comparing Naive Bayes and Random Forest classifiers. The Random Forest classifier generally outperformed Naive Bayes, with fewer errors in classification, although it misclassified the Danube River as forested, while Naive Bayes incorrectly identified cropland as forest. The analysis, supported by the bar chart, revealed that about 18% of the scene was classified as forest, 66% as non-forest, and the remainder included ambiguous categories. This comparison highlights the strengths and limitations of each classifier, underscoring the need for careful selection and evaluation of classification methods.
Loading

0 comments on commit 0db369e

Please sign in to comment.