deploy: b3ee320

ricosjp · Nov 20, 2024 · 3e026a8 · 3e026a8
commit 3e026a8
Show file tree

Hide file tree

Showing 133 changed files with 19,537 additions and 0 deletions.
diff --git a/.nojekyll b/.nojekyll
diff --git a/_downloads/0953145e59ada4e8a11b742a56988ff4/02_model_definition_by_yaml_file.py b/_downloads/0953145e59ada4e8a11b742a56988ff4/02_model_definition_by_yaml_file.py
@@ -0,0 +1,50 @@
+"""
+.. _second:
+
+
+Model Definition by Yaml file
+----------------------------------------------------
+
+Phlower offers a way to define models and their order in a yaml file.
+
+"""
+
+
+###################################################################################################
+# First of all, we would like to load sample yaml data. Please download the sample yaml file,
+# `model.yml
+# <https://github.com/ricosjp/phlower/tutorials/basic_usages/sample_data/model/model.yml>`_
+#
+# Then we construct a PhlowerSetting object from the yaml file.
+
+from phlower.settings import PhlowerSetting
+
+setting = PhlowerSetting.read_yaml("sample_data/model/model.yml")
+
+
+###################################################################################################
+# The order of models must form a DAG (Directed Acyclic Graph).
+# To check this condition, we call the `resolve` function.
+
+setting.model.network.resolve(is_first=True)
+
+
+###################################################################################################
+# In phlower, networks are packed into PhlowerGroupModule.
+# PhlowerGroupModule is directly created from model setting.
+#
+# The `draw` function generates a file in mermaid format.
+
+from phlower.nn import PhlowerGroupModule
+
+model = PhlowerGroupModule.from_setting(setting.model.network)
+model.draw("images")
+
+
+###################################################################################################
+# The output mermaid file is shown below.
+#
+# .. mermaid:: ../../tutorials/basic_usages/images/SAMPLE_MODEL.mmd
+#
+# From this diagram, we can understand the details of each model and the data flow.
+#
diff --git a/...4c6068b55102daf56ad0a3da6baf3/03_high_level_api_for_scaling_training_and_prediction.ipynb b/...4c6068b55102daf56ad0a3da6baf3/03_high_level_api_for_scaling_training_and_prediction.ipynb
@@ -0,0 +1,122 @@
+{
+  "cells": [
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "\n\n\n# High Level API for scaling, training and prediction\n\nIn this section, we will use high level API for performing machine learning process.\n"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "At first, we will prepare dummy data.\nThese dummy data correspond to feature values extracted from simulation data.\n\n\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "collapsed": false
+      },
+      "outputs": [],
+      "source": [
+        "import pathlib\nimport random\nimport shutil\n\nimport numpy as np\nimport scipy.sparse as sp\n\n\ndef prepare_sample_interim_files():\n    np.random.seed(0)\n    random.seed(0)\n\n    output_directory = pathlib.Path(\"out\")\n    if output_directory.exists():\n        shutil.rmtree(output_directory)\n\n    base_interim_dir = output_directory / \"interim\"\n    base_interim_dir.mkdir(parents=True)\n\n    n_cases = 5\n    dtype = np.float32\n    for i in range(n_cases):\n        n_nodes = 100 * (i + 1)\n        interim_dir = base_interim_dir / f\"case_{i}\"\n        interim_dir.mkdir()\n\n        nodal_initial_u = np.random.rand(n_nodes, 3, 1)\n        np.save(\n            interim_dir / \"nodal_initial_u.npy\",\n            nodal_initial_u.astype(dtype),\n        )\n\n        # nodal_last_u = np.random.rand(n_nodes, 3, 1)\n        np.save(interim_dir / \"nodal_last_u.npy\", nodal_initial_u.astype(dtype))\n\n        sparse_array_names = [\n            \"nodal_nadj\",\n            \"nodal_x_grad_hop1\",\n            \"nodal_y_grad_hop1\",\n            \"nodal_z_grad_hop1\",\n        ]\n        rng = np.random.default_rng()\n        for name in sparse_array_names:\n            arr = sp.random(n_nodes, n_nodes, density=0.1, random_state=rng)\n            sp.save_npz(interim_dir / name, arr.tocoo().astype(dtype))\n\n        (interim_dir / \"converted\").touch()\n\n\nprepare_sample_interim_files()"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "Setting file for scaling and training can be downloaded from\n[setting.yml](https://github.com/ricosjp/phlower/tutorials/basic_usages/sample_data/e2e/setting.yml).\nWe perform the scaling process on the data above.\n\n\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "collapsed": false
+      },
+      "outputs": [],
+      "source": [
+        "from phlower.services.preprocessing import PhlowerScalingService\nfrom phlower.settings import PhlowerSetting\n\nsetting = PhlowerSetting.read_yaml(\"sample_data/e2e/setting.yml\")\n\nscaler = PhlowerScalingService.from_setting(setting)\nscaler.fit_transform_all(\n    interim_data_directories=[\n        pathlib.Path(\"out/interim/case_0\"),\n        pathlib.Path(\"out/interim/case_1\"),\n        pathlib.Path(\"out/interim/case_2\"),\n        pathlib.Path(\"out/interim/case_3\"),\n        pathlib.Path(\"out/interim/case_4\"),\n    ],\n    output_base_directory=pathlib.Path(\"out/preprocessed\"),\n)"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "Next, we perform training by using preprocessed data.\n\n\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "collapsed": false
+      },
+      "outputs": [],
+      "source": [
+        "from phlower.services.trainer import PhlowerTrainer\n\ntrainer = PhlowerTrainer.from_setting(setting)\n\nloss = trainer.train(\n    train_directories=[\n        pathlib.Path(\"out/preprocessed/case_0\"),\n        pathlib.Path(\"out/preprocessed/case_1\"),\n        pathlib.Path(\"out/preprocessed/case_2\"),\n    ],\n    validation_directories=[\n        pathlib.Path(\"out/preprocessed/case_3\"),\n        pathlib.Path(\"out/preprocessed/case_4\"),\n    ],\n    output_directory=pathlib.Path(\"out/model\"),\n)"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "``train`` function returns a PhlowerTensor object which corresponds to the last validation loss.\nLet's print it.\n\nWe can see that the loss object has a physical dimension, and it is L^2 T^(-2)\nbecause we use MSE (Mean Squared Error) as a loss function.\n\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "collapsed": false
+      },
+      "outputs": [],
+      "source": [
+        "print(loss)"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "Finally, we perform prediction by using the pretrained model.\nSetting file for prediction can be downloaded from\n[predict.yml](https://github.com/ricosjp/phlower/tutorials/basic_usages/sample_data/e2e/predict.yml)\n\nIt is found that physical dimension is also considered properly.\n\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "collapsed": false
+      },
+      "outputs": [],
+      "source": [
+        "from phlower.services.predictor import PhlowerPredictor\n\nsetting = PhlowerSetting.read_yaml(\"sample_data/e2e/predict.yml\")\n\npredictor = PhlowerPredictor(\n    model_directory=pathlib.Path(\"out/model\"),\n    predict_setting=setting.prediction,\n)\n\npreprocessed_directories = [pathlib.Path(\"out/preprocessed/case_3\")]\n\nfor result in predictor.predict(preprocessed_directories):\n    for k in result.keys():\n        print(f\"{k}: {result[k].dimension}\")"
+      ]
+    }
+  ],
+  "metadata": {
+    "kernelspec": {
+      "display_name": "Python 3",
+      "language": "python",
+      "name": "python3"
+    },
+    "language_info": {
+      "codemirror_mode": {
+        "name": "ipython",
+        "version": 3
+      },
+      "file_extension": ".py",
+      "mimetype": "text/x-python",
+      "name": "python",
+      "nbconvert_exporter": "python",
+      "pygments_lexer": "ipython3",
+      "version": "3.10.15"
+    }
+  },
+  "nbformat": 4,
+  "nbformat_minor": 0
+}
diff --git a/...15af1fc81b615371e46cf85a99e84a46/03_high_level_api_for_scaling_training_and_prediction.py b/...15af1fc81b615371e46cf85a99e84a46/03_high_level_api_for_scaling_training_and_prediction.py
@@ -0,0 +1,145 @@
+"""
+.. _third:
+
+
+High Level API for scaling, training and prediction
+----------------------------------------------------
+
+In this section, we will use high level API for performing machine learning process.
+
+"""
+
+###################################################################################################
+# At first, we will prepare dummy data.
+# These dummy data correspond to feature values extracted from simulation data.
+#
+
+import pathlib
+import random
+import shutil
+
+import numpy as np
+import scipy.sparse as sp
+
+
+def prepare_sample_interim_files():  # write dummy cases to out/interim
+    np.random.seed(0)
+    random.seed(0)
+
+    output_directory = pathlib.Path("out")
+    if output_directory.exists():
+        shutil.rmtree(output_directory)  # discard output of earlier runs
+
+    base_interim_dir = output_directory / "interim"
+    base_interim_dir.mkdir(parents=True)
+
+    n_cases = 5
+    dtype = np.float32
+    for i in range(n_cases):
+        n_nodes = 100 * (i + 1)  # 100, 200, ..., 500
+        interim_dir = base_interim_dir / f"case_{i}"
+        interim_dir.mkdir()
+
+        nodal_initial_u = np.random.rand(n_nodes, 3, 1)
+        np.save(
+            interim_dir / "nodal_initial_u.npy",
+            nodal_initial_u.astype(dtype),
+        )
+
+        # NOTE(review): last_u is a copy of initial_u, not a new draw - confirm
+        np.save(interim_dir / "nodal_last_u.npy", nodal_initial_u.astype(dtype))
+
+        sparse_array_names = [
+            "nodal_nadj",
+            "nodal_x_grad_hop1",
+            "nodal_y_grad_hop1",
+            "nodal_z_grad_hop1",
+        ]
+        rng = np.random.default_rng()  # unseeded: ignores np.random.seed(0)
+        for name in sparse_array_names:
+            arr = sp.random(n_nodes, n_nodes, density=0.1, random_state=rng)
+            sp.save_npz(interim_dir / name, arr.tocoo().astype(dtype))
+
+        (interim_dir / "converted").touch()  # empty file, presumably a marker
+
+
+prepare_sample_interim_files()
+
+###################################################################################################
+# Setting file for scaling and training can be downloaded from
+# `setting.yml
+# <https://github.com/ricosjp/phlower/tutorials/basic_usages/sample_data/e2e/setting.yml>`_.
+# We perform the scaling process on the data above.
+#
+
+from phlower.services.preprocessing import PhlowerScalingService
+from phlower.settings import PhlowerSetting
+
+setting = PhlowerSetting.read_yaml("sample_data/e2e/setting.yml")
+
+scaler = PhlowerScalingService.from_setting(setting)
+scaler.fit_transform_all(
+    interim_data_directories=[
+        pathlib.Path("out/interim/case_0"),
+        pathlib.Path("out/interim/case_1"),
+        pathlib.Path("out/interim/case_2"),
+        pathlib.Path("out/interim/case_3"),
+        pathlib.Path("out/interim/case_4"),
+    ],
+    output_base_directory=pathlib.Path("out/preprocessed"),
+)
+
+
+###################################################################################################
+# Next, we perform training by using preprocessed data.
+#
+
+from phlower.services.trainer import PhlowerTrainer
+
+trainer = PhlowerTrainer.from_setting(setting)
+
+loss = trainer.train(
+    train_directories=[
+        pathlib.Path("out/preprocessed/case_0"),
+        pathlib.Path("out/preprocessed/case_1"),
+        pathlib.Path("out/preprocessed/case_2"),
+    ],
+    validation_directories=[
+        pathlib.Path("out/preprocessed/case_3"),
+        pathlib.Path("out/preprocessed/case_4"),
+    ],
+    output_directory=pathlib.Path("out/model"),
+)
+
+###################################################################################################
+# ``train`` function returns a PhlowerTensor object which corresponds to the last validation loss.
+# Let's print it.
+#
+# We can see that the loss object has a physical dimension, and it is L^2 T^(-2)
+# because we use MSE (Mean Squared Error) as a loss function.
+
+print(loss)
+
+
+###################################################################################################
+# Finally, we perform prediction by using the pretrained model.
+# Setting file for prediction can be downloaded from
+# `predict.yml
+# <https://github.com/ricosjp/phlower/tutorials/basic_usages/sample_data/e2e/predict.yml>`_
+#
+# It is found that physical dimension is also considered properly.
+
+from phlower.services.predictor import PhlowerPredictor
+
+setting = PhlowerSetting.read_yaml("sample_data/e2e/predict.yml")
+
+predictor = PhlowerPredictor(
+    model_directory=pathlib.Path("out/model"),
+    predict_setting=setting.prediction,
+)
+
+preprocessed_directories = [pathlib.Path("out/preprocessed/case_3")]
+
+for result in predictor.predict(preprocessed_directories):
+    for k in result.keys():
+        print(f"{k}: {result[k].dimension}")