diff --git a/docs/source/_static/css/custom.css b/docs/source/_static/css/custom.css
index 2716c6c39fb..ffacd2b187b 100644
--- a/docs/source/_static/css/custom.css
+++ b/docs/source/_static/css/custom.css
@@ -68,21 +68,23 @@
   --pst-icon-admonition-important: var(--pst-icon-exclamation-circle);
 }
 
-/* Small Device */
+/* Mobile First Approach */
 .bd-page-width {
   width: 100%;
   padding: 10px;
   box-sizing: border-box;
+  margin: 0 auto;
 }
 
-/* Medium Size */
+/* Tablet */
 @media (min-width: 768px) {
   .bd-page-width {
     padding: 20px;
+    max-width: 95%;
   }
 }
 
-/* Desktop Size */
+/* Desktop */
 @media (min-width: 960px) {
   .bd-page-width {
     max-width: 90%;
@@ -90,60 +92,85 @@
   }
 }
 
-@media (max-width: 959px) {
-  .bd-sidebar-primary {
-    width: 80%;
-    max-width: 100%;
-    height: 100%;
-    position: fixed;
-    z-index: 1050;
-    top: 0;
-    left: 0;
-    overflow-y: auto;
-    padding: 1rem;
-    box-sizing: border-box;
-    border-right: none;
-  }
+/* Mobile Sidebar */
+.bd-sidebar-primary {
+  width: 100%;
+  height: 100vh;
+  position: fixed;
+  z-index: 1050;
+  top: 0;
+  left: -100%;
+  overflow-y: auto;
+  padding: 1rem;
+  box-sizing: border-box;
+  background: white;
+  transition: left 0.3s ease-in-out;
 }
 
-/* Reduce Sidebar Width to 20% */
+.bd-sidebar-primary.show {
+  left: 0;
+}
+
+/* Tablet and Desktop Sidebar */
 @media (min-width: 960px) {
   .bd-sidebar-primary {
     flex: 0 0 20%;
-    max-width: 20%;
-    padding: 2rem 1rem 1rem;
+    min-width: 250px;
+    max-width: 400px;
+    width: 20%;
     position: sticky;
+    left: 0;
     top: var(--pst-header-height);
     height: calc(100vh - var(--pst-header-height));
     overflow-y: auto;
+    overflow-x: auto;
     border-right: 1px solid var(--pst-color-border);
+    resize: horizontal;
+    padding: 2rem 1rem 1rem;
+    -webkit-overflow-scrolling: touch;
+  }
+
+  .bd-sidebar-primary .bd-links__item {
+    white-space: normal;
+    word-wrap: break-word;
+  }
+
+  .bd-sidebar-primary::-webkit-scrollbar {
+    width: 6px;
+  }
+
+  .bd-sidebar-primary::-webkit-scrollbar-thumb {
+    background-color: rgba(0, 0, 0, 0.2);
+    border-radius: 3px;
   }
 }
 
+/* Main Content */
 .bd-main .bd-content .bd-article-container {
   max-width: 100%;
   padding: 1rem;
+  margin: 0 auto;
 }
 
-@media (max-width: 959px) {
-  .navbar-toggler {
-    display: block;
-  }
-}
-
+/* Navigation Toggle Button */
 .navbar-toggler {
+  display: block;
+  position: fixed;
+  top: 10px;
+  right: 10px;
   padding: 0.25rem 0.75rem;
   font-size: 1.25rem;
   line-height: 1;
-  /* background: rgba(0,0,0,.1); */
   border: none;
   border-radius: 0.25rem;
+  z-index: 1060;
+  background: transparent;
 }
 
-.navbar-toggler {
-  position: absolute;
-  top: 10px;
-  right: 10px;
+@media (min-width: 960px) {
+  .navbar-toggler {
+    display: none;
+  }
 }
 
 /* Hide Section Navigation Title */
@@ -156,10 +183,29 @@ nav.bd-links p.bd-links__title {
   display: none;
 }
 
+/* Navbar */
 .navbar {
   background: #0095ca !important;
+  position: sticky;
+  top: 0;
+  z-index: 2000;
+  width: 100%;
 }
 
+/* Video Container */
 .video {
   text-align: center;
+  position: relative;
+  padding-bottom: 56.25%; /* 16:9 Aspect Ratio */
+  height: 0;
+  overflow: hidden;
+  max-width: 100%;
+}
+
+.video iframe {
+  position: absolute;
+  top: 0;
+  left: 0;
+  width: 100%;
+  height: 100%;
 }
diff --git a/docs/source/conf.py b/docs/source/conf.py
index a679c843358..d239b33921a 100644
--- a/docs/source/conf.py
+++ b/docs/source/conf.py
@@ -42,6 +42,20 @@
     "sphinx.ext.intersphinx",  # Generate links to the documentation
     "sphinx_tabs.tabs",
     "sphinx_design",
+    "myst_parser",  # Enhanced markdown support
+    "sphinx.ext.todo",  # Support for TODO items
+    "sphinx.ext.githubpages",  # GitHub Pages support
+ "sphinx.ext.coverage", # Documentation coverage check +] + +myst_enable_extensions = [ + "colon_fence", + "deflist", + "dollarmath", + "fieldlist", + "html_image", + "linkify", + "tasklist", ] source_suffix = { @@ -69,6 +83,13 @@ html_theme = "pydata_sphinx_theme" html_static_path = ["_static"] +# Show source link & copyright +html_show_sourcelink = True +html_show_sphinx = False +html_show_copyright = True +html_copy_source = True + + html_theme_options = { "navbar_center": [], "navbar_end": ["search-field.html", "theme-switcher.html", "navbar-icon-links.html"], @@ -81,11 +102,23 @@ { "name": "GitHub", "url": "https://github.com/openvinotoolkit/training_extensions", - "icon": "_static/logos/github_icon.png", - "type": "local", + "icon": "fab fa-github", + "type": "fontawesome", }, ], + "use_edit_page_button": True, + "show_nav_level": 3, + "navigation_depth": 6, + "show_toc_level": 3, } + +html_context = { + "github_user": "openvinotoolkit", + "github_repo": "training_extensions", + "github_version": "master", + "doc_path": "docs/source/", +} + html_css_files = [ "css/custom.css", ] diff --git a/docs/source/guide/explanation/algorithms/diffusion/index.rst b/docs/source/guide/explanation/algorithms/diffusion/index.rst index cac0b848be5..7ac9558c60e 100644 --- a/docs/source/guide/explanation/algorithms/diffusion/index.rst +++ b/docs/source/guide/explanation/algorithms/diffusion/index.rst @@ -12,28 +12,30 @@ At the moment, the diffusion task supports the COCO captions dataset format: .. code-block:: -└─ Dataset/ - ├── dataset_meta.json # a list of custom labels (optional) - ├── images/ - │ ├── train/ - │ │ ├── - │ │ ├── - │ │ └── ... - │ └── val/ - │ ├── - │ ├── - │ └── ... - └── annotations/ - ├── _.json - └── ... + └─ Dataset/ + ├── dataset_meta.json # a list of custom labels (optional) + ├── images/ + │ ├── train/ + │ │ ├── + │ │ ├── + │ │ └── ... + │ └── val/ + │ ├── + │ ├── + │ └── ... + └── annotations/ + ├── _.json + └── ... Models -****** +******* As mentioned above, the goal of diffusion is to learn a generative model that can progressively transform a random noise vector into a realistic sample from a given data distribution. This process involves adding noise to the input data in a controlled manner and then training a model to reverse this process, gradually refining the noise into a meaningful output. Diffusion models are particularly effective at capturing complex patterns and dependencies in the data, making them suitable for a wide range of generative tasks. OpenVINO Training Extensions supports Stable Diffusion pipeline v1.4 that consists of 3 models: - - text encoder (CLIP ViT-L/14), - - autoencoder - - diffusion model (UNet) + +- text encoder (CLIP ViT-L/14) +- autoencoder +- diffusion model (UNet) + Pipeline is based on HuggingFace implementation pre-trained on LAION-5B dataset. In OpenVINO Training Extensions, we use the fine-tuning approach to train the model on the target dataset. @@ -42,9 +44,10 @@ Training Parameters ~~~~~~~~~~~~~~~~~~~~ The following parameters can be changed during training: + - ``Loss``: Loss is computed as the mean squared error between target noise and predicted noise. The default loss is ``MSE`` and cannot be changed. - ``Optimizer``: The default optimizer is ``AdamW`` and cannot be changed. It uses the following parameters that can be changed: - - ``Learning Rate``: The default learning rate is ``0.00001``. - - ``Betas``: The default betas are is ``[0.9, 0.999]``. - - ``Weight Decay``: The default weight decay is ``0.01``. 
-  - ``Epsilon``: The default epsilon is ``1e-8``.
+    - ``Learning Rate``: The default learning rate is ``0.00001``.
+    - ``Betas``: The default betas are ``[0.9, 0.999]``.
+    - ``Weight Decay``: The default weight decay is ``0.01``.
+    - ``Epsilon``: The default epsilon is ``1e-8``.
diff --git a/docs/source/guide/get_started/api_tutorial.rst b/docs/source/guide/get_started/api_tutorial.rst
index b170875affc..f741a0cc910 100644
--- a/docs/source/guide/get_started/api_tutorial.rst
+++ b/docs/source/guide/get_started/api_tutorial.rst
@@ -295,7 +295,7 @@ The datamodule used by the Engine is of type ``otx.core.data.module.OTXDataModul
     from otx.core.data.module import OTXDataModule
     from otx.engine import Engine
 
-    datamodule = OTXDataModule(data_root="data/wgisd")
+    datamodule = OTXDataModule(data_root="data/wgisd", ...)
     engine = Engine(datamodule=datamodule)
 
     engine.train()
@@ -304,6 +304,15 @@
 
 If both ``data_root`` and ``datamodule`` enter ``Engine`` as input, ``Engine`` uses datamodule as the base.
 
+.. tip::
+
+    You can get a DataModule more easily by using ``AutoConfigurator``.
+
+    .. code-block:: python
+
+        from otx.engine.utils.auto_configuration import AutoConfigurator
+
+        datamodule = AutoConfigurator(data_root="data/wgisd").get_datamodule()
 
 5. You can use train-specific arguments with ``train()`` function.
diff --git a/docs/source/guide/get_started/introduction.rst b/docs/source/guide/get_started/introduction.rst
index 056fa0bddab..65a9e6d4389 100644
--- a/docs/source/guide/get_started/introduction.rst
+++ b/docs/source/guide/get_started/introduction.rst
@@ -9,7 +9,7 @@ Introduction
 
 **OpenVINO™ Training Extensions** is a low-code transfer learning framework for Computer Vision.
 
-The CLI commands of the framework or API allows users to train, infer, optimize and deploy models easily and quickly even with low expertise in the deep learning field. OpenVINO™ Training Extensions offers diverse combinations of model architectures, learning methods, and task types based on `PyTorch `_ , `Lightning `_ and `OpenVINO™ toolkit `_.
+The framework's CLI commands and API allow users to easily train, infer, optimize and deploy models, even with limited deep learning expertise. OpenVINO™ Training Extensions offers diverse combinations of model architectures, learning methods, and task types based on `PyTorch `_ , `Lightning `_ and `OpenVINO™ toolkit `_.
 
 OpenVINO™ Training Extensions provide `recipe `_ for every supported task type, which consolidates necessary information to build a model. Model templates are validated on various datasets and serve one-stop shop for obtaining the best models in general. If you are an experienced user, you can configure your own model based on `torchvision `_, `mmcv `_ and `OpenVINO Model Zoo (OMZ) `_ frameworks.
 
@@ -127,13 +127,6 @@
         Learn how to train an action classification model
 
-    .. grid-item-card:: Action Detection
-        :link: ../tutorials/base/how_to_train/action_detection
-        :link-type: doc
-        :text-align: center
-
-        Learn how to train an action detection model
-
     .. grid-item-card:: Visual Prompting
         :link: ../tutorials/base/how_to_train/visual_prompting
         :link-type: doc
         :text-align: center
@@ -166,8 +159,8 @@ This section consists of an algorithms explanation and describes additional feat
 
 4. **Reference**:
 
-This section gives an overview of the OpenVINO™ Training Extensions code base. There source code for Entities, classes and functions can be found.
+This section gives an overview of the OpenVINO™ Training Extensions code base, where the source code for entities, classes, and functions can be found.
 
 5. **Release Notes**:
 
-There can be found a description of new and previous releases.
+This section contains descriptions of current and previous releases.
diff --git a/pyproject.toml b/pyproject.toml
index 0f92eb238c8..e65cb591bb0 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -70,6 +70,8 @@ docs = [
     "sphinx-autoapi",
     "sphinxemoji",
     "nbsphinx",
+    "myst-parser>=2.0.0",
+    "linkify-it-py",
 ]
 
 base = [
diff --git a/src/otx/algo/__init__.py b/src/otx/algo/__init__.py
index 8783a095993..86e3c6942b1 100644
--- a/src/otx/algo/__init__.py
+++ b/src/otx/algo/__init__.py
@@ -9,6 +9,21 @@
 )
 
 __all__ = [
-    "strategies",
     "accelerators",
+    "action_classification",
+    "anomaly",
+    "callbacks",
+    "classification",
+    "common",
+    "detection",
+    "diffusion",
+    "keypoint_detection",
+    "modules",
+    "object_detection_3d",
+    "plugins",
+    "samplers",
+    "segmentation",
+    "strategies",
+    "utils",
+    "visual_prompting",
 ]
diff --git a/src/otx/algo/classification/__init__.py b/src/otx/algo/classification/__init__.py
index cb32d2f0fb4..8c9236c0c1a 100644
--- a/src/otx/algo/classification/__init__.py
+++ b/src/otx/algo/classification/__init__.py
@@ -5,4 +5,13 @@
 
 from . import backbones, heads, losses
 
-__all__ = ["backbones", "heads", "losses"]
+__all__ = [
+    "backbones",
+    "efficientnet",
+    "heads",
+    "losses",
+    "mobilenet_v3",
+    "timm_model",
+    "torchvision_model",
+    "vit",
+]