From 357b8e34286cf7f67e083315dec3909f32cc6e0e Mon Sep 17 00:00:00 2001 From: M Bussonnier Date: Thu, 12 Sep 2024 11:20:09 +0200 Subject: [PATCH] cleanup a few diverging links --- docs/community/topics/dependencies-js.md | 2 +- docs/community/topics/manual-dev.md | 4 ++-- docs/user_guide/accessibility.md | 2 +- docs/user_guide/indices.rst | 2 +- tools/divergent_links.py | 27 ++++++++++++++++++++---- 5 files changed, 28 insertions(+), 9 deletions(-) diff --git a/docs/community/topics/dependencies-js.md b/docs/community/topics/dependencies-js.md index 3887b360d..04bd0a830 100644 --- a/docs/community/topics/dependencies-js.md +++ b/docs/community/topics/dependencies-js.md @@ -8,7 +8,7 @@ There are two kinds of dependency definitions in this theme: To update or add a JS dependency, follow these steps: 1. **Edit `package.json`** by adding or modifying a dependency. -2. **Re-generate `package-lock.json`** in order to create a new set of frozen dependencies for the theme. To do this, run the following command from [the Sphinx Theme Builder](https://github.com/pradyunsg/sphinx-theme-builder). +2. **Re-generate `package-lock.json`** in order to create a new set of frozen dependencies for the theme. To do this, run the following command from [the Sphinx Theme Builder](https://sphinx-theme-builder.readthedocs.io/en/latest/). ``` stb npm install --include=dev diff --git a/docs/community/topics/manual-dev.md b/docs/community/topics/manual-dev.md index 5c952633a..c9e79675e 100644 --- a/docs/community/topics/manual-dev.md +++ b/docs/community/topics/manual-dev.md @@ -18,7 +18,7 @@ To do so, use a tool like [conda](https://docs.conda.io/en/latest/), [mamba](htt Before you start, ensure that you have the following installed: - Python >= 3.9 -- [Pandoc](https://pandoc.org/installing.html): we use `nbsphinx` to support notebook (.ipynb) files in the documentation, which requires [installing Pandoc](https://pandoc.org/installing.html) at a system level (or within a Conda environment). 
+- [Pandoc](https://pandoc.org/): we use `nbsphinx` to support notebook (`.ipynb`) files in the documentation, which requires [installing Pandoc](https://pandoc.org/installing.html) at a system level (or within a Conda environment). ## Clone the repository locally @@ -66,7 +66,7 @@ To manually open a server to watch your documentation for changes, build them, a $ stb serve docs --open-browser ``` -## Run the tests +## Manually Run the tests To manually run the tests for this theme, first set up your environment locally, and then run: diff --git a/docs/user_guide/accessibility.md b/docs/user_guide/accessibility.md index 01ca24329..3fe7d70a7 100644 --- a/docs/user_guide/accessibility.md +++ b/docs/user_guide/accessibility.md @@ -69,7 +69,7 @@ Site maps, usually served from a file called `sitemap.xml` are a broadly-employe approach to telling programs like search engines and assistive technologies where different content appears on a website. -If using a service like [ReadTheDocs](https://readthedocs.com), these files +If using a service like [ReadTheDocs](https://about.readthedocs.com/), these files will be created for you _automatically_, but for some other approaches below, it's handy to generate a `sitemap.xml` locally or in CI with a tool like [sphinx-sitemap](https://pypi.org/project/sphinx-sitemap/). diff --git a/docs/user_guide/indices.rst b/docs/user_guide/indices.rst index 6bbb9c279..cd1d29e83 100644 --- a/docs/user_guide/indices.rst +++ b/docs/user_guide/indices.rst @@ -19,4 +19,4 @@ By design the indices pages are not linked in a documentation generated with thi .. note:: - Don't forget to add back the ``"sidebar-ethical-ads.html"`` template if you are serving your documentation using `ReadTheDocs `__. + Don't forget to add back the ``"sidebar-ethical-ads.html"`` template if you are serving your documentation using `ReadTheDocs <https://about.readthedocs.com/>`__. 
diff --git a/tools/divergent_links.py b/tools/divergent_links.py index 7b7dbc40e..3cd5ceced 100644 --- a/tools/divergent_links.py +++ b/tools/divergent_links.py @@ -10,7 +10,17 @@ from bs4 import BeautifulSoup -ignores = ["#", "next", "previous"] +ignores = [ + "#", + "next", + "previous", + "[source]", + "edit on github", + "[docs]", + "read more ...", + "show source", + "module", +] def find_html_files(folder_path): @@ -48,17 +58,26 @@ def scan(self, html_content, identifier): continue if content.split("\n")[0] in ignores: continue + from urllib.parse import urljoin - self.links[content].append((url, identifier)) + fullurl = urljoin(identifier, url) + self.links[content].append((fullurl, identifier)) def duplicates(self): """Print potential duplicates.""" for content, url_pages in self.links.items(): uniq_url = {u for u, _ in url_pages} if len(uniq_url) >= 2: - print(f"{content} has divergent url:") + print( + f"{len(url_pages)} time {content!r} has {len(uniq_url)} on divergent url on :" + ) + dct = defaultdict(list) for u, p in url_pages: - print(" ", u, "in", p) + dct[u].append(p) + for u, ps in dct.items(): + print(" ", u, "in") + for p in ps: + print(" ", p) # Example usage