Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: enforce TOC validation in docs build #18974

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
90 changes: 90 additions & 0 deletions src/current/check_sidebar.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
import os
from bs4 import BeautifulSoup

# Directory paths (relative to the script)
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
DOCS_DIR = BASE_DIR
SIDEBAR_DIR = os.path.join(BASE_DIR, "_site/docs/_internal")


# Define excluded files and directories from your configuration
EXCLUDED_FILES_AND_DIRS = [
"index.html", "index.md", "search.html", "404.md",
"src/current/v23.1/", "v23.1/", "node_modules/",
"__tests__", "build", "scripts", "vendor/", "_includes/",
"_site/", "jekyll-algolia-dev/", "jekyll-algolia-dev", "archived",
"package.json", "package-lock.json",
"v1.0/", "v1.1/", "v2.0/", "v2.1/",
"v19.1/", "v19.2/", "v20.1/", "v20.2/",
"v21.1/", "v22.1/", "v24.2/", "v23.2/",
"v21.2/", "v22.2/", "v24.1/", "ci"
]

def is_excluded(path):
"""
Check if a given path should be excluded based on the exclusion list.
"""
for excluded in EXCLUDED_FILES_AND_DIRS:
if excluded.endswith('/'): # Check for directories
if path.startswith(excluded):
return True
else: # Check for files
if path == excluded:
return True
return False

def extract_sidebar_urls(sidebar_file):
"""
Extract URLs from a sidebar HTML file.
"""
sidebar_urls = []
with open(sidebar_file, 'r') as f:
soup = BeautifulSoup(f, 'html.parser')
for link in soup.find_all('a'):
href = link.get('href')
if href:
# Append only the part after /docs/
sidebar_urls.append(href.split("/docs/")[-1])
return sidebar_urls

def find_markdown_files(docs_dir):
"""
Find all Markdown files in the specified directory, excluding certain files and directories.
"""
markdown_files = []
for root, _, files in os.walk(docs_dir):
for file in files:
if file.endswith('.md'):
file_path = os.path.join(root, file)
relative_path = os.path.relpath(file_path, docs_dir)

# Check exclusion
if not is_excluded(relative_path):
markdown_files.append(relative_path) # Store the relative path
return markdown_files

def main():
# Extract sidebar URLs
sidebar_urls = []
for sidebar_file in os.listdir(SIDEBAR_DIR):
if sidebar_file.startswith("sidebar-") and sidebar_file.endswith(".html"):
sidebar_path = os.path.join(SIDEBAR_DIR, sidebar_file)
sidebar_urls.extend(extract_sidebar_urls(sidebar_path))

# Find all Markdown files that should be included after exclusions
markdown_files = find_markdown_files(DOCS_DIR)

# Compare Markdown files with sidebar URLs to find missing files
missing_files = []
for md_file in markdown_files:
md_file_url = f"/docs/{md_file[:-3]}.html" # Convert .md to .html
if md_file_url.split("/docs/")[-1] not in sidebar_urls:
missing_files.append(md_file)

# Print Markdown files that are not included in the sidebar
print("\n--- Markdown Files Not Included in Sidebar ---")
for missing in missing_files:
print(missing)

if __name__ == "__main__":
main()
Loading