Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add sitemap index and serve sitemap for subprojects #10629

Open
wants to merge 3 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 7 additions & 1 deletion docs/user/reference/sitemaps.rst
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,18 @@ It contains information such as:
* How important this URL is in relation to other URLs in the site.
* What translations are available for a page.

Read the Docs automatically generates a ``sitemap.xml`` for your project,
Read the Docs automatically generates a ``sitemap.xml`` and a
``sitemap_index.xml`` for your project.

By default the sitemap includes:

* Each version of your documentation and when it was last updated, sorted by version number.

By default the sitemap index includes:

* The location of the project's own ``sitemap.xml``.
* The location of each subproject's ``sitemap.xml``, if the project has subprojects.

The version ordering in ``sitemap.xml`` allows search engines to prioritize results based on the version number,
sorted by `semantic versioning`_.

Expand Down
49 changes: 47 additions & 2 deletions readthedocs/proxito/tests/test_full.py
Original file line number Diff line number Diff line change
Expand Up @@ -850,7 +850,7 @@ def test_default_robots_txt(self, storage_exists):

Disallow: # Allow everything

Sitemap: https://project.readthedocs.io/sitemap.xml
Sitemap: https://project.readthedocs.io/sitemap_index.xml
"""
).lstrip()
self.assertEqual(response.content.decode(), expected)
Expand Down Expand Up @@ -904,7 +904,7 @@ def test_default_robots_txt_disallow_hidden_versions(self, storage_exists):

Disallow: /en/hidden/ # Hidden version

Sitemap: https://project.readthedocs.io/sitemap.xml
Sitemap: https://project.readthedocs.io/sitemap_index.xml
"""
).lstrip()
self.assertEqual(response.content.decode(), expected)
Expand Down Expand Up @@ -1733,6 +1733,51 @@ def test_sitemap_all_private_versions(self):
)
self.assertEqual(response.status_code, 404)

def test_sitemap_subproject(self):
    """
    A subproject's sitemap is the same through either entry point.

    Requests ``sitemap.xml`` once under the parent domain (with the
    ``subproject`` argument in the URL) and once on the subproject's own
    domain, then checks both succeed and return identical content.
    """
    self.project.versions.update(active=True)
    self.subproject.versions.update(active=True)

    nested_url = reverse("sitemap_xml", args=["subproject"])
    root_url = reverse("sitemap_xml")

    # Same view, two hosts: the parent's domain and the subproject's domain.
    via_parent = self.client.get(
        nested_url,
        headers={"host": "project.readthedocs.io"},
    )
    via_own_domain = self.client.get(
        root_url,
        headers={"host": "subproject.readthedocs.io"},
    )

    for served in (via_parent, via_own_domain):
        self.assertEqual(served.status_code, 200)
    self.assertEqual(via_parent.content, via_own_domain.content)

def test_sitemap_index(self):
    """
    The sitemap index is served as XML and lists every sitemap location.

    Expects one entry for the project's own ``sitemap.xml`` and one entry
    per subproject under ``/projects/<name>/sitemap.xml``.
    """
    self.project.versions.update(active=True)

    index_url = reverse("sitemap_index_xml")
    served = self.client.get(index_url, headers={"host": "project.readthedocs.io"})

    self.assertEqual(served.status_code, 200)
    self.assertEqual(served["Content-Type"], "application/xml")

    # NOTE(review): the sitemap view looks subprojects up by relationship
    # alias; confirm the path segments expected below are aliases and not
    # project slugs.
    expected_body = dedent(
        """
<?xml version="1.0" encoding="UTF-8"?>
<sitemapindex xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">

<sitemap>
<loc>https://project.readthedocs.io/sitemap.xml</loc>
</sitemap>

<sitemap>
<loc>https://project.readthedocs.io/projects/subproject/sitemap.xml</loc>
</sitemap>

<sitemap>
<loc>https://project.readthedocs.io/projects/subproject-alias/sitemap.xml</loc>
</sitemap>

</sitemapindex>
"""
    ).lstrip()
    self.assertEqual(served.content.decode(), expected_body)

@mock.patch(
"readthedocs.proxito.views.mixins.staticfiles_storage",
new=StaticFileSystemStorageTest(),
Expand Down
11 changes: 10 additions & 1 deletion readthedocs/proxito/urls.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@
ServeError404,
ServePageRedirect,
ServeRobotsTXT,
ServeSitemapIndexXML,
ServeSitemapXML,
ServeStaticFiles,
)
Expand Down Expand Up @@ -136,7 +137,15 @@
name="proxito_404_handler",
),
re_path(r"robots\.txt$", ServeRobotsTXT.as_view(), name="robots_txt"),
re_path(r"sitemap\.xml$", ServeSitemapXML.as_view(), name="sitemap_xml"),
re_path(
r"^(?:projects/(?P<subproject_slug>{project_slug})/)?"
r"sitemap\.xml$".format(**pattern_opts),
ServeSitemapXML.as_view(),
name="sitemap_xml",
),
re_path(
r"sitemap_index\.xml$", ServeSitemapIndexXML.as_view(), name="sitemap_index_xml"
),
]

docs_urls = [
Expand Down
65 changes: 62 additions & 3 deletions readthedocs/proxito/views/serve.py
Original file line number Diff line number Diff line change
Expand Up @@ -763,8 +763,8 @@ def get(self, request):
pass

# Serve default robots.txt
sitemap_url = "{scheme}://{domain}/sitemap.xml".format(
scheme="https",
sitemap_url = '{scheme}://{domain}/sitemap_index.xml'.format(
scheme='https',
domain=project.subdomain(),
)
context = {
Expand Down Expand Up @@ -813,7 +813,7 @@ class ServeSitemapXMLBase(CDNCacheControlMixin, CDNCacheTagsMixin, View):
# Extra cache tag to invalidate only this view if needed.
project_cache_tag = "sitemap.xml"

def get(self, request):
def get(self, request, subproject_slug=None):
"""
Generate and serve a ``sitemap.xml`` for a particular ``project``.

Expand Down Expand Up @@ -872,6 +872,12 @@ def changefreqs_generator():
yield from itertools.chain(changefreqs, itertools.repeat("monthly"))

project = request.unresolved_domain.project

if subproject_slug:
project = get_object_or_404(
project.subprojects, alias=subproject_slug
).child

public_versions = Version.internal.public(
project=project,
only_active=True,
Expand Down Expand Up @@ -970,6 +976,59 @@ class ServeSitemapXML(SettingsOverrideObject):
_default_class = ServeSitemapXMLBase


class ServeSitemapIndexXMLBase(CDNCacheControlMixin, CDNCacheTagsMixin, View):

    """
    Serve ``sitemap_index.xml`` from the domain's root.

    The index lists the project's own ``sitemap.xml`` followed by one
    ``/projects/<alias>/sitemap.xml`` entry per subproject relationship.
    """

    cache_response = True
    # Same tag value that the ``sitemap.xml`` view declares.
    project_cache_tag = "sitemap.xml"

    def get(self, request):
        """
        Generate and serve a ``sitemap_index.xml`` for a ``project``.

        The sitemap index is generated from the project and all sub-projects.

        :param request: incoming request; the project is read from
            ``request.unresolved_domain.project``.
        :returns: the rendered ``sitemap_index.xml`` template, served with
            the ``application/xml`` content type.
        """
        project = request.unresolved_domain.project

        # Every location lives on the same domain, so compute it once.
        base_url = "{scheme}://{domain}".format(
            scheme="https",
            domain=project.subdomain(),
        )

        locations = [f"{base_url}/sitemap.xml"]
        # The ``sitemap.xml`` view resolves ``/projects/<name>/`` through the
        # relationship alias, so the index must advertise the alias as well.
        # Using the child project's slug would produce a URL that does not
        # resolve whenever the alias differs from the slug.
        locations.extend(
            f"{base_url}/projects/{alias}/sitemap.xml"
            for alias in project.subprojects.values_list("alias", flat=True)
            # A relationship without an alias cannot be reached by alias lookup.
            if alias
        )

        context = {
            "locations": locations,
        }
        return render(
            request,
            "sitemap_index.xml",
            context,
            content_type="application/xml",
        )

    def _get_project(self):
        # Method used by the CDNCacheTagsMixin class.
        return self.request.unresolved_domain.project

    def _get_version(self):
        # This view isn't attached to a version.
        return None


class ServeSitemapIndexXML(SettingsOverrideObject):

    """Public sitemap index view; ``_default_class`` points it at ``ServeSitemapIndexXMLBase``."""

    _default_class = ServeSitemapIndexXMLBase


class ServeStaticFiles(CDNCacheControlMixin, CDNCacheTagsMixin, ServeDocsMixin, View):

"""
Expand Down
8 changes: 8 additions & 0 deletions readthedocs/templates/sitemap_index.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
<?xml version="1.0" encoding="UTF-8"?>
<sitemapindex xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
{% for loc in locations %}
<sitemap>
<loc>{{ loc }}</loc>
</sitemap>
{% endfor %}
</sitemapindex>