Skip to content

Commit

Permalink
Add sitemap index and serve sitemap for subprojects
Browse files Browse the repository at this point in the history
The sitemap index lists all the sitemap locations for a project
including the subprojects.
The sitemaps of subprojects are also served from the parent domain.

Closes #6841
  • Loading branch information
cedk committed Aug 14, 2023
1 parent f4efd14 commit dc4c6fb
Show file tree
Hide file tree
Showing 5 changed files with 130 additions and 3 deletions.
8 changes: 7 additions & 1 deletion docs/user/reference/sitemaps.rst
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,18 @@ It contains information such as:
* How important this URL is in relation to other URLs in the site.
* What translations are available for a page.

Read the Docs automatically generates a ``sitemap.xml`` for your project,
Read the Docs automatically generates a ``sitemap.xml`` and a
``sitemap_index.xml`` for your project.

By default the sitemap includes:

* Each version of your documentation and when it was last updated, sorted by version number.

By default the sitemap index includes:

* The location of ``sitemap.xml``
* The location of each subproject's ``sitemap.xml``, if the project has subprojects.

The version ordering in ``sitemap.xml`` allows search engines to prioritize
results based on the version number, sorted by `semantic versioning`_.

Expand Down
45 changes: 45 additions & 0 deletions readthedocs/proxito/tests/test_full.py
Original file line number Diff line number Diff line change
Expand Up @@ -1504,6 +1504,51 @@ def test_sitemap_all_private_versions(self):
)
self.assertEqual(response.status_code, 404)

def test_sitemap_subproject(self):
    """
    A subproject's sitemap is reachable through the parent domain.

    The response served under the parent domain's subproject path must be
    identical to the one served from the subproject's own domain.
    """
    for project in (self.project, self.subproject):
        project.versions.update(active=True)

    # Same sitemap, requested through two different hosts.
    via_parent_domain = self.client.get(
        reverse("sitemap_xml", args=["subproject"]),
        headers={"host": "project.readthedocs.io"},
    )
    via_own_domain = self.client.get(
        reverse("sitemap_xml"),
        headers={"host": "subproject.readthedocs.io"},
    )

    self.assertEqual(via_parent_domain.status_code, 200)
    self.assertEqual(via_own_domain.status_code, 200)
    self.assertEqual(via_parent_domain.content, via_own_domain.content)

# Request ``sitemap_index.xml`` on the parent project's domain and check that
# it is served as ``application/xml`` and lists the project's own sitemap
# followed by one sitemap location per subproject.
# NOTE(review): the expected subproject URLs below use the subproject slugs,
# while the sitemap view looks the path segment up with ``alias=...``; confirm
# slug and alias agree for the ``subproject-alias`` fixture.
def test_sitemap_index(self):
# Mark every version active before requesting the index.
self.project.versions.update(active=True)
response = self.client.get(
reverse("sitemap_index_xml"), headers={"host": "project.readthedocs.io"}
)
self.assertEqual(response.status_code, 200)
self.assertEqual(response["Content-Type"], "application/xml")
# ``lstrip()`` drops the newline that follows the opening triple quote.
# NOTE(review): the comparison is whitespace-exact; confirm the literal
# matches the newlines the template actually renders.
expected = dedent(
"""
<?xml version="1.0" encoding="UTF-8"?>
<sitemapindex xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
<sitemap>
<loc>https://project.readthedocs.io/sitemap.xml</loc>
</sitemap>
<sitemap>
<loc>https://project.readthedocs.io/projects/subproject/sitemap.xml</loc>
</sitemap>
<sitemap>
<loc>https://project.readthedocs.io/projects/subproject-alias/sitemap.xml</loc>
</sitemap>
</sitemapindex>
"""
).lstrip()
self.assertEqual(response.content.decode(), expected)

@mock.patch(
"readthedocs.proxito.views.mixins.staticfiles_storage",
new=StaticFileSystemStorageTest(),
Expand Down
11 changes: 10 additions & 1 deletion readthedocs/proxito/urls.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@
ServeError404,
ServePageRedirect,
ServeRobotsTXT,
ServeSitemapIndexXML,
ServeSitemapXML,
ServeStaticFiles,
)
Expand Down Expand Up @@ -136,7 +137,15 @@
name="proxito_404_handler",
),
re_path(r"robots\.txt$", ServeRobotsTXT.as_view(), name="robots_txt"),
re_path(r"sitemap\.xml$", ServeSitemapXML.as_view(), name="sitemap_xml"),
re_path(
r"^(?:projects/(?P<subproject_slug>{project_slug})/)?"
r"sitemap\.xml$".format(**pattern_opts),
ServeSitemapXML.as_view(),
name="sitemap_xml",
),
re_path(
r"sitemap_index\.xml$", ServeSitemapIndexXML.as_view(), name="sitemap_index_xml"
),
]

docs_urls = [
Expand Down
61 changes: 60 additions & 1 deletion readthedocs/proxito/views/serve.py
Original file line number Diff line number Diff line change
Expand Up @@ -1019,7 +1019,7 @@ class ServeSitemapXMLBase(CDNCacheControlMixin, CDNCacheTagsMixin, View):
# Extra cache tag to invalidate only this view if needed.
project_cache_tag = "sitemap.xml"

def get(self, request):
def get(self, request, subproject_slug=None):
"""
Generate and serve a ``sitemap.xml`` for a particular ``project``.
Expand Down Expand Up @@ -1078,6 +1078,12 @@ def changefreqs_generator():
yield from itertools.chain(changefreqs, itertools.repeat('monthly'))

project = request.unresolved_domain.project

if subproject_slug:
project = get_object_or_404(
project.subprojects, alias=subproject_slug
).child

public_versions = Version.internal.public(
project=project,
only_active=True,
Expand Down Expand Up @@ -1164,6 +1170,59 @@ class ServeSitemapXML(SettingsOverrideObject):
_default_class = ServeSitemapXMLBase


class ServeSitemapIndexXMLBase(CDNCacheControlMixin, CDNCacheTagsMixin, View):

    """
    Serve ``sitemap_index.xml`` from the domain's root.

    The index lists the project's own ``sitemap.xml`` followed by one
    ``/projects/<alias>/sitemap.xml`` entry per subproject relationship.
    """

    cache_response = True
    # Same tag value as the ``sitemap.xml`` view.
    # NOTE(review): presumably intentional so both responses are invalidated
    # together -- confirm.
    project_cache_tag = "sitemap.xml"

    def get(self, request):
        """
        Generate and serve a ``sitemap_index.xml`` for a ``project``.

        The sitemap index is generated from the project and all sub-projects.
        The project is taken from ``request.unresolved_domain``; the response
        is the ``sitemap_index.xml`` template rendered as ``application/xml``.
        """
        project = request.unresolved_domain.project

        # Every location lives under the parent project's domain, so the
        # base URL is resolved once instead of once per subproject.
        base_url = f"https://{project.subdomain()}"

        locations = [f"{base_url}/sitemap.xml"]
        # The sitemap view resolves ``/projects/<name>/sitemap.xml`` by
        # filtering ``project.subprojects`` on ``alias``, so the index must
        # advertise the relationship alias. Using the child's slug produces
        # a URL that 404s whenever the alias differs from the slug.
        locations.extend(
            f"{base_url}/projects/{relationship.alias}/sitemap.xml"
            for relationship in project.subprojects.all()
        )

        return render(
            request,
            "sitemap_index.xml",
            {"locations": locations},
            content_type="application/xml",
        )

    def _get_project(self):
        # Method used by the CDNCacheTagsMixin class.
        return self.request.unresolved_domain.project

    def _get_version(self):
        # This view isn't attached to a version.
        return None


# Public entry point for the sitemap index view; ``urls.py`` routes
# ``sitemap_index.xml`` to this class.
# NOTE(review): ``SettingsOverrideObject`` presumably allows a deployment to
# substitute another implementation for ``_default_class`` -- confirm.
class ServeSitemapIndexXML(SettingsOverrideObject):
_default_class = ServeSitemapIndexXMLBase


class ServeStaticFiles(CDNCacheControlMixin, CDNCacheTagsMixin, ServeDocsMixin, View):

"""
Expand Down
8 changes: 8 additions & 0 deletions readthedocs/templates/sitemap_index.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
<?xml version="1.0" encoding="UTF-8"?>
<sitemapindex xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
{% for loc in locations %}
<sitemap>
<loc>{{ loc }}</loc>
</sitemap>
{% endfor %}
</sitemapindex>

0 comments on commit dc4c6fb

Please sign in to comment.