From ae37af29d4bd46f49776587f017faf2a556cecbc Mon Sep 17 00:00:00 2001 From: Graeme Porteous Date: Mon, 29 Apr 2024 14:18:59 +0100 Subject: [PATCH 1/7] Update setting no crawl header Extract out into a common concern included in `ApplicationController` so this hook/helper is available to all controllers. --- app/controllers/application_controller.rb | 1 + app/controllers/attachment_masks_controller.rb | 4 ---- app/controllers/concerns/prominence_headers.rb | 2 +- app/controllers/concerns/public_tokenable.rb | 6 +----- app/controllers/concerns/robots_headers.rb | 12 ++++++++++++ 5 files changed, 15 insertions(+), 10 deletions(-) create mode 100644 app/controllers/concerns/robots_headers.rb diff --git a/app/controllers/application_controller.rb b/app/controllers/application_controller.rb index 8be5fd9a05..3141f5ab21 100644 --- a/app/controllers/application_controller.rb +++ b/app/controllers/application_controller.rb @@ -32,6 +32,7 @@ class RouteNotFound < StandardError include FastGettext::Translation # make functions like _, n_, N_ etc available) include AlaveteliPro::PostRedirectHandler + include RobotsHeaders # NOTE: a filter stops the chain if it redirects or renders something before_action :html_response diff --git a/app/controllers/attachment_masks_controller.rb b/app/controllers/attachment_masks_controller.rb index c397d8da99..6a42501449 100644 --- a/app/controllers/attachment_masks_controller.rb +++ b/app/controllers/attachment_masks_controller.rb @@ -32,10 +32,6 @@ def done private - def set_no_crawl_headers - headers['X-Robots-Tag'] = 'noindex' - end - def decode_referer @referer = verifier.verified(params[:referer]) end diff --git a/app/controllers/concerns/prominence_headers.rb b/app/controllers/concerns/prominence_headers.rb index 22f3f550ce..befc48039f 100644 --- a/app/controllers/concerns/prominence_headers.rb +++ b/app/controllers/concerns/prominence_headers.rb @@ -24,7 +24,7 @@ def set_normal_headers end def set_backpage_headers - headers['X-Robots-Tag'] = 'noindex' + set_no_crawl_headers end def set_requester_only_headers diff --git a/app/controllers/concerns/public_tokenable.rb b/app/controllers/concerns/public_tokenable.rb index 32f2795bae..8a80823e44 100644 --- a/app/controllers/concerns/public_tokenable.rb +++ b/app/controllers/concerns/public_tokenable.rb @@ -8,16 +8,12 @@ module PublicTokenable extend ActiveSupport::Concern included do - before_action :set_no_crawl_headers + before_action :set_no_crawl_headers, if: -> { public_token } helper_method :public_token end private - def set_no_crawl_headers - headers['X-Robots-Tag'] = 'noindex' if public_token - end - def public_token params[:public_token] end diff --git a/app/controllers/concerns/robots_headers.rb b/app/controllers/concerns/robots_headers.rb new file mode 100644 index 0000000000..3a21ae1299 --- /dev/null +++ b/app/controllers/concerns/robots_headers.rb @@ -0,0 +1,12 @@ +## +# Set robots noindex headers. +# +module RobotsHeaders + extend ActiveSupport::Concern + + private + + def set_no_crawl_headers + headers['X-Robots-Tag'] = 'noindex' + end +end From cc8ec4db1b4424dba281e59ca0a3c0c733e06feb Mon Sep 17 00:00:00 2001 From: Graeme Porteous Date: Tue, 30 Apr 2024 08:32:03 +0100 Subject: [PATCH 2/7] Update actions that set no crawl headers Don't allow indexing of: - New citation page - New request page - Similar requests page - Request details page - User profile wall --- app/controllers/citations_controller.rb | 1 + app/controllers/request_controller.rb | 1 + app/controllers/user_controller.rb | 1 + spec/controllers/citations_controller_spec.rb | 5 +++++ spec/controllers/request_controller_spec.rb | 15 +++++++++++++++ spec/controllers/user_controller_spec.rb | 6 ++++++ 6 files changed, 29 insertions(+) diff --git a/app/controllers/citations_controller.rb b/app/controllers/citations_controller.rb index 5f26fe1ac2..ae58d26c19 100644 --- a/app/controllers/citations_controller.rb +++ b/app/controllers/citations_controller.rb @@ -5,6 +5,7 @@ class CitationsController < ApplicationController before_action :authenticate before_action :load_info_request_and_authorise before_action :set_in_pro_area + before_action :set_no_crawl_headers def new @citation = current_user.citations.build diff --git a/app/controllers/request_controller.rb b/app/controllers/request_controller.rb index 2214f4ccff..4c32ade500 100644 --- a/app/controllers/request_controller.rb +++ b/app/controllers/request_controller.rb @@ -15,6 +15,7 @@ class RequestController < ApplicationController before_action :redirect_new_form_to_pro_version, only: [:select_authority, :new] before_action :set_in_pro_area, only: [:select_authority, :show] before_action :setup_results_pagination, only: [:list, :similar] + before_action :set_no_crawl_headers, only: [:new, :details, :similar] helper_method :state_transitions_empty? diff --git a/app/controllers/user_controller.rb b/app/controllers/user_controller.rb index 3dd0977586..4f048cd78b 100644 --- a/app/controllers/user_controller.rb +++ b/app/controllers/user_controller.rb @@ -18,6 +18,7 @@ class UserController < ApplicationController before_action :work_out_post_redirect, only: [ :signup ] before_action :set_request_from_foreign_country, only: [ :signup ] before_action :set_in_pro_area, only: [ :signup ] + before_action :set_no_crawl_headers, only: :wall # Normally we wouldn't be verifying the authenticity token on these actions # anyway as there shouldn't be a user_id in the session when the before diff --git a/spec/controllers/citations_controller_spec.rb b/spec/controllers/citations_controller_spec.rb index bc39876f76..3a74f53538 100644 --- a/spec/controllers/citations_controller_spec.rb +++ b/spec/controllers/citations_controller_spec.rb @@ -37,6 +37,11 @@ expect(response).to be_successful end end + + it 'adds noindex header' do + action + expect(response.headers['X-Robots-Tag']).to eq 'noindex' + end end # when requester diff --git a/spec/controllers/request_controller_spec.rb b/spec/controllers/request_controller_spec.rb index aea2f87221..ea676d113e 100644 --- a/spec/controllers/request_controller_spec.rb +++ b/spec/controllers/request_controller_spec.rb @@ -734,6 +734,11 @@ def expect_hidden(hidden_template) expect(response).to render_template('new_bad_contact') end + it 'adds noindex header' do + get :new, params: { public_body_id: @body.id } + expect(response.headers['X-Robots-Tag']).to eq 'noindex' + end + context "the outgoing message includes an email address" do context "there is no logged in user" do it "displays a flash error message without escaping the HTML" do @@ -1964,6 +1969,11 @@ def send_request get :similar, params: { url_title: badger_request.url_title } }.to raise_error(ActiveRecord::RecordNotFound) end + + it 'adds noindex header' do + get :similar, params: { url_title: badger_request.url_title } + expect(response.headers['X-Robots-Tag']).to eq 'noindex' + end end RSpec.describe RequestController, "when the site is in read_only mode" do @@ -2041,6 +2051,11 @@ def send_request }.to raise_error(ActiveRecord::RecordNotFound) end end + + it 'adds noindex header' do + get :details, params: { url_title: info_request.url_title } + expect(response.headers['X-Robots-Tag']).to eq 'noindex' + end end end diff --git a/spec/controllers/user_controller_spec.rb b/spec/controllers/user_controller_spec.rb index 654dfeff75..8f1b431d83 100644 --- a/spec/controllers/user_controller_spec.rb +++ b/spec/controllers/user_controller_spec.rb @@ -1351,4 +1351,10 @@ def make_request get :wall, params: { url_name: user.url_name } expect(assigns[:feed_results]).to be_empty end + + it 'adds noindex header' do + user = FactoryBot.create(:user) + get :wall, params: { url_name: user.url_name } + expect(response.headers['X-Robots-Tag']).to eq 'noindex' + end end From 3545f27f8e019c1963e1e8672c23b9cc91e2643c Mon Sep 17 00:00:00 2001 From: Graeme Porteous Date: Mon, 29 Apr 2024 14:21:52 +0100 Subject: [PATCH 3/7] Add nofollow to no crawl headers --- app/controllers/concerns/robots_headers.rb | 2 +- .../attachment_masks_controller_spec.rb | 8 +++---- .../attachments_controller_spec.rb | 24 +++++++++---------- spec/controllers/citations_controller_spec.rb | 4 ++-- .../public_tokens_controller_spec.rb | 4 ++-- spec/controllers/request_controller_spec.rb | 16 ++++++------- spec/controllers/user_controller_spec.rb | 4 ++-- 7 files changed, 31 insertions(+), 31 deletions(-) diff --git a/app/controllers/concerns/robots_headers.rb b/app/controllers/concerns/robots_headers.rb index 3a21ae1299..4cc41b9c56 100644 --- a/app/controllers/concerns/robots_headers.rb +++ b/app/controllers/concerns/robots_headers.rb @@ -7,6 +7,6 @@ module RobotsHeaders private def set_no_crawl_headers - headers['X-Robots-Tag'] = 'noindex' + headers['X-Robots-Tag'] = 'noindex, nofollow' end end diff --git a/spec/controllers/attachment_masks_controller_spec.rb b/spec/controllers/attachment_masks_controller_spec.rb index 5d6b759671..8eb46254c6 100644 --- a/spec/controllers/attachment_masks_controller_spec.rb +++ b/spec/controllers/attachment_masks_controller_spec.rb @@ -54,9 +54,9 @@ def wait expect(response).to render_template(:wait) end - it 'sets noindex header' do + it 'sets noindex, nofollow header' do wait - expect(response.headers['X-Robots-Tag']).to eq 'noindex' + expect(response.headers['X-Robots-Tag']).to eq 'noindex, nofollow' end end @@ -118,9 +118,9 @@ def done expect(response).to render_template(:done) end - it 'sets noindex header' do + it 'sets noindex, nofollow header' do done - expect(response.headers['X-Robots-Tag']).to eq 'noindex' + expect(response.headers['X-Robots-Tag']).to eq 'noindex, nofollow' end end diff --git a/spec/controllers/attachments_controller_spec.rb b/spec/controllers/attachments_controller_spec.rb index cf2b870806..29574ea13c 100644 --- a/spec/controllers/attachments_controller_spec.rb +++ b/spec/controllers/attachments_controller_spec.rb @@ -198,13 +198,13 @@ def show(params = {}) expect(assigns(:info_request)).to eq(info_request) end - it 'adds noindex header when using public token' do + it 'adds noindex, nofollow header when using public token' do expect(InfoRequest).to receive(:find_by!).with(public_token: 'ABC'). and_return(info_request) show(public_token: 'ABC', id: nil) - expect(response.headers['X-Robots-Tag']).to eq 'noindex' + expect(response.headers['X-Robots-Tag']).to eq 'noindex, nofollow' end it 'passes public token to current ability' do @@ -287,14 +287,14 @@ def show(params = {}) context 'when the request is backpage' do let(:request_prominence) { 'backpage' } - it 'sets a noindex header when viewing' do + it 'sets a noindex, nofollow header when viewing' do show - expect(response.headers['X-Robots-Tag']).to eq 'noindex' + expect(response.headers['X-Robots-Tag']).to eq 'noindex, nofollow' end - it 'sets a noindex header when viewing a cached copy' do + it 'sets a noindex, nofollow header when viewing a cached copy' do show - expect(response.headers['X-Robots-Tag']).to eq 'noindex' + expect(response.headers['X-Robots-Tag']).to eq 'noindex, nofollow' end context 'when logged in as requester' do @@ -449,13 +449,13 @@ def show_as_html(params = {}) expect(assigns(:info_request)).to eq(info_request) end - it 'adds noindex header when using public token' do + it 'adds noindex, nofollow header when using public token' do expect(InfoRequest).to receive(:find_by!).with(public_token: '123'). and_return(info_request) show_as_html(public_token: '123', id: nil) - expect(response.headers['X-Robots-Tag']).to eq 'noindex' + expect(response.headers['X-Robots-Tag']).to eq 'noindex, nofollow' end context 'when attachment has not been masked' do @@ -560,14 +560,14 @@ def show_as_html(params = {}) context 'when the request is backpage' do let(:request_prominence) { 'backpage' } - it 'sets a noindex header when viewing a HTML version' do + it 'sets a noindex, nofollow header when viewing a HTML version' do show_as_html - expect(response.headers['X-Robots-Tag']).to eq 'noindex' + expect(response.headers['X-Robots-Tag']).to eq 'noindex, nofollow' end - it 'sets a noindex header when viewing a cached HTML version' do + it 'sets a noindex, nofollow header when viewing a cached HTML version' do show_as_html - expect(response.headers['X-Robots-Tag']).to eq 'noindex' + expect(response.headers['X-Robots-Tag']).to eq 'noindex, nofollow' end end diff --git a/spec/controllers/citations_controller_spec.rb b/spec/controllers/citations_controller_spec.rb index 3a74f53538..4d48ad5d30 100644 --- a/spec/controllers/citations_controller_spec.rb +++ b/spec/controllers/citations_controller_spec.rb @@ -38,9 +38,9 @@ end end - it 'adds noindex header' do + it 'adds noindex, nofollow header' do action - expect(response.headers['X-Robots-Tag']).to eq 'noindex' + expect(response.headers['X-Robots-Tag']).to eq 'noindex, nofollow' end end diff --git a/spec/controllers/public_tokens_controller_spec.rb b/spec/controllers/public_tokens_controller_spec.rb index cf3b646189..77ab6927c4 100644 --- a/spec/controllers/public_tokens_controller_spec.rb +++ b/spec/controllers/public_tokens_controller_spec.rb @@ -25,9 +25,9 @@ expect(assigns(:info_request)).to eq info_request end - it 'adds noindex header' do + it 'adds noindex, nofollow header' do get :show, params: { public_token: 'TOKEN' } - expect(response.headers['X-Robots-Tag']).to eq 'noindex' + expect(response.headers['X-Robots-Tag']).to eq 'noindex, nofollow' end it 'returns http success' do diff --git a/spec/controllers/request_controller_spec.rb b/spec/controllers/request_controller_spec.rb index ea676d113e..5b7b2b7ee9 100644 --- a/spec/controllers/request_controller_spec.rb +++ b/spec/controllers/request_controller_spec.rb @@ -603,9 +603,9 @@ def expect_hidden(hidden_template) expect(response).to render_template('show') end - it 'sets a noindex header' do + it 'sets a noindex, nofollow header' do get :show, params: { url_title: info_request.url_title } - expect(response.headers['X-Robots-Tag']).to eq 'noindex' + expect(response.headers['X-Robots-Tag']).to eq 'noindex, nofollow' end end end @@ -734,9 +734,9 @@ def expect_hidden(hidden_template) expect(response).to render_template('new_bad_contact') end - it 'adds noindex header' do + it 'adds noindex, nofollow header' do get :new, params: { public_body_id: @body.id } - expect(response.headers['X-Robots-Tag']).to eq 'noindex' + expect(response.headers['X-Robots-Tag']).to eq 'noindex, nofollow' end context "the outgoing message includes an email address" do @@ -1970,9 +1970,9 @@ def send_request }.to raise_error(ActiveRecord::RecordNotFound) end - it 'adds noindex header' do + it 'adds noindex, nofollow header' do get :similar, params: { url_title: badger_request.url_title } - expect(response.headers['X-Robots-Tag']).to eq 'noindex' + expect(response.headers['X-Robots-Tag']).to eq 'noindex, nofollow' end end @@ -2052,9 +2052,9 @@ def send_request end end - it 'adds noindex header' do + it 'adds noindex, nofollow header' do get :details, params: { url_title: info_request.url_title } - expect(response.headers['X-Robots-Tag']).to eq 'noindex' + expect(response.headers['X-Robots-Tag']).to eq 'noindex, nofollow' end end end diff --git a/spec/controllers/user_controller_spec.rb b/spec/controllers/user_controller_spec.rb index 8f1b431d83..4747ee9d6e 100644 --- a/spec/controllers/user_controller_spec.rb +++ b/spec/controllers/user_controller_spec.rb @@ -1352,9 +1352,9 @@ def make_request expect(assigns[:feed_results]).to be_empty end - it 'adds noindex header' do + it 'adds noindex, nofollow header' do user = FactoryBot.create(:user) get :wall, params: { url_name: user.url_name } - expect(response.headers['X-Robots-Tag']).to eq 'noindex' + expect(response.headers['X-Robots-Tag']).to eq 'noindex, nofollow' end end From 1ea6cfc32daaeb43a1f7a8f970e79ac0f9ed34b9 Mon Sep 17 00:00:00 2001 From: Graeme Porteous Date: Tue, 30 Apr 2024 08:37:27 +0100 Subject: [PATCH 4/7] Add nofollow to request action menu links These actions require a user to be logged in or link to actions which we don't allow to be indexed and as such there is no reason for search indexers to follow them. --- app/views/request/_after_actions.html.erb | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/app/views/request/_after_actions.html.erb b/app/views/request/_after_actions.html.erb index 0b187ecd7c..784fa66a0e 100644 --- a/app/views/request/_after_actions.html.erb +++ b/app/views/request/_after_actions.html.erb @@ -14,20 +14,20 @@
  • <% if last_response.nil? %> - <%= link_to _('Send a followup'), new_request_followup_path(info_request.url_title) %> + <%= link_to _('Send a followup'), new_request_followup_path(info_request.url_title), rel: 'nofollow' %> <% else %> - <%= link_to _('Write a reply'), new_request_incoming_followup_path(info_request.url_title, incoming_message_id: last_response.id) %> + <%= link_to _('Write a reply'), new_request_incoming_followup_path(info_request.url_title, incoming_message_id: last_response.id), rel: 'nofollow' %> <% end %>
  • <% if show_owner_update_status_action %>
  • - <%= link_to _('Update the status of this request'), request_path(info_request, update_status: 1) %> + <%= link_to _('Update the status of this request'), request_path(info_request, update_status: 1), rel: 'nofollow' %>
  • <% end %>
  • - <%= link_to _('Request an internal review'), new_request_followup_path(info_request.url_title, internal_review: '1') %> + <%= link_to _('Request an internal review'), new_request_followup_path(info_request.url_title, internal_review: '1'), rel: 'nofollow' %>
@@ -42,7 +42,7 @@
  • - <%= link_to _("Respond to request"), upload_response_path(:url_title => info_request.url_title) %> + <%= link_to _("Respond to request"), upload_response_path(:url_title => info_request.url_title), rel: 'nofollow' %>
@@ -61,7 +61,7 @@ <% else %>
  • - <%= link_to _("Report this request"), new_request_report_path(info_request.url_title) %> + <%= link_to _("Report this request"), new_request_report_path(info_request.url_title), rel: 'nofollow' %> <%= link_to _("Help"), help_about_path(:anchor => "reporting") %>
  • @@ -69,7 +69,7 @@ <% if feature_enabled?(:annotations) && info_request.comments_allowed? %>
  • - <%= link_to _('Add an annotation'), new_comment_path(:url_title => info_request.url_title) %> + <%= link_to _('Add an annotation'), new_comment_path(:url_title => info_request.url_title), rel: 'nofollow' %>
  • <% end %> @@ -80,11 +80,11 @@ <% end %>
  • - <%= link_to _("Download a zip file of all correspondence"), download_entire_request_path(:url_title => info_request.url_title) %> + <%= link_to _("Download a zip file of all correspondence"), download_entire_request_path(:url_title => info_request.url_title), rel: 'nofollow' %>
  • - <%= link_to _('View event history details'), request_details_path(info_request) %> + <%= link_to _('View event history details'), request_details_path(info_request), rel: 'nofollow' %>
  • From 218c529ad49d760a06625f8047e2cedf9a84e795 Mon Sep 17 00:00:00 2001 From: Graeme Porteous Date: Tue, 30 Apr 2024 08:34:41 +0100 Subject: [PATCH 5/7] Add no crawl meta tags to HTML head Even though we're setting the response header ensure we set a consistent value in the meta tag. --- app/controllers/concerns/robots_headers.rb | 1 + 1 file changed, 1 insertion(+) diff --git a/app/controllers/concerns/robots_headers.rb b/app/controllers/concerns/robots_headers.rb index 4cc41b9c56..5d0b35ac1a 100644 --- a/app/controllers/concerns/robots_headers.rb +++ b/app/controllers/concerns/robots_headers.rb @@ -7,6 +7,7 @@ module RobotsHeaders private def set_no_crawl_headers + @no_crawl = true headers['X-Robots-Tag'] = 'noindex, nofollow' end end From 8a7203d30ae813f8dd0e8540056fe21faaf2cd07 Mon Sep 17 00:00:00 2001 From: Graeme Porteous Date: Mon, 29 Apr 2024 14:21:18 +0100 Subject: [PATCH 6/7] Update all paginated pages to set no crawl headers Pages after the first shouldn't be crawled, this will help with site performance. --- app/controllers/concerns/robots_headers.rb | 4 ++++ app/controllers/request_controller.rb | 3 --- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/app/controllers/concerns/robots_headers.rb b/app/controllers/concerns/robots_headers.rb index 5d0b35ac1a..79f6d53232 100644 --- a/app/controllers/concerns/robots_headers.rb +++ b/app/controllers/concerns/robots_headers.rb @@ -4,6 +4,10 @@ module RobotsHeaders extend ActiveSupport::Concern + included do + before_action :set_no_crawl_headers, if: -> { params[:page].to_i > 1 } + end + private def set_no_crawl_headers diff --git a/app/controllers/request_controller.rb b/app/controllers/request_controller.rb index 4c32ade500..3df37507ad 100644 --- a/app/controllers/request_controller.rb +++ b/app/controllers/request_controller.rb @@ -471,9 +471,6 @@ def setup_results_pagination @per_page = PER_PAGE @max_results = MAX_RESULTS - # Don't let robots go more than 20 pages in - @no_crawl = true if @page > 20 - # Later pages are very expensive to load return if @page <= MAX_RESULTS / PER_PAGE From a55f6c2b1ec1d174af8defc9b2c04c30e064b42e Mon Sep 17 00:00:00 2001 From: Graeme Porteous Date: Tue, 30 Apr 2024 09:16:45 +0100 Subject: [PATCH 7/7] Update changelog --- doc/CHANGES.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/doc/CHANGES.md b/doc/CHANGES.md index b48aa4fcb8..ec3c7c3a9f 100644 --- a/doc/CHANGES.md +++ b/doc/CHANGES.md @@ -2,6 +2,8 @@ ## Highlighted Features +* Update actions and pages which set "noindex", "nofollow" crawler directives + (Graeme Porteous) * Add default value and not null constraint to `CensorRule#regexp` (Gareth Rees) * Allow requests to be listed and filtered by tag (Graeme Porteous) * Fix admin error when authority are missing an email address (Graeme Porteous)