stackhpc · priteau · Oct 11, 2024 · Nov 15, 2023 · Oct 30, 2023 · Jan 16, 2023
@@ -4,29 +4,21 @@
 # building their configurations.
 - name: Gather facts for all hosts
   hosts: all
-  any_errors_fatal: "{{ kolla_ansible_setup_any_errors_fatal | bool }}"
   serial: '{{ kolla_serial|default("0") }}'
   gather_facts: false
   tasks:
+    - name: Group hosts to determine when using --limit
+      group_by:
+        key: "all_using_limit_{{ (ansible_play_batch | length) != (groups['all'] | length) }}"
+      changed_when: false
+
     - name: Gather facts
       setup:
         filter: "{{ kolla_ansible_setup_filter }}"
         gather_subset: "{{ kolla_ansible_setup_gather_subset }}"
       when:
         # Don't gather if fact caching is in use
         - not ansible_facts
-
-    - name: Gather package facts
-      package_facts:
-      when:
-        - "'packages' not in ansible_facts"
-        - kolla_action is defined
-        - kolla_action == "precheck"
-
-    - name: Group hosts to determine when using --limit
-      group_by:
-        key: "all_using_limit_{{ (ansible_play_batch | length) != (groups['all'] | length) }}"
-      changed_when: false
   tags: always
 
 # NOTE(pbourke): This case covers deploying subsets of hosts using --limit. The
@@ -37,7 +29,6 @@
 # the limit.
 - name: Gather facts for all hosts (if using --limit)
   hosts: all_using_limit_True
-  any_errors_fatal: "{{ kolla_ansible_setup_any_errors_fatal | bool }}"
   serial: '{{ kolla_serial|default("0") }}'
   gather_facts: false
   vars:
@@ -60,14 +51,4 @@
         - item not in groups["all_using_limit_True"]
         # Don't gather if fact caching is in use
         - not hostvars[item].ansible_facts
-
-    - name: Gather package facts
-      package_facts:
-      delegate_facts: True
-      delegate_to: "{{ item }}"
-      with_items: "{{ delegate_hosts }}"
-      when:
-        - "'packages' not in hostvars[item].ansible_facts"
-        - kolla_action is defined
-        - "kolla_action == 'precheck'"
   tags: always
@@ -36,14 +36,6 @@ kolla_ansible_setup_filter: "{{ omit }}"
 # By default, we do not provide a gather subset.
 kolla_ansible_setup_gather_subset: "{{ omit }}"
 
-# This variable is used as "any_errors_fatal" setting for the setup (gather
-# facts) plays.
-# This is useful for weeding out failing hosts early to avoid late failures
-# due to missing facts (especially cross-host).
-# Do note this still supports host fact caching and it will not affect
-# scenarios with all facts cached (as there is no task to fail).
-kolla_ansible_setup_any_errors_fatal: false
-
 ###################
 # Kolla options
 ###################

@@ -203,7 +203,7 @@ you must override the image. if you want to use version 3.12 change
 
 .. code-block:: yaml
 
-   rabbitmq_image: "{{ docker_registry ~ '/' if docker_registry else '' }}{{ docker_namespace }}/rabbitmq-3.12"
+   rabbitmq_image: "{{ docker_registry ~ '/' if docker_registry else '' }}{{ docker_namespace }}/rabbitmq-3-12"
 
 You can then upgrade RabbitMQ with the usual command:
 

@@ -187,6 +187,16 @@ issues:
 
 At a convenient time, the upgrade can now be run.
 
+SLURP extra preparations
+++++++++++++++++++++++++
+
+RabbitMQ has two major version releases per year, but it does not support
+jumping two versions in one upgrade. To perform a skip-level upgrade, you
+must first upgrade RabbitMQ to an intermediate version. Please see the
+`RabbitMQ SLURP section
+<https://docs.openstack.org/kolla-ansible/latest/reference/message-queues/rabbitmq.html#slurp>`__
+for details.
+
 Perform the Upgrade
 -------------------
 

@@ -24,14 +24,6 @@
 # Dummy variable to allow Ansible to accept this file.
 workaround_ansible_issue_8743: yes
 
-# This variable is used as "any_errors_fatal" setting for the setup (gather
-# facts) plays.
-# This is useful for weeding out failing hosts early to avoid late failures
-# due to missing facts (especially cross-host).
-# Do note this still supports host fact caching and it will not affect
-# scenarios with all facts cached (as there is no task to fail).
-#kolla_ansible_setup_any_errors_fatal: false
-
 ###############
 # Kolla options
 ###############
@@ -721,7 +713,7 @@ workaround_ansible_issue_8743: yes
 #enable_prometheus_mysqld_exporter: "{{ enable_mariadb | bool }}"
 #enable_prometheus_node_exporter: "{{ enable_prometheus | bool }}"
 #enable_prometheus_cadvisor: "{{ enable_prometheus | bool }}"
-#enable_prometheus_fluentd_integration: "{{ enable_prometheus | bool and enable fluentd | bool }}"
+#enable_prometheus_fluentd_integration: "{{ enable_prometheus | bool and enable_fluentd | bool }}"
 #enable_prometheus_memcached: "{{ enable_prometheus | bool }}"
 #enable_prometheus_alertmanager: "{{ enable_prometheus | bool }}"
 #enable_prometheus_alertmanager_external: "{{ enable_prometheus_alertmanager | bool }}"

@@ -0,0 +1,14 @@
+---
+upgrade:
+  - |
+    The ``kolla_ansible_setup_any_errors_fatal`` variable, which was meant to
+    fail execution early when fact collection failed on any of the hosts, has
+    been removed. Ansible does not template the ``any_errors_fatal``
+    parameter, so the value was always interpreted as ``true``, even though
+    the default value of ``kolla_ansible_setup_any_errors_fatal`` is
+    ``false``.
+
+    Equivalent behaviour is possible by setting the maximum failure percentage
+    to 0. This may be done specifically for fact gathering using
+    ``gather_facts_max_fail_percentage`` or globally using
+    ``kolla_max_fail_percentage``.
@@ -14,10 +14,10 @@ copy_logs() {
         exit 1
     fi
 
-    cp -rnL ${VOLUMES_DIR}/kolla_logs/_data/* ${LOG_DIR}/kolla/
-    cp -rnL /etc/kolla/* ${LOG_DIR}/kolla_configs/
+    [ -d ${VOLUMES_DIR}/kolla_logs/_data ] && cp -rnL ${VOLUMES_DIR}/kolla_logs/_data/* ${LOG_DIR}/kolla/
+    [ -d /etc/kolla ] && cp -rnL /etc/kolla/* ${LOG_DIR}/kolla_configs/
     # Don't save the IPA images.
-    rm ${LOG_DIR}/kolla_configs/config/ironic/ironic-agent.{kernel,initramfs}
+    rm -f ${LOG_DIR}/kolla_configs/config/ironic/ironic-agent.{kernel,initramfs}
     mkdir ${LOG_DIR}/system_configs/
     cp -rL /etc/{hostname,hosts,host.conf,resolv.conf,nsswitch.conf,systemd} ${LOG_DIR}/system_configs/
     # copy docker configs if used
@@ -35,10 +35,6 @@ copy_logs() {
         if [ "$CONTAINER_ENGINE" = "docker" ]; then
             journalctl --no-pager -u containerd.service > ${LOG_DIR}/system_logs/containerd.log
         fi
-    else
-        if [ "$CONTAINER_ENGINE" = "docker" ]; then
-            cp /var/log/upstart/docker.log ${LOG_DIR}/system_logs/docker.log
-        fi
     fi
 
     cp -r /etc/sudoers.d ${LOG_DIR}/system_logs/
@@ -99,21 +95,22 @@ copy_logs() {
     ps -eo user,pid,ppid,lwp,%cpu,%mem,size,rss,cmd > ${LOG_DIR}/system_logs/ps.txt
 
     # container engine related information
-    (${CONTAINER_ENGINE} info &&
-    ${CONTAINER_ENGINE} images &&
-    ${CONTAINER_ENGINE} ps -a &&
-    ${CONTAINER_ENGINE} network ls &&
-    ${CONTAINER_ENGINE} inspect $(${CONTAINER_ENGINE} ps -aq)) > ${LOG_DIR}/system_logs/${CONTAINER_ENGINE}-info.txt
+    [ `command -v ${CONTAINER_ENGINE}` ] &&
+    (   ${CONTAINER_ENGINE} info &&
+        ${CONTAINER_ENGINE} images &&
+        ${CONTAINER_ENGINE} ps -a &&
+        ${CONTAINER_ENGINE} network ls &&
+        ${CONTAINER_ENGINE} inspect $(${CONTAINER_ENGINE} ps -aq)) > ${LOG_DIR}/system_logs/${CONTAINER_ENGINE}-info.txt
 
     # save dbus services
-    dbus-send --system --print-reply --dest=org.freedesktop.DBus /org/freedesktop/DBus org.freedesktop.DBus.ListNames > ${LOG_DIR}/system_logs/dbus-services.txt
+    [ `command -v dbus-send` ] && dbus-send --system --print-reply --dest=org.freedesktop.DBus /org/freedesktop/DBus org.freedesktop.DBus.ListNames > ${LOG_DIR}/system_logs/dbus-services.txt
 
     # cephadm related logs
     if [ `command -v cephadm` ]; then
         mkdir -p ${LOG_DIR}/ceph
-        sudo cp /etc/ceph/ceph.conf ${LOG_DIR}/ceph
-        sudo cp /var/run/ceph/*/cluster.yml ${LOG_DIR}/ceph/cluster.yml
-        sudo cp /var/log/ceph/cephadm.log* ${LOG_DIR}/ceph/
+        [ -d /etc/ceph ] && sudo cp /etc/ceph/ceph.conf ${LOG_DIR}/ceph
+        [ -d /var/run/ceph ] && sudo cp /var/run/ceph/*/cluster.yml ${LOG_DIR}/ceph/cluster.yml
+        [ -d /var/log/ceph ] && sudo cp /var/log/ceph/cephadm.log* ${LOG_DIR}/ceph/
         sudo cephadm shell -- ceph --connect-timeout 5 -s > ${LOG_DIR}/ceph/ceph_s.txt
         sudo cephadm shell -- ceph --connect-timeout 5 osd tree > ${LOG_DIR}/ceph/ceph_osd_tree.txt
     fi

@@ -5,10 +5,6 @@
 ansible_python_interpreter: /usr/bin/python3
 {% endif %}
 
-# NOTE(yoctozepto): In CI it makes sense to always try to fail
-# as early as possible.
-kolla_ansible_setup_any_errors_fatal: true
-
 kolla_base_distro: "{{ base_distro }}"
 # Zed dropped install_type so we have it only on upgrades
 network_interface: "{{ api_interface_name }}"