From acd1bf44d016bc5cace7e4fcfb693352d7c063d1 Mon Sep 17 00:00:00 2001
From: stackhpc-ci <22933334+stackhpc-ci@users.noreply.github.com>
Date: Wed, 25 Jan 2023 09:17:10 +0000
Subject: [PATCH 01/23] feat: automatic update of workflows stackhpc/yoga

---
 .github/workflows/tag-and-release.yml | 11 +++++++++++
 .github/workflows/tox.yml             |  7 +++++++
 2 files changed, 18 insertions(+)
 create mode 100644 .github/workflows/tag-and-release.yml
 create mode 100644 .github/workflows/tox.yml

diff --git a/.github/workflows/tag-and-release.yml b/.github/workflows/tag-and-release.yml
new file mode 100644
index 00000000000..1533fa2bffd
--- /dev/null
+++ b/.github/workflows/tag-and-release.yml
@@ -0,0 +1,11 @@
+---
+name: Tag & Release
+'on':
+  push:
+    branches:
+      - stackhpc/yoga
+permissions:
+  contents: write
+jobs:
+  tag-and-release:
+    uses: stackhpc/.github/.github/workflows/tag-and-release.yml@main
diff --git a/.github/workflows/tox.yml b/.github/workflows/tox.yml
new file mode 100644
index 00000000000..8713f0e02dc
--- /dev/null
+++ b/.github/workflows/tox.yml
@@ -0,0 +1,7 @@
+---
+name: Tox Continuous Integration
+'on':
+  pull_request:
+jobs:
+  tox:
+    uses: stackhpc/.github/.github/workflows/tox.yml@main

From c19959d97164fcf569edb6d88c3de444a4f6a552 Mon Sep 17 00:00:00 2001
From: stackhpc-ci <22933334+stackhpc-ci@users.noreply.github.com>
Date: Wed, 25 Jan 2023 09:18:04 +0000
Subject: [PATCH 02/23] feat: automatic update of community files stackhpc/yoga

---
 .github/CODEOWNERS | 1 +
 1 file changed, 1 insertion(+)
 create mode 100644 .github/CODEOWNERS

diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS
new file mode 100644
index 00000000000..a914011ae8b
--- /dev/null
+++ b/.github/CODEOWNERS
@@ -0,0 +1 @@
+* @stackhpc/openstack

From 209acd5e85d1b4d155ba746fb8d2991d33125de7 Mon Sep 17 00:00:00 2001
From: Dan Smith <dansmith@redhat.com>
Date: Thu, 10 Nov 2022 09:55:48 -0800
Subject: [PATCH 03/23] [stable-only][cve] Check VMDK create-type against an
 allowed list

NOTE(sbauza): Stable policy allows us to proactively merge a backport without waiting for the parent patch to be merged (exception to rule #4 in [1]. Marking [stable-only] in order to silence nova-tox-validate-backport

[1] https://docs.openstack.org/project-team-guide/stable-branches.html#appropriate-fixes

Related-Bug: #1996188
Change-Id: I5a399f1d3d702bfb76c067893e9c924904c8c360
---
 nova/conf/compute.py                |  9 ++++++
 nova/tests/unit/virt/test_images.py | 46 +++++++++++++++++++++++++++++
 nova/virt/images.py                 | 31 +++++++++++++++++++
 3 files changed, 86 insertions(+)

diff --git a/nova/conf/compute.py b/nova/conf/compute.py
index 5abe7694f80..352080011ad 100644
--- a/nova/conf/compute.py
+++ b/nova/conf/compute.py
@@ -1007,6 +1007,15 @@
 * ``[scheduler]query_placement_for_image_type_support`` - enables
   filtering computes based on supported image types, which is required
   to be enabled for this to take effect.
+"""),
+    cfg.ListOpt('vmdk_allowed_types',
+                default=['streamOptimized', 'monolithicSparse'],
+                help="""
+A list of strings describing allowed VMDK "create-type" subformats
+that will be allowed. This is recommended to only include
+single-file-with-sparse-header variants to avoid potential host file
+exposure due to processing named extents. If this list is empty, then no
+form of VMDK image will be allowed.
 """),
     cfg.BoolOpt('packing_host_numa_cells_allocation_strategy',
         default=True,
diff --git a/nova/tests/unit/virt/test_images.py b/nova/tests/unit/virt/test_images.py
index 085b169db3c..563330b5414 100644
--- a/nova/tests/unit/virt/test_images.py
+++ b/nova/tests/unit/virt/test_images.py
@@ -16,6 +16,8 @@
 
 import mock
 from oslo_concurrency import processutils
+from oslo_serialization import jsonutils
+from oslo_utils import imageutils
 
 from nova.compute import utils as compute_utils
 from nova import exception
@@ -135,3 +137,47 @@ def test_convert_image_without_direct_io_support(self, mock_execute,
                     '-O', 'out_format', '-f', 'in_format', 'source', 'dest')
         mock_disk_op_sema.__enter__.assert_called_once()
         self.assertTupleEqual(expected, mock_execute.call_args[0])
+
+    def test_convert_image_vmdk_allowed_list_checking(self):
+        info = {'format': 'vmdk',
+                'format-specific': {
+                    'type': 'vmdk',
+                    'data': {
+                        'create-type': 'monolithicFlat',
+                }}}
+
+        # If the format is not in the allowed list, we should get an error
+        self.assertRaises(exception.ImageUnacceptable,
+                          images.check_vmdk_image, 'foo',
+                          imageutils.QemuImgInfo(jsonutils.dumps(info),
+                                                 format='json'))
+
+        # With the format in the allowed list, no error
+        self.flags(vmdk_allowed_types=['streamOptimized', 'monolithicFlat',
+                                       'monolithicSparse'],
+                   group='compute')
+        images.check_vmdk_image('foo',
+                                imageutils.QemuImgInfo(jsonutils.dumps(info),
+                                                       format='json'))
+
+        # With an empty list, allow nothing
+        self.flags(vmdk_allowed_types=[], group='compute')
+        self.assertRaises(exception.ImageUnacceptable,
+                          images.check_vmdk_image, 'foo',
+                          imageutils.QemuImgInfo(jsonutils.dumps(info),
+                                                 format='json'))
+
+    @mock.patch.object(images, 'fetch')
+    @mock.patch('nova.privsep.qemu.unprivileged_qemu_img_info')
+    def test_fetch_checks_vmdk_rules(self, mock_info, mock_fetch):
+        info = {'format': 'vmdk',
+                'format-specific': {
+                    'type': 'vmdk',
+                    'data': {
+                        'create-type': 'monolithicFlat',
+                }}}
+        mock_info.return_value = jsonutils.dumps(info)
+        with mock.patch('os.path.exists', return_value=True):
+            e = self.assertRaises(exception.ImageUnacceptable,
+                                  images.fetch_to_raw, None, 'foo', 'anypath')
+            self.assertIn('Invalid VMDK create-type specified', str(e))
diff --git a/nova/virt/images.py b/nova/virt/images.py
index 5358f3766ac..f13c8722909 100644
--- a/nova/virt/images.py
+++ b/nova/virt/images.py
@@ -110,6 +110,34 @@ def get_info(context, image_href):
     return IMAGE_API.get(context, image_href)
 
 
+def check_vmdk_image(image_id, data):
+    # Check some rules about VMDK files. Specifically we want to make
+    # sure that the "create-type" of the image is one that we allow.
+    # Some types of VMDK files can reference files outside the disk
+    # image and we do not want to allow those for obvious reasons.
+
+    types = CONF.compute.vmdk_allowed_types
+
+    if not len(types):
+        LOG.warning('Refusing to allow VMDK image as vmdk_allowed_'
+                    'types is empty')
+        msg = _('Invalid VMDK create-type specified')
+        raise exception.ImageUnacceptable(image_id=image_id, reason=msg)
+
+    try:
+        create_type = data.format_specific['data']['create-type']
+    except KeyError:
+        msg = _('Unable to determine VMDK create-type')
+        raise exception.ImageUnacceptable(image_id=image_id, reason=msg)
+
+    if create_type not in CONF.compute.vmdk_allowed_types:
+        LOG.warning('Refusing to process VMDK file with create-type of %r '
+                    'which is not in allowed set of: %s', create_type,
+                    ','.join(CONF.compute.vmdk_allowed_types))
+        msg = _('Invalid VMDK create-type specified')
+        raise exception.ImageUnacceptable(image_id=image_id, reason=msg)
+
+
 def fetch_to_raw(context, image_href, path, trusted_certs=None):
     path_tmp = "%s.part" % path
     fetch(context, image_href, path_tmp, trusted_certs)
@@ -129,6 +157,9 @@ def fetch_to_raw(context, image_href, path, trusted_certs=None):
                 reason=(_("fmt=%(fmt)s backed by: %(backing_file)s") %
                         {'fmt': fmt, 'backing_file': backing_file}))
 
+        if fmt == 'vmdk':
+            check_vmdk_image(image_href, data)
+
         if fmt != "raw" and CONF.force_raw_images:
             staged = "%s.converted" % path
             LOG.debug("%s was %s, converting to raw", image_href, fmt)

From 342fc37f3d1ed1232fe18bb7af84762d4abd8c45 Mon Sep 17 00:00:00 2001
From: stackhpc-ci <22933334+stackhpc-ci@users.noreply.github.com>
Date: Wed, 8 Feb 2023 10:02:50 +0000
Subject: [PATCH 04/23] feat: automatic update of workflows stackhpc/yoga

---
 .github/workflows/tag-and-release.yml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.github/workflows/tag-and-release.yml b/.github/workflows/tag-and-release.yml
index 1533fa2bffd..4ef4d2618d7 100644
--- a/.github/workflows/tag-and-release.yml
+++ b/.github/workflows/tag-and-release.yml
@@ -5,6 +5,7 @@ name: Tag & Release
     branches:
       - stackhpc/yoga
 permissions:
+  actions: read
   contents: write
 jobs:
   tag-and-release:

From 612225d24e8748a9cce9fbdb170c5bcffcb565eb Mon Sep 17 00:00:00 2001
From: Stephen Finucane <sfinucan@redhat.com>
Date: Thu, 15 Dec 2022 00:09:04 +0000
Subject: [PATCH 05/23] Remove use of removeprefix

This is not supported on Python 3.8 [1]. I have no idea why this was not
failing CI.

[1] https://docs.python.org/3.9/library/stdtypes.html#str.removeprefix

Change-Id: I225e9ced0f75c415b1d2fee05440291e3d8635c0
Signed-off-by: Stephen Finucane <sfinucan@redhat.com>
(cherry picked from commit 3ccf82ef9e2c87a1d33a0dda8929c05e80844087)
(cherry picked from commit 2d011ac32c132fa5c4d03f8167c2be3648d5d123)
---
 nova/tests/unit/console/test_websocketproxy.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/nova/tests/unit/console/test_websocketproxy.py b/nova/tests/unit/console/test_websocketproxy.py
index 0c897e3e911..cd99ad53f0a 100644
--- a/nova/tests/unit/console/test_websocketproxy.py
+++ b/nova/tests/unit/console/test_websocketproxy.py
@@ -637,7 +637,9 @@ def test_reject_open_redirect(self, url='//example.com/%2F..'):
         # now the same url but with extra leading '/' characters removed.
         if expected_cpython in errmsg:
             location = result[3].decode()
-            location = location.removeprefix('Location: ').rstrip('\r\n')
+            if location.startswith('Location: '):
+                location = location[len('Location: '):]
+            location = location.rstrip('\r\n')
             self.assertTrue(
                 location.startswith('/example.com/%2F..'),
                 msg='Redirect location is not the expected sanitized URL',

From fa3c95b63e1ef7d35ca78ceb90de3f97a134691e Mon Sep 17 00:00:00 2001
From: Artom Lifshitz <alifshit@redhat.com>
Date: Mon, 10 Jan 2022 13:36:36 -0500
Subject: [PATCH 06/23] libvirt: remove default cputune shares value

Previously, the libvirt driver defaulted to 1024 * (# of CPUs) for the
value of domain/cputune/shares in the libvirt XML. This value is then
passed directly by libvirt to the cgroups API. Cgroups v2 imposes a
maximum value of 10000 that can be passed in. This makes Nova
unable to launch instances with more than 9 CPUs on hosts that run
cgroups v2, like Ubuntu Jammy or RHEL 9.

Fix this by just removing the default entirely. Because there is no
longer a guarantee that domain/cputune will contain at least a shares
element, we can stop always generating the former, and only generate
it if it will actually contain something.

We can also make operators's lives easier by leveraging the fact that
we update the XML during live migration, so this patch also adds a
method to remove the shares value from the live migration XML if one
was not set as the quota:cpu_shares flavor extra spec.

For operators that *have* set this extra spec to something greater
than 10000, their flavors will have to get updates, and their
instances resized.

Partial-bug: 1978489
Change-Id: I49d757f5f261b3562ada27e6cf57284f615ca395
(cherry picked from commit f77a9fee5b736899ecc39d33e4f4e4012cee751c)
(cherry picked from commit 888e837bb71464cd1c2179964ac3e853ac18db52)
---
 doc/source/admin/resource-limits.rst          |  3 +-
 nova/tests/unit/virt/libvirt/test_driver.py   | 40 ++++++------------
 .../tests/unit/virt/libvirt/test_migration.py | 42 +++++++++++++++++--
 nova/virt/libvirt/driver.py                   |  8 +---
 nova/virt/libvirt/migration.py                | 13 ++++++
 ...putune-shares-values-85d5ddf4b8e24eaa.yaml | 15 +++++++
 6 files changed, 83 insertions(+), 38 deletions(-)
 create mode 100644 releasenotes/notes/remove-default-cputune-shares-values-85d5ddf4b8e24eaa.yaml

diff --git a/doc/source/admin/resource-limits.rst b/doc/source/admin/resource-limits.rst
index c74ad31c17b..8ef248a9a1d 100644
--- a/doc/source/admin/resource-limits.rst
+++ b/doc/source/admin/resource-limits.rst
@@ -38,7 +38,8 @@ CPU limits
 Libvirt enforces CPU limits in terms of *shares* and *quotas*, configured
 via :nova:extra-spec:`quota:cpu_shares` and :nova:extra-spec:`quota:cpu_period`
 / :nova:extra-spec:`quota:cpu_quota`, respectively. Both are implemented using
-the `cgroups v1 cpu controller`__.
+the `cgroups cpu controller`__. Note that allowed values for *shares* are
+platform dependant.
 
 CPU shares are a proportional weighted share of total CPU resources relative to
 other instances. It does not limit CPU usage if CPUs are not busy. There is no
diff --git a/nova/tests/unit/virt/libvirt/test_driver.py b/nova/tests/unit/virt/libvirt/test_driver.py
index 22abff16d30..e41cd740dd9 100644
--- a/nova/tests/unit/virt/libvirt/test_driver.py
+++ b/nova/tests/unit/virt/libvirt/test_driver.py
@@ -2989,7 +2989,7 @@ def test_get_guest_config_numa_host_instance_fits(self):
             cfg = drvr._get_guest_config(instance_ref, [],
                                          image_meta, disk_info)
             self.assertIsNone(cfg.cpuset)
-            self.assertEqual(0, len(cfg.cputune.vcpupin))
+            self.assertIsNone(cfg.cputune)
             self.assertIsNone(cfg.cpu.numa)
 
     @mock.patch('nova.privsep.utils.supports_direct_io',
@@ -3024,7 +3024,7 @@ def test_get_guest_config_numa_host_instance_no_fit(self):
                                          image_meta, disk_info)
             self.assertFalse(choice_mock.called)
             self.assertIsNone(cfg.cpuset)
-            self.assertEqual(0, len(cfg.cputune.vcpupin))
+            self.assertIsNone(cfg.cputune)
             self.assertIsNone(cfg.cpu.numa)
 
     def _test_get_guest_memory_backing_config(
@@ -3429,7 +3429,7 @@ def test_get_guest_config_numa_host_instance_pci_no_numa_info(self):
             cfg = conn._get_guest_config(instance_ref, [],
                                          image_meta, disk_info)
             self.assertEqual(set([3]), cfg.cpuset)
-            self.assertEqual(0, len(cfg.cputune.vcpupin))
+            self.assertIsNone(cfg.cputune)
             self.assertIsNone(cfg.cpu.numa)
 
     @mock.patch('nova.privsep.utils.supports_direct_io',
@@ -3481,7 +3481,7 @@ def test_get_guest_config_numa_host_instance_2pci_no_fit(self):
                                          image_meta, disk_info)
             self.assertFalse(choice_mock.called)
             self.assertEqual(set([3]), cfg.cpuset)
-            self.assertEqual(0, len(cfg.cputune.vcpupin))
+            self.assertIsNone(cfg.cputune)
             self.assertIsNone(cfg.cpu.numa)
 
     @mock.patch.object(fakelibvirt.Connection, 'getType')
@@ -3577,7 +3577,7 @@ def test_get_guest_config_numa_host_instance_fit_w_cpu_pinset(self):
             # NOTE(ndipanov): we make sure that pin_set was taken into account
             # when choosing viable cells
             self.assertEqual(set([2, 3]), cfg.cpuset)
-            self.assertEqual(0, len(cfg.cputune.vcpupin))
+            self.assertIsNone(cfg.cputune)
             self.assertIsNone(cfg.cpu.numa)
 
     def test_get_guest_config_non_numa_host_instance_topo(self):
@@ -3617,7 +3617,7 @@ def test_get_guest_config_non_numa_host_instance_topo(self):
             cfg = drvr._get_guest_config(instance_ref, [],
                                          image_meta, disk_info)
             self.assertIsNone(cfg.cpuset)
-            self.assertEqual(0, len(cfg.cputune.vcpupin))
+            self.assertIsNone(cfg.cputune)
             self.assertIsNone(cfg.numatune)
             self.assertIsNotNone(cfg.cpu.numa)
             for instance_cell, numa_cfg_cell in zip(
@@ -7020,25 +7020,9 @@ def test_get_guest_config_with_rng_dev_not_present(self, mock_path):
                           [],
                           image_meta, disk_info)
 
-    def test_guest_cpu_shares_with_multi_vcpu(self):
-        self.flags(virt_type='kvm', group='libvirt')
-
-        drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), True)
-
-        instance_ref = objects.Instance(**self.test_instance)
-        instance_ref.flavor.vcpus = 4
-        image_meta = objects.ImageMeta.from_dict(self.test_image_meta)
-
-        disk_info = blockinfo.get_disk_info(CONF.libvirt.virt_type,
-                                            instance_ref,
-                                            image_meta)
-
-        cfg = drvr._get_guest_config(instance_ref, [],
-                                     image_meta, disk_info)
-
-        self.assertEqual(4096, cfg.cputune.shares)
-
-    def test_get_guest_config_with_cpu_quota(self):
+    @mock.patch.object(
+        host.Host, "is_cpu_control_policy_capable", return_value=True)
+    def test_get_guest_config_with_cpu_quota(self, is_able):
         self.flags(virt_type='kvm', group='libvirt')
 
         drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), True)
@@ -11608,7 +11592,7 @@ def test_live_migration_update_graphics_xml(self, mock_xml,
                                                       mock_migrateToURI3,
                                                       mock_min_version):
         self.compute = manager.ComputeManager()
-        instance_ref = self.test_instance
+        instance_ref = objects.Instance(**self.test_instance)
         target_connection = '127.0.0.2'
 
         xml_tmpl = ("<domain type='kvm'>"
@@ -12288,7 +12272,7 @@ def test_live_migration_update_serial_console_xml(self, mock_xml,
                                                       mock_get,
                                                       mock_min_version):
         self.compute = manager.ComputeManager()
-        instance_ref = self.test_instance
+        instance_ref = objects.Instance(**self.test_instance)
         target_connection = '127.0.0.2'
 
         xml_tmpl = ("<domain type='kvm'>"
@@ -12578,7 +12562,7 @@ def test_live_migration_raises_exception(self, mock_xml,
                                              mock_min_version):
         # Prepare data
         self.compute = manager.ComputeManager()
-        instance_ref = self.test_instance
+        instance_ref = objects.Instance(**self.test_instance)
         target_connection = '127.0.0.2'
 
         disk_paths = ['vda', 'vdb']
diff --git a/nova/tests/unit/virt/libvirt/test_migration.py b/nova/tests/unit/virt/libvirt/test_migration.py
index f4e64fbe53e..70488f88cf4 100644
--- a/nova/tests/unit/virt/libvirt/test_migration.py
+++ b/nova/tests/unit/virt/libvirt/test_migration.py
@@ -28,6 +28,7 @@
 from nova import test
 from nova.tests import fixtures as nova_fixtures
 from nova.tests.fixtures import libvirt as fakelibvirt
+from nova.tests.unit.virt.libvirt import test_driver
 from nova.virt.libvirt import config as vconfig
 from nova.virt.libvirt import guest as libvirt_guest
 from nova.virt.libvirt import host
@@ -80,16 +81,51 @@ def test_get_updated_guest_xml(
         get_volume_config = mock.MagicMock()
         mock_guest.get_xml_desc.return_value = '<domain></domain>'
 
-        migration.get_updated_guest_xml(
-            mock.sentinel.instance, mock_guest, data, get_volume_config)
+        instance = objects.Instance(**test_driver._create_test_instance())
+        migration.get_updated_guest_xml(instance, mock_guest, data,
+                                        get_volume_config)
         mock_graphics.assert_called_once_with(mock.ANY, data)
         mock_serial.assert_called_once_with(mock.ANY, data)
         mock_volume.assert_called_once_with(
-            mock.ANY, data, mock.sentinel.instance, get_volume_config)
+            mock.ANY, data, instance, get_volume_config)
         mock_perf_events_xml.assert_called_once_with(mock.ANY, data)
         mock_memory_backing.assert_called_once_with(mock.ANY, data)
         self.assertEqual(1, mock_tostring.called)
 
+    def test_update_quota_xml(self):
+        old_xml = """<domain>
+                         <name>fake-instance</name>
+                         <cputune>
+                             <shares>42</shares>
+                             <period>1337</period>
+                         </cputune>
+                     </domain>"""
+        instance = objects.Instance(**test_driver._create_test_instance())
+        new_xml = migration._update_quota_xml(instance,
+                                              etree.fromstring(old_xml))
+        new_xml = etree.tostring(new_xml, encoding='unicode')
+        self.assertXmlEqual(
+            """<domain>
+                   <name>fake-instance</name>
+                   <cputune>
+                       <period>1337</period>
+                   </cputune>
+               </domain>""", new_xml)
+
+    def test_update_quota_xml_empty_cputune(self):
+        old_xml = """<domain>
+                         <name>fake-instance</name>
+                         <cputune>
+                             <shares>42</shares>
+                         </cputune>
+                     </domain>"""
+        instance = objects.Instance(**test_driver._create_test_instance())
+        new_xml = migration._update_quota_xml(instance,
+                                              etree.fromstring(old_xml))
+        new_xml = etree.tostring(new_xml, encoding='unicode')
+        self.assertXmlEqual('<domain><name>fake-instance</name></domain>',
+                            new_xml)
+
     def test_update_device_resources_xml_vpmem(self):
         # original xml for vpmems, /dev/dax0.1 and /dev/dax0.2 here
         # are vpmem device path on source host
diff --git a/nova/virt/libvirt/driver.py b/nova/virt/libvirt/driver.py
index 615a009e062..4de51ce8e31 100644
--- a/nova/virt/libvirt/driver.py
+++ b/nova/virt/libvirt/driver.py
@@ -5682,15 +5682,11 @@ def _update_guest_cputune(self, guest, flavor):
         if not is_able or CONF.libvirt.virt_type not in ('lxc', 'kvm', 'qemu'):
             return
 
-        if guest.cputune is None:
-            guest.cputune = vconfig.LibvirtConfigGuestCPUTune()
-            # Setting the default cpu.shares value to be a value
-            # dependent on the number of vcpus
-        guest.cputune.shares = 1024 * guest.vcpus
-
         for name in cputuning:
             key = "quota:cpu_" + name
             if key in flavor.extra_specs:
+                if guest.cputune is None:
+                    guest.cputune = vconfig.LibvirtConfigGuestCPUTune()
                 setattr(guest.cputune, name,
                         int(flavor.extra_specs[key]))
 
diff --git a/nova/virt/libvirt/migration.py b/nova/virt/libvirt/migration.py
index 8cea9f29831..4726111a765 100644
--- a/nova/virt/libvirt/migration.py
+++ b/nova/virt/libvirt/migration.py
@@ -62,6 +62,7 @@ def get_updated_guest_xml(instance, guest, migrate_data, get_volume_config,
         xml_doc, migrate_data, instance, get_volume_config)
     xml_doc = _update_perf_events_xml(xml_doc, migrate_data)
     xml_doc = _update_memory_backing_xml(xml_doc, migrate_data)
+    xml_doc = _update_quota_xml(instance, xml_doc)
     if get_vif_config is not None:
         xml_doc = _update_vif_xml(xml_doc, migrate_data, get_vif_config)
     if 'dst_numa_info' in migrate_data:
@@ -71,6 +72,18 @@ def get_updated_guest_xml(instance, guest, migrate_data, get_volume_config,
     return etree.tostring(xml_doc, encoding='unicode')
 
 
+def _update_quota_xml(instance, xml_doc):
+    flavor_shares = instance.flavor.extra_specs.get('quota:cpu_shares')
+    cputune = xml_doc.find('./cputune')
+    shares = xml_doc.find('./cputune/shares')
+    if shares is not None and not flavor_shares:
+        cputune.remove(shares)
+    # Remove the cputune element entirely if it has no children left.
+    if cputune is not None and not list(cputune):
+        xml_doc.remove(cputune)
+    return xml_doc
+
+
 def _update_device_resources_xml(xml_doc, new_resources):
     vpmems = []
     for resource in new_resources:
diff --git a/releasenotes/notes/remove-default-cputune-shares-values-85d5ddf4b8e24eaa.yaml b/releasenotes/notes/remove-default-cputune-shares-values-85d5ddf4b8e24eaa.yaml
new file mode 100644
index 00000000000..9dd0987bb8c
--- /dev/null
+++ b/releasenotes/notes/remove-default-cputune-shares-values-85d5ddf4b8e24eaa.yaml
@@ -0,0 +1,15 @@
+upgrade:
+  - |
+    In the libvirt driver, the default value of the ``<cputune><shares>``
+    element has been removed, and is now left to libvirt to decide. This is
+    because allowed values are platform dependant, and the previous code was
+    not guaranteed to be supported on all platforms. If any of your flavors are
+    using the quota:cpu_shares extra spec, you may need to resize to a
+    supported value before upgrading.
+
+    To facilitate the transition to no Nova default for ``<cputune><shares>``,
+    its value will be removed during live migration unless a value is set in
+    the ``quota:cpu_shares`` extra spec. This can cause temporary CPU
+    starvation for the live migrated instance if other instances on the
+    destination host still have the old default ``<cputune><shares>`` value. To
+    fix this, hard reboot, cold migrate, or live migrate the other instances.

From c77acb43ea4484cc4643ee36a03f6ac1d06afa8e Mon Sep 17 00:00:00 2001
From: John Garbutt <john.garbutt@stackhpc.com>
Date: Wed, 16 Nov 2022 17:12:40 +0000
Subject: [PATCH 07/23] Ironic nodes with instance reserved in placement

Currently, when you delete an ironic instance, we trigger
and undeploy in ironic and we release our allocation in placement.
We do this well before the ironic node is actually available.

We have attempted to fix this my marking unavailable nodes
as reserved in placement. This works great until you try
and re-image lots of nodes.

It turns out, ironic nodes that are waiting for their automatic
clean to finish, are returned as a valid allocation candidates
for quite some time. Eventually we mark then as reserved.

This patch takes a strange approach, if we mark all nodes as
reserved as soon as the instance lands, we close the race.
That is, when the allocation is removed the node is still
unavailable until the next update of placement is done and
notices that the node has become available. That may or may
not have been after automatic cleaning. The trade off is
that when you don't have automatic cleaning, we wait a bit
longer to notice the node is available again.

Note, this is also useful when a broken Ironic node is
marked as in-maintainance while it is in-use by a nova
instance. In a similar way, we mark the Nova as reserved
immmeidately, rather than first waiting for the instance to be
deleted before reserving the resources in Placement.

Closes-Bug: #1974070
Change-Id: Iab92124b5776a799c7f90d07281d28fcf191c8fe
(cherry picked from commit 3c022e968375c1b2eadf3c2dd7190b9434c6d4c1)
(cherry picked from commit c9de185ea1ac1e8d4435c5863b2ad7cefdb28c76)
(cherry picked from commit 9e486282eaf32443604d34a169272731d3ca9463)
---
 nova/conf/workarounds.py                      | 15 ++++++
 nova/tests/unit/virt/ironic/test_driver.py    | 48 +++++++++++++++++--
 nova/virt/ironic/driver.py                    | 26 ++++++----
 ...ronic-scheduler-race-08cf8aba0365f512.yaml | 11 +++++
 4 files changed, 89 insertions(+), 11 deletions(-)
 create mode 100644 releasenotes/notes/fix-ironic-scheduler-race-08cf8aba0365f512.yaml

diff --git a/nova/conf/workarounds.py b/nova/conf/workarounds.py
index 2ec53282cdb..1116664d36d 100644
--- a/nova/conf/workarounds.py
+++ b/nova/conf/workarounds.py
@@ -416,6 +416,21 @@
         help="""
 When this is enabled, it will skip version-checking of hypervisors
 during live migration.
+"""),
+    cfg.BoolOpt(
+        'skip_reserve_in_use_ironic_nodes',
+        default=False,
+        help="""
+This may be useful if you use the Ironic driver, but don't have
+automatic cleaning enabled in Ironic. Nova, by default, will mark
+Ironic nodes as reserved as soon as they are in use. When you free
+the Ironic node (by deleting the nova instance) it takes a while
+for Nova to un-reserve that Ironic node in placement. Usually this
+is a good idea, because it avoids placement providing an Ironic
+as a valid candidate when it is still being cleaned.
+Howerver, if you don't use automatic cleaning, it can cause an
+extra delay before and Ironic node is available for building a
+new Nova instance.
 """),
 ]
 
diff --git a/nova/tests/unit/virt/ironic/test_driver.py b/nova/tests/unit/virt/ironic/test_driver.py
index ea5c0dbc05a..b3fb2fbd20a 100644
--- a/nova/tests/unit/virt/ironic/test_driver.py
+++ b/nova/tests/unit/virt/ironic/test_driver.py
@@ -931,6 +931,48 @@ def test_update_provider_tree_with_rc_occupied(self, mock_nfc, mock_nr,
 
         self.driver.update_provider_tree(self.ptree, mock.sentinel.nodename)
 
+        expected = {
+            'CUSTOM_IRON_NFV': {
+                'total': 1,
+                'reserved': 1,
+                'min_unit': 1,
+                'max_unit': 1,
+                'step_size': 1,
+                'allocation_ratio': 1.0,
+            },
+        }
+        mock_nfc.assert_called_once_with(mock.sentinel.nodename)
+        mock_nr.assert_called_once_with(mock_nfc.return_value)
+        mock_res_used.assert_called_once_with(mock_nfc.return_value)
+        mock_res_unavail.assert_called_once_with(mock_nfc.return_value)
+        result = self.ptree.data(mock.sentinel.nodename).inventory
+        self.assertEqual(expected, result)
+
+    @mock.patch.object(ironic_driver.IronicDriver,
+                       '_node_resources_used', return_value=True)
+    @mock.patch.object(ironic_driver.IronicDriver,
+                       '_node_resources_unavailable', return_value=False)
+    @mock.patch.object(ironic_driver.IronicDriver, '_node_resource')
+    @mock.patch.object(ironic_driver.IronicDriver, '_node_from_cache')
+    def test_update_provider_tree_with_rc_occupied_workaround(self,
+                mock_nfc, mock_nr, mock_res_unavail, mock_res_used):
+        """Ensure that when a node is used, we report the inventory matching
+        the consumed resources.
+        """
+        self.flags(skip_reserve_in_use_ironic_nodes=True,
+                   group="workarounds")
+        mock_nr.return_value = {
+            'vcpus': 24,
+            'vcpus_used': 24,
+            'memory_mb': 1024,
+            'memory_mb_used': 1024,
+            'local_gb': 100,
+            'local_gb_used': 100,
+            'resource_class': 'iron-nfv',
+        }
+
+        self.driver.update_provider_tree(self.ptree, mock.sentinel.nodename)
+
         expected = {
             'CUSTOM_IRON_NFV': {
                 'total': 1,
@@ -944,7 +986,7 @@ def test_update_provider_tree_with_rc_occupied(self, mock_nfc, mock_nr,
         mock_nfc.assert_called_once_with(mock.sentinel.nodename)
         mock_nr.assert_called_once_with(mock_nfc.return_value)
         mock_res_used.assert_called_once_with(mock_nfc.return_value)
-        self.assertFalse(mock_res_unavail.called)
+        mock_res_unavail.assert_called_once_with(mock_nfc.return_value)
         result = self.ptree.data(mock.sentinel.nodename).inventory
         self.assertEqual(expected, result)
 
@@ -1015,7 +1057,7 @@ def test_update_provider_tree_no_traits(self, mock_nfc, mock_nr,
         mock_nfc.assert_called_once_with(mock.sentinel.nodename)
         mock_nr.assert_called_once_with(mock_nfc.return_value)
         mock_res_used.assert_called_once_with(mock_nfc.return_value)
-        self.assertFalse(mock_res_unavail.called)
+        mock_res_unavail.assert_called_once_with(mock_nfc.return_value)
         result = self.ptree.data(mock.sentinel.nodename).traits
         self.assertEqual(set(), result)
 
@@ -1047,7 +1089,7 @@ def test_update_provider_tree_with_traits(self, mock_nfc, mock_nr,
         mock_nfc.assert_called_once_with(mock.sentinel.nodename)
         mock_nr.assert_called_once_with(mock_nfc.return_value)
         mock_res_used.assert_called_once_with(mock_nfc.return_value)
-        self.assertFalse(mock_res_unavail.called)
+        mock_res_unavail.assert_called_once_with(mock_nfc.return_value)
         result = self.ptree.data(mock.sentinel.nodename).traits
         self.assertEqual(set(traits), result)
 
diff --git a/nova/virt/ironic/driver.py b/nova/virt/ironic/driver.py
index f21694da478..ca0bb978e69 100644
--- a/nova/virt/ironic/driver.py
+++ b/nova/virt/ironic/driver.py
@@ -886,15 +886,25 @@ def update_provider_tree(self, provider_tree, nodename, allocations=None):
         """
         # nodename is the ironic node's UUID.
         node = self._node_from_cache(nodename)
+
         reserved = False
-        if (not self._node_resources_used(node) and
-                self._node_resources_unavailable(node)):
-            LOG.debug('Node %(node)s is not ready for a deployment, '
-                      'reporting resources as reserved for it. Node\'s '
-                      'provision state is %(prov)s, power state is '
-                      '%(power)s and maintenance is %(maint)s.',
-                      {'node': node.uuid, 'prov': node.provision_state,
-                       'power': node.power_state, 'maint': node.maintenance})
+        if self._node_resources_unavailable(node):
+            # Operators might mark a node as in maintainance,
+            # even when an instance is on the node,
+            # either way lets mark this as reserved
+            reserved = True
+
+        if (self._node_resources_used(node) and
+            not CONF.workarounds.skip_reserve_in_use_ironic_nodes):
+            # Make resources as reserved once we have
+            # and instance here.
+            # When the allocation is deleted, most likely
+            # automatic clean will start, so we keep the node
+            # reserved until it becomes available again.
+            # In the case without automatic clean, once
+            # the allocation is removed in placement it
+            # also stays as reserved until we notice on
+            # the next periodic its actually available.
             reserved = True
 
         info = self._node_resource(node)
diff --git a/releasenotes/notes/fix-ironic-scheduler-race-08cf8aba0365f512.yaml b/releasenotes/notes/fix-ironic-scheduler-race-08cf8aba0365f512.yaml
new file mode 100644
index 00000000000..4fd2cc1ca9f
--- /dev/null
+++ b/releasenotes/notes/fix-ironic-scheduler-race-08cf8aba0365f512.yaml
@@ -0,0 +1,11 @@
+---
+fixes:
+  - |
+    Fixed when placement returns ironic nodes that have just started automatic
+    cleaning as possible valid candidates. This is done by marking all ironic
+    nodes with an instance on them as reserved, such that nova only makes them
+    available once we have double checked Ironic reports the node as available.
+    If you don't have automatic cleaning on, this might mean it takes longer
+    than normal for Ironic nodes to become available for new instances.
+    If you want the old behaviour use the following workaround config:
+    `[workarounds]skip_reserve_in_use_ironic_nodes=true`

From 50d749237e6b363b23d6531e4a9b8856fd9e0e1a Mon Sep 17 00:00:00 2001
From: Dan Smith <dansmith@redhat.com>
Date: Mon, 1 Apr 2024 07:32:11 -0700
Subject: [PATCH 08/23] Reject qcow files with data-file attributes

Change-Id: Ic3fa16f55acc38cf6c1a4ac1dce4487225e66d04
Closes-Bug: #2059809
(cherry picked from commit 37c587268526e16d3d0d6d6e802a33cc10548c60)
(cherry picked from commit 888311f0083f864de0cb7efd30195a7c4d5060c0)
(cherry picked from commit e53ca8e0c558214eab2a54172bcf161cd786e8ae)
---
 nova/tests/unit/virt/libvirt/test_utils.py |  1 +
 nova/tests/unit/virt/test_images.py        | 31 ++++++++++++++++++++++
 nova/virt/images.py                        |  9 +++++++
 3 files changed, 41 insertions(+)

diff --git a/nova/tests/unit/virt/libvirt/test_utils.py b/nova/tests/unit/virt/libvirt/test_utils.py
index 4e73c662c57..797480bdc00 100644
--- a/nova/tests/unit/virt/libvirt/test_utils.py
+++ b/nova/tests/unit/virt/libvirt/test_utils.py
@@ -354,6 +354,7 @@ class FakeImgInfo(object):
             FakeImgInfo.file_format = file_format
             FakeImgInfo.backing_file = backing_file
             FakeImgInfo.virtual_size = 1
+            FakeImgInfo.format_specific = None if file_format == 'raw' else {}
 
             return FakeImgInfo()
 
diff --git a/nova/tests/unit/virt/test_images.py b/nova/tests/unit/virt/test_images.py
index 563330b5414..e0b08bb9f59 100644
--- a/nova/tests/unit/virt/test_images.py
+++ b/nova/tests/unit/virt/test_images.py
@@ -112,6 +112,37 @@ def test_fetch_to_raw_errors(self, convert_image, qemu_img_info, fetch):
                                images.fetch_to_raw,
                                None, 'href123', '/no/path')
 
+    @mock.patch.object(images, 'convert_image',
+                       side_effect=exception.ImageUnacceptable)
+    @mock.patch.object(images, 'qemu_img_info')
+    @mock.patch.object(images, 'fetch')
+    def test_fetch_to_raw_data_file(self, convert_image, qemu_img_info_fn,
+                                    fetch):
+        # NOTE(danms): the above test needs the following line as well, as it
+        # is broken without it.
+        qemu_img_info = qemu_img_info_fn.return_value
+        qemu_img_info.backing_file = None
+        qemu_img_info.file_format = 'qcow2'
+        qemu_img_info.virtual_size = 20
+        qemu_img_info.format_specific = {'data': {'data-file': 'somefile'}}
+        self.assertRaisesRegex(exception.ImageUnacceptable,
+                               'Image href123 is unacceptable.*somefile',
+                               images.fetch_to_raw,
+                               None, 'href123', '/no/path')
+
+    @mock.patch('os.rename')
+    @mock.patch.object(images, 'qemu_img_info')
+    @mock.patch.object(images, 'fetch')
+    def test_fetch_to_raw_from_raw(self, fetch, qemu_img_info_fn, mock_rename):
+        # Make sure we support a case where we fetch an already-raw image and
+        # qemu-img returns None for "format_specific".
+        qemu_img_info = qemu_img_info_fn.return_value
+        qemu_img_info.file_format = 'raw'
+        qemu_img_info.backing_file = None
+        qemu_img_info.format_specific = None
+        images.fetch_to_raw(None, 'href123', '/no/path')
+        mock_rename.assert_called_once_with('/no/path.part', '/no/path')
+
     @mock.patch.object(compute_utils, 'disk_ops_semaphore')
     @mock.patch('nova.privsep.utils.supports_direct_io', return_value=True)
     @mock.patch('oslo_concurrency.processutils.execute')
diff --git a/nova/virt/images.py b/nova/virt/images.py
index f13c8722909..5f80a1d0758 100644
--- a/nova/virt/images.py
+++ b/nova/virt/images.py
@@ -157,6 +157,15 @@ def fetch_to_raw(context, image_href, path, trusted_certs=None):
                 reason=(_("fmt=%(fmt)s backed by: %(backing_file)s") %
                         {'fmt': fmt, 'backing_file': backing_file}))
 
+        try:
+            data_file = data.format_specific['data']['data-file']
+        except (KeyError, TypeError, AttributeError):
+            data_file = None
+        if data_file is not None:
+            raise exception.ImageUnacceptable(image_id=image_href,
+                reason=(_("fmt=%(fmt)s has data-file: %(data_file)s") %
+                        {'fmt': fmt, 'data_file': data_file}))
+
         if fmt == 'vmdk':
             check_vmdk_image(image_href, data)
 

From c5336bed47cde5dcd8200a378a418c5d51943e91 Mon Sep 17 00:00:00 2001
From: Dan Smith <dansmith@redhat.com>
Date: Wed, 17 Apr 2024 07:06:13 -0700
Subject: [PATCH 09/23] Check images with format_inspector for safety

It has been asserted that we should not be calling qemu-img info
on untrusted files. That means we need to know if they have a
backing_file, data_file or other unsafe configuration *before* we use
qemu-img to probe or convert them.

This grafts glance's format_inspector module into nova/images so we
can use it to check the file early for safety. The expectation is that
this will be moved to oslo.utils (or something) later and thus we will
just delete the file from nova and change our import when that happens.

NOTE: This includes whitespace changes from the glance version of
format_inspector.py because of autopep8 demands.

Change-Id: Iaefbe41b4c4bf0cf95d8f621653fdf65062aaa59
Closes-Bug: #2059809
(cherry picked from commit 966cd5a1f3119d47eeb985eea2385bd12148c320)
(cherry picked from commit 4d5824f03310cbc2aa32fa8e4b27a98a9b7d9a81)
(cherry picked from commit 210ad1f04c2f20374b323e64b9ef62d1eff5cc36)
---
 nova/conf/workarounds.py                   |  10 +
 nova/image/format_inspector.py             | 889 +++++++++++++++++++++
 nova/tests/unit/virt/libvirt/test_utils.py |  48 +-
 nova/tests/unit/virt/test_images.py        | 118 ++-
 nova/virt/images.py                        |  47 +-
 5 files changed, 1103 insertions(+), 9 deletions(-)
 create mode 100644 nova/image/format_inspector.py

diff --git a/nova/conf/workarounds.py b/nova/conf/workarounds.py
index 1116664d36d..3be204f29a3 100644
--- a/nova/conf/workarounds.py
+++ b/nova/conf/workarounds.py
@@ -431,6 +431,16 @@
 Howerver, if you don't use automatic cleaning, it can cause an
 extra delay before and Ironic node is available for building a
 new Nova instance.
+"""),
+    cfg.BoolOpt(
+        'disable_deep_image_inspection',
+        default=False,
+        help="""
+This disables the additional deep image inspection that the compute node does
+when downloading from glance. This includes backing-file, data-file, and
+known-features detection *before* passing the image to qemu-img. Generally,
+this inspection should be enabled for maximum safety, but this workaround
+option allows disabling it if there is a compatibility concern.
 """),
 ]
 
diff --git a/nova/image/format_inspector.py b/nova/image/format_inspector.py
new file mode 100644
index 00000000000..268c98b99cb
--- /dev/null
+++ b/nova/image/format_inspector.py
@@ -0,0 +1,889 @@
+# Copyright 2020 Red Hat, Inc
+# All Rights Reserved.
+#
+#    Licensed under the Apache License, Version 2.0 (the "License"); you may
+#    not use this file except in compliance with the License. You may obtain
+#    a copy of the License at
+#
+#         http://www.apache.org/licenses/LICENSE-2.0
+#
+#    Unless required by applicable law or agreed to in writing, software
+#    distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+#    WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+#    License for the specific language governing permissions and limitations
+#    under the License.
+
+"""
+This is a python implementation of virtual disk format inspection routines
+gathered from various public specification documents, as well as qemu disk
+driver code. It attempts to store and parse the minimum amount of data
+required, and in a streaming-friendly manner to collect metadata about
+complex-format images.
+"""
+
+import struct
+
+from oslo_log import log as logging
+
+LOG = logging.getLogger(__name__)
+
+
+def chunked_reader(fileobj, chunk_size=512):
+    while True:
+        chunk = fileobj.read(chunk_size)
+        if not chunk:
+            break
+        yield chunk
+
+
+class CaptureRegion(object):
+    """Represents a region of a file we want to capture.
+
+    A region of a file we want to capture requires a byte offset into
+    the file and a length. This is expected to be used by a data
+    processing loop, calling capture() with the most recently-read
+    chunk. This class handles the task of grabbing the desired region
+    of data across potentially multiple fractional and unaligned reads.
+
+    :param offset: Byte offset into the file starting the region
+    :param length: The length of the region
+    """
+
+    def __init__(self, offset, length):
+        self.offset = offset
+        self.length = length
+        self.data = b''
+
+    @property
+    def complete(self):
+        """Returns True when we have captured the desired data."""
+        return self.length == len(self.data)
+
+    def capture(self, chunk, current_position):
+        """Process a chunk of data.
+
+        This should be called for each chunk in the read loop, at least
+        until complete returns True.
+
+        :param chunk: A chunk of bytes in the file
+        :param current_position: The position of the file processed by the
+                                 read loop so far. Note that this will be
+                                 the position in the file *after* the chunk
+                                 being presented.
+        """
+        read_start = current_position - len(chunk)
+        if (read_start <= self.offset <= current_position or
+                self.offset <= read_start <= (self.offset + self.length)):
+            if read_start < self.offset:
+                lead_gap = self.offset - read_start
+            else:
+                lead_gap = 0
+            self.data += chunk[lead_gap:]
+            self.data = self.data[:self.length]
+
+
+class ImageFormatError(Exception):
+    """An unrecoverable image format error that aborts the process."""
+    pass
+
+
+class TraceDisabled(object):
+    """A logger-like thing that swallows tracing when we do not want it."""
+
+    def debug(self, *a, **k):
+        pass
+
+    info = debug
+    warning = debug
+    error = debug
+
+
+class FileInspector(object):
+    """A stream-based disk image inspector.
+
+    This base class works on raw images and is subclassed for more
+    complex types. It is to be presented with the file to be examined
+    one chunk at a time, during read processing and will only store
+    as much data as necessary to determine required attributes of
+    the file.
+    """
+
+    def __init__(self, tracing=False):
+        self._total_count = 0
+
+        # NOTE(danms): The logging in here is extremely verbose for a reason,
+        # but should never really be enabled at that level at runtime. To
+        # retain all that work and assist in future debug, we have a separate
+        # debug flag that can be passed from a manual tool to turn it on.
+        if tracing:
+            self._log = logging.getLogger(str(self))
+        else:
+            self._log = TraceDisabled()
+        self._capture_regions = {}
+
+    def _capture(self, chunk, only=None):
+        for name, region in self._capture_regions.items():
+            if only and name not in only:
+                continue
+            if not region.complete:
+                region.capture(chunk, self._total_count)
+
+    def eat_chunk(self, chunk):
+        """Call this to present chunks of the file to the inspector."""
+        pre_regions = set(self._capture_regions.keys())
+
+        # Increment our position-in-file counter
+        self._total_count += len(chunk)
+
+        # Run through the regions we know of to see if they want this
+        # data
+        self._capture(chunk)
+
+        # Let the format do some post-read processing of the stream
+        self.post_process()
+
+        # Check to see if the post-read processing added new regions
+        # which may require the current chunk.
+        new_regions = set(self._capture_regions.keys()) - pre_regions
+        if new_regions:
+            self._capture(chunk, only=new_regions)
+
+    def post_process(self):
+        """Post-read hook to process what has been read so far.
+
+        This will be called after each chunk is read and potentially captured
+        by the defined regions. If any regions are defined by this call,
+        those regions will be presented with the current chunk in case it
+        is within one of the new regions.
+        """
+        pass
+
+    def region(self, name):
+        """Get a CaptureRegion by name."""
+        return self._capture_regions[name]
+
+    def new_region(self, name, region):
+        """Add a new CaptureRegion by name."""
+        if self.has_region(name):
+            # This is a bug, we tried to add the same region twice
+            raise ImageFormatError('Inspector re-added region %s' % name)
+        self._capture_regions[name] = region
+
+    def has_region(self, name):
+        """Returns True if named region has been defined."""
+        return name in self._capture_regions
+
+    @property
+    def format_match(self):
+        """Returns True if the file appears to be the expected format."""
+        return True
+
+    @property
+    def virtual_size(self):
+        """Returns the virtual size of the disk image, or zero if unknown."""
+        return self._total_count
+
+    @property
+    def actual_size(self):
+        """Returns the total size of the file, usually smaller than
+        virtual_size. NOTE: this will only be accurate if the entire
+        file is read and processed.
+        """
+        return self._total_count
+
+    @property
+    def complete(self):
+        """Returns True if we have all the information needed."""
+        return all(r.complete for r in self._capture_regions.values())
+
+    def __str__(self):
+        """The string name of this file format."""
+        return 'raw'
+
+    @property
+    def context_info(self):
+        """Return info on amount of data held in memory for auditing.
+
+        This is a dict of region:sizeinbytes items that the inspector
+        uses to examine the file.
+        """
+        return {name: len(region.data) for name, region in
+                self._capture_regions.items()}
+
+    @classmethod
+    def from_file(cls, filename):
+        """Read as much of a file as necessary to complete inspection.
+
+        NOTE: Because we only read as much of the file as necessary, the
+        actual_size property will not reflect the size of the file, but the
+        amount of data we read before we satisfied the inspector.
+
+        Raises ImageFormatError if we cannot parse the file.
+        """
+        inspector = cls()
+        with open(filename, 'rb') as f:
+            for chunk in chunked_reader(f):
+                inspector.eat_chunk(chunk)
+                if inspector.complete:
+                    # No need to eat any more data
+                    break
+        if not inspector.complete or not inspector.format_match:
+            raise ImageFormatError('File is not in requested format')
+        return inspector
+
+    def safety_check(self):
+        """Perform some checks to determine if this file is safe.
+
+        Returns True if safe, False otherwise. It may raise ImageFormatError
+        if safety cannot be guaranteed because of parsing or other errors.
+        """
+        return True
+
+
+# The qcow2 format consists of a big-endian 72-byte header, of which
+# only a small portion has information we care about:
+#
+# Dec   Hex   Name
+#   0  0x00   Magic 4-bytes 'QFI\xfb'
+#   4  0x04   Version (uint32_t, should always be 2 for modern files)
+#  . . .
+#   8  0x08   Backing file offset (uint64_t)
+#  24  0x18   Size in bytes (unint64_t)
+#  . . .
+#  72  0x48   Incompatible features bitfield (6 bytes)
+#
+# https://gitlab.com/qemu-project/qemu/-/blob/master/docs/interop/qcow2.txt
+class QcowInspector(FileInspector):
+    """QEMU QCOW2 Format
+
+    This should only require about 32 bytes of the beginning of the file
+    to determine the virtual size, and 104 bytes to perform the safety check.
+    """
+
+    BF_OFFSET = 0x08
+    BF_OFFSET_LEN = 8
+    I_FEATURES = 0x48
+    I_FEATURES_LEN = 8
+    I_FEATURES_DATAFILE_BIT = 3
+    I_FEATURES_MAX_BIT = 4
+
+    def __init__(self, *a, **k):
+        super(QcowInspector, self).__init__(*a, **k)
+        self.new_region('header', CaptureRegion(0, 512))
+
+    def _qcow_header_data(self):
+        magic, version, bf_offset, bf_sz, cluster_bits, size = (
+            struct.unpack('>4sIQIIQ', self.region('header').data[:32]))
+        return magic, size
+
+    @property
+    def has_header(self):
+        return self.region('header').complete
+
+    @property
+    def virtual_size(self):
+        if not self.region('header').complete:
+            return 0
+        if not self.format_match:
+            return 0
+        magic, size = self._qcow_header_data()
+        return size
+
+    @property
+    def format_match(self):
+        if not self.region('header').complete:
+            return False
+        magic, size = self._qcow_header_data()
+        return magic == b'QFI\xFB'
+
+    @property
+    def has_backing_file(self):
+        if not self.region('header').complete:
+            return None
+        if not self.format_match:
+            return False
+        bf_offset_bytes = self.region('header').data[
+            self.BF_OFFSET:self.BF_OFFSET + self.BF_OFFSET_LEN]
+        # nonzero means "has a backing file"
+        bf_offset, = struct.unpack('>Q', bf_offset_bytes)
+        return bf_offset != 0
+
+    @property
+    def has_unknown_features(self):
+        if not self.region('header').complete:
+            return None
+        if not self.format_match:
+            return False
+        i_features = self.region('header').data[
+            self.I_FEATURES:self.I_FEATURES + self.I_FEATURES_LEN]
+
+        # This is the maximum byte number we should expect any bits to be set
+        max_byte = self.I_FEATURES_MAX_BIT // 8
+
+        # The flag bytes are in big-endian ordering, so if we process
+        # them in index-order, they're reversed
+        for i, byte_num in enumerate(reversed(range(self.I_FEATURES_LEN))):
+            if byte_num == max_byte:
+                # If we're in the max-allowed byte, allow any bits less than
+                # the maximum-known feature flag bit to be set
+                allow_mask = ((1 << self.I_FEATURES_MAX_BIT) - 1)
+            elif byte_num > max_byte:
+                # If we're above the byte with the maximum known feature flag
+                # bit, then we expect all zeroes
+                allow_mask = 0x0
+            else:
+                # Any earlier-than-the-maximum byte can have any of the flag
+                # bits set
+                allow_mask = 0xFF
+
+            if i_features[i] & ~allow_mask:
+                LOG.warning('Found unknown feature bit in byte %i: %s/%s',
+                            byte_num, bin(i_features[byte_num] & ~allow_mask),
+                            bin(allow_mask))
+                return True
+
+        return False
+
+    @property
+    def has_data_file(self):
+        if not self.region('header').complete:
+            return None
+        if not self.format_match:
+            return False
+        i_features = self.region('header').data[
+            self.I_FEATURES:self.I_FEATURES + self.I_FEATURES_LEN]
+
+        # First byte of bitfield, which is i_features[7]
+        byte = self.I_FEATURES_LEN - 1 - self.I_FEATURES_DATAFILE_BIT // 8
+        # Third bit of bitfield, which is 0x04
+        bit = 1 << (self.I_FEATURES_DATAFILE_BIT - 1 % 8)
+        return bool(i_features[byte] & bit)
+
+    def __str__(self):
+        return 'qcow2'
+
+    def safety_check(self):
+        return (not self.has_backing_file and
+                not self.has_data_file and
+                not self.has_unknown_features)
+
+
+# The VHD (or VPC as QEMU calls it) format consists of a big-endian
+# 512-byte "footer" at the beginning of the file with various
+# information, most of which does not matter to us:
+#
+# Dec   Hex   Name
+#   0  0x00   Magic string (8-bytes, always 'conectix')
+#  40  0x28   Disk size (uint64_t)
+#
+# https://github.com/qemu/qemu/blob/master/block/vpc.c
+class VHDInspector(FileInspector):
+    """Connectix/MS VPC VHD Format
+
+    This should only require about 512 bytes of the beginning of the file
+    to determine the virtual size.
+    """
+
+    def __init__(self, *a, **k):
+        super(VHDInspector, self).__init__(*a, **k)
+        self.new_region('header', CaptureRegion(0, 512))
+
+    @property
+    def format_match(self):
+        return self.region('header').data.startswith(b'conectix')
+
+    @property
+    def virtual_size(self):
+        if not self.region('header').complete:
+            return 0
+
+        if not self.format_match:
+            return 0
+
+        return struct.unpack('>Q', self.region('header').data[40:48])[0]
+
+    def __str__(self):
+        return 'vhd'
+
+
+# The VHDX format consists of a complex dynamic little-endian
+# structure with multiple regions of metadata and data, linked by
+# offsets with in the file (and within regions), identified by MSFT
+# GUID strings. The header is a 320KiB structure, only a few pieces of
+# which we actually need to capture and interpret:
+#
+#     Dec    Hex  Name
+#      0 0x00000  Identity (Technically 9-bytes, padded to 64KiB, the first
+#                 8 bytes of which are 'vhdxfile')
+# 196608 0x30000  The Region table (64KiB of a 32-byte header, followed
+#                 by up to 2047 36-byte region table entry structures)
+#
+# The region table header includes two items we need to read and parse,
+# which are:
+#
+# 196608 0x30000  4-byte signature ('regi')
+# 196616 0x30008  Entry count (uint32-t)
+#
+# The region table entries follow the region table header immediately
+# and are identified by a 16-byte GUID, and provide an offset of the
+# start of that region. We care about the "metadata region", identified
+# by the METAREGION class variable. The region table entry is (offsets
+# from the beginning of the entry, since it could be in multiple places):
+#
+#      0 0x00000 16-byte MSFT GUID
+#     16 0x00010 Offset of the actual metadata region (uint64_t)
+#
+# When we find the METAREGION table entry, we need to grab that offset
+# and start examining the region structure at that point. That
+# consists of a metadata table of structures, which point to places in
+# the data in an unstructured space that follows. The header is
+# (offsets relative to the region start):
+#
+#      0 0x00000 8-byte signature ('metadata')
+#      . . .
+#     16 0x00010 2-byte entry count (up to 2047 entries max)
+#
+# This header is followed by the specified number of metadata entry
+# structures, identified by GUID:
+#
+#      0 0x00000 16-byte MSFT GUID
+#     16 0x00010 4-byte offset (uint32_t, relative to the beginning of
+#                the metadata region)
+#
+# We need to find the "Virtual Disk Size" metadata item, identified by
+# the GUID in the VIRTUAL_DISK_SIZE class variable, grab the offset,
+# add it to the offset of the metadata region, and examine that 8-byte
+# chunk of data that follows.
+#
+# The "Virtual Disk Size" is a naked uint64_t which contains the size
+# of the virtual disk, and is our ultimate target here.
+#
+# https://docs.microsoft.com/en-us/openspecs/windows_protocols/ms-vhdx/83e061f8-f6e2-4de1-91bd-5d518a43d477
+class VHDXInspector(FileInspector):
+    """MS VHDX Format
+
+    This requires some complex parsing of the stream. The first 256KiB
+    of the image is stored to get the header and region information,
+    and then we capture the first metadata region to read those
+    records, find the location of the virtual size data and parse
+    it. This needs to store the metadata table entries up until the
+    VDS record, which may consist of up to 2047 32-byte entries at
+    max.  Finally, it must store a chunk of data at the offset of the
+    actual VDS uint64.
+
+    """
+    METAREGION = '8B7CA206-4790-4B9A-B8FE-575F050F886E'
+    VIRTUAL_DISK_SIZE = '2FA54224-CD1B-4876-B211-5DBED83BF4B8'
+    VHDX_METADATA_TABLE_MAX_SIZE = 32 * 2048  # From qemu
+
+    def __init__(self, *a, **k):
+        super(VHDXInspector, self).__init__(*a, **k)
+        self.new_region('ident', CaptureRegion(0, 32))
+        self.new_region('header', CaptureRegion(192 * 1024, 64 * 1024))
+
+    def post_process(self):
+        # After reading a chunk, we may have the following conditions:
+        #
+        # 1. We may have just completed the header region, and if so,
+        #    we need to immediately read and calculate the location of
+        #    the metadata region, as it may be starting in the same
+        #    read we just did.
+        # 2. We may have just completed the metadata region, and if so,
+        #    we need to immediately calculate the location of the
+        #    "virtual disk size" record, as it may be starting in the
+        #    same read we just did.
+        if self.region('header').complete and not self.has_region('metadata'):
+            region = self._find_meta_region()
+            if region:
+                self.new_region('metadata', region)
+        elif self.has_region('metadata') and not self.has_region('vds'):
+            region = self._find_meta_entry(self.VIRTUAL_DISK_SIZE)
+            if region:
+                self.new_region('vds', region)
+
+    @property
+    def format_match(self):
+        return self.region('ident').data.startswith(b'vhdxfile')
+
+    @staticmethod
+    def _guid(buf):
+        """Format a MSFT GUID from the 16-byte input buffer."""
+        guid_format = '<IHHBBBBBBBB'
+        return '%08X-%04X-%04X-%02X%02X-%02X%02X%02X%02X%02X%02X' % (
+            struct.unpack(guid_format, buf))
+
+    def _find_meta_region(self):
+        # The region table entries start after a 16-byte table header
+        region_entry_first = 16
+
+        # Parse the region table header to find the number of regions
+        regi, cksum, count, reserved = struct.unpack(
+            '<IIII', self.region('header').data[:16])
+        if regi != 0x69676572:
+            raise ImageFormatError('Region signature not found at %x' % (
+                self.region('header').offset))
+
+        if count >= 2048:
+            raise ImageFormatError('Region count is %i (limit 2047)' % count)
+
+        # Process the regions until we find the metadata one; grab the
+        # offset and return
+        self._log.debug('Region entry first is %x', region_entry_first)
+        self._log.debug('Region entries %i', count)
+        meta_offset = 0
+        for i in range(0, count):
+            entry_start = region_entry_first + (i * 32)
+            entry_end = entry_start + 32
+            entry = self.region('header').data[entry_start:entry_end]
+            self._log.debug('Entry offset is %x', entry_start)
+
+            # GUID is the first 16 bytes
+            guid = self._guid(entry[:16])
+            if guid == self.METAREGION:
+                # This entry is the metadata region entry
+                meta_offset, meta_len, meta_req = struct.unpack(
+                    '<QII', entry[16:])
+                self._log.debug('Meta entry %i specifies offset: %x',
+                                i, meta_offset)
+                # NOTE(danms): The meta_len in the region descriptor is the
+                # entire size of the metadata table and data. This can be
+                # very large, so we should only capture the size required
+                # for the maximum length of the table, which is one 32-byte
+                # table header, plus up to 2047 32-byte entries.
+                meta_len = 2048 * 32
+                return CaptureRegion(meta_offset, meta_len)
+
+        self._log.warning('Did not find metadata region')
+        return None
+
+    def _find_meta_entry(self, desired_guid):
+        meta_buffer = self.region('metadata').data
+        if len(meta_buffer) < 32:
+            # Not enough data yet for full header
+            return None
+
+        # Make sure we found the metadata region by checking the signature
+        sig, reserved, count = struct.unpack('<8sHH', meta_buffer[:12])
+        if sig != b'metadata':
+            raise ImageFormatError(
+                'Invalid signature for metadata region: %r' % sig)
+
+        entries_size = 32 + (count * 32)
+        if len(meta_buffer) < entries_size:
+            # Not enough data yet for all metadata entries. This is not
+            # strictly necessary as we could process whatever we have until
+            # we find the V-D-S one, but there are only 2047 32-byte
+            # entries max (~64k).
+            return None
+
+        if count >= 2048:
+            raise ImageFormatError(
+                'Metadata item count is %i (limit 2047)' % count)
+
+        for i in range(0, count):
+            entry_offset = 32 + (i * 32)
+            guid = self._guid(meta_buffer[entry_offset:entry_offset + 16])
+            if guid == desired_guid:
+                # Found the item we are looking for by id.
+                # Stop our region from capturing
+                item_offset, item_length, _reserved = struct.unpack(
+                    '<III',
+                    meta_buffer[entry_offset + 16:entry_offset + 28])
+                item_length = min(item_length,
+                                  self.VHDX_METADATA_TABLE_MAX_SIZE)
+                self.region('metadata').length = len(meta_buffer)
+                self._log.debug('Found entry at offset %x', item_offset)
+                # Metadata item offset is from the beginning of the metadata
+                # region, not the file.
+                return CaptureRegion(
+                    self.region('metadata').offset + item_offset,
+                    item_length)
+
+        self._log.warning('Did not find guid %s', desired_guid)
+        return None
+
+    @property
+    def virtual_size(self):
+        # Until we have found the offset and have enough metadata buffered
+        # to read it, return "unknown"
+        if not self.has_region('vds') or not self.region('vds').complete:
+            return 0
+
+        size, = struct.unpack('<Q', self.region('vds').data)
+        return size
+
+    def __str__(self):
+        return 'vhdx'
+
+
+# The VMDK format comes in a large number of variations, but the
+# single-file 'monolithicSparse' version 4 one is mostly what we care
+# about. It contains a 512-byte little-endian header, followed by a
+# variable-length "descriptor" region of text. The header looks like:
+#
+#   Dec  Hex  Name
+#     0 0x00  4-byte magic string 'KDMV'
+#     4 0x04  Version (uint32_t)
+#     8 0x08  Flags (uint32_t, unused by us)
+#    16 0x10  Number of 512 byte sectors in the disk (uint64_t)
+#    24 0x18  Granularity (uint64_t, unused by us)
+#    32 0x20  Descriptor offset in 512-byte sectors (uint64_t)
+#    40 0x28  Descriptor size in 512-byte sectors (uint64_t)
+#
+# After we have the header, we need to find the descriptor region,
+# which starts at the sector identified in the "descriptor offset"
+# field, and is "descriptor size" 512-byte sectors long. Once we have
+# that region, we need to parse it as text, looking for the
+# createType=XXX line that specifies the mechanism by which the data
+# extents are stored in this file. We only support the
+# "monolithicSparse" format, so we just need to confirm that this file
+# contains that specifier.
+#
+# https://www.vmware.com/app/vmdk/?src=vmdk
+class VMDKInspector(FileInspector):
+    """vmware VMDK format (monolithicSparse and streamOptimized variants only)
+
+    This needs to store the 512 byte header and the descriptor region
+    which should be just after that. The descriptor region is some
+    variable number of 512 byte sectors, but is just text defining the
+    layout of the disk.
+    """
+
+    # The beginning and max size of the descriptor is also hardcoded in Qemu
+    # at 0x200 and 1MB - 1
+    DESC_OFFSET = 0x200
+    DESC_MAX_SIZE = (1 << 20) - 1
+    GD_AT_END = 0xffffffffffffffff
+
+    def __init__(self, *a, **k):
+        super(VMDKInspector, self).__init__(*a, **k)
+        self.new_region('header', CaptureRegion(0, 512))
+
+    def post_process(self):
+        # If we have just completed the header region, we need to calculate
+        # the location and length of the descriptor, which should immediately
+        # follow and may have been partially-read in this read.
+        if not self.region('header').complete:
+            return
+
+        (sig, ver, _flags, _sectors, _grain, desc_sec, desc_num,
+         _numGTEsperGT, _rgdOffset, gdOffset) = struct.unpack(
+            '<4sIIQQQQIQQ', self.region('header').data[:64])
+
+        if sig != b'KDMV':
+            raise ImageFormatError('Signature KDMV not found: %r' % sig)
+
+        if ver not in (1, 2, 3):
+            raise ImageFormatError('Unsupported format version %i' % ver)
+
+        if gdOffset == self.GD_AT_END:
+            # This means we have a footer, which takes precedence over the
+            # header, which we cannot support since we stream.
+            raise ImageFormatError('Unsupported VMDK footer')
+
+        # Since we parse both desc_sec and desc_num (the location of the
+        # VMDK's descriptor, expressed in 512 bytes sectors) we enforce a
+        # check on the bounds to create a reasonable CaptureRegion. This
+        # is similar to how it's done in qemu.
+        desc_offset = desc_sec * 512
+        desc_size = min(desc_num * 512, self.DESC_MAX_SIZE)
+        if desc_offset != self.DESC_OFFSET:
+            raise ImageFormatError("Wrong descriptor location")
+
+        if not self.has_region('descriptor'):
+            self.new_region('descriptor', CaptureRegion(
+                desc_offset, desc_size))
+
+    @property
+    def format_match(self):
+        return self.region('header').data.startswith(b'KDMV')
+
+    @property
+    def virtual_size(self):
+        if not self.has_region('descriptor'):
+            # Not enough data yet
+            return 0
+
+        descriptor_rgn = self.region('descriptor')
+        if not descriptor_rgn.complete:
+            # Not enough data yet
+            return 0
+
+        descriptor = descriptor_rgn.data
+        type_idx = descriptor.index(b'createType="') + len(b'createType="')
+        type_end = descriptor.find(b'"', type_idx)
+        # Make sure we don't grab and log a huge chunk of data in a
+        # maliciously-formatted descriptor region
+        if type_end - type_idx < 64:
+            vmdktype = descriptor[type_idx:type_end]
+        else:
+            vmdktype = b'formatnotfound'
+        if vmdktype not in (b'monolithicSparse', b'streamOptimized'):
+            LOG.warning('Unsupported VMDK format %s', vmdktype)
+            return 0
+
+        # If we have the descriptor, we definitely have the header
+        _sig, _ver, _flags, sectors, _grain, _desc_sec, _desc_num = (
+            struct.unpack('<IIIQQQQ', self.region('header').data[:44]))
+
+        return sectors * 512
+
+    def safety_check(self):
+        if (not self.has_region('descriptor') or
+                not self.region('descriptor').complete):
+            return False
+
+        try:
+            # Descriptor is padded to 512 bytes
+            desc_data = self.region('descriptor').data.rstrip(b'\x00')
+            # Descriptor is actually case-insensitive ASCII text
+            desc_text = desc_data.decode('ascii').lower()
+        except UnicodeDecodeError:
+            LOG.error('VMDK descriptor failed to decode as ASCII')
+            raise ImageFormatError('Invalid VMDK descriptor data')
+
+        extent_access = ('rw', 'rdonly', 'noaccess')
+        header_fields = []
+        extents = []
+        ddb = []
+
+        # NOTE(danms): Cautiously parse the VMDK descriptor. Each line must
+        # be something we understand, otherwise we refuse it.
+        for line in [x.strip() for x in desc_text.split('\n')]:
+            if line.startswith('#') or not line:
+                # Blank or comment lines are ignored
+                continue
+            elif line.startswith('ddb'):
+                # DDB lines are allowed (but not used by us)
+                ddb.append(line)
+            elif '=' in line and ' ' not in line.split('=')[0]:
+                # Header fields are a single word followed by an '=' and some
+                # value
+                header_fields.append(line)
+            elif line.split(' ')[0] in extent_access:
+                # Extent lines start with one of the three access modes
+                extents.append(line)
+            else:
+                # Anything else results in a rejection
+                LOG.error('Unsupported line %r in VMDK descriptor', line)
+                raise ImageFormatError('Invalid VMDK descriptor data')
+
+        # Check all the extent lines for concerning content
+        for extent_line in extents:
+            if '/' in extent_line:
+                LOG.error('Extent line %r contains unsafe characters',
+                          extent_line)
+                return False
+
+        if not extents:
+            LOG.error('VMDK file specified no extents')
+            return False
+
+        return True
+
+    def __str__(self):
+        return 'vmdk'
+
+
+# The VirtualBox VDI format consists of a 512-byte little-endian
+# header, some of which we care about:
+#
+#  Dec   Hex  Name
+#   64  0x40  4-byte Magic (0xbeda107f)
+#   . . .
+#  368 0x170  Size in bytes (uint64_t)
+#
+# https://github.com/qemu/qemu/blob/master/block/vdi.c
+class VDIInspector(FileInspector):
+    """VirtualBox VDI format
+
+    This only needs to store the first 512 bytes of the image.
+    """
+
+    def __init__(self, *a, **k):
+        super(VDIInspector, self).__init__(*a, **k)
+        self.new_region('header', CaptureRegion(0, 512))
+
+    @property
+    def format_match(self):
+        if not self.region('header').complete:
+            return False
+
+        signature, = struct.unpack('<I', self.region('header').data[0x40:0x44])
+        return signature == 0xbeda107f
+
+    @property
+    def virtual_size(self):
+        if not self.region('header').complete:
+            return 0
+        if not self.format_match:
+            return 0
+
+        size, = struct.unpack('<Q', self.region('header').data[0x170:0x178])
+        return size
+
+    def __str__(self):
+        return 'vdi'
+
+
+class InfoWrapper(object):
+    """A file-like object that wraps another and updates a format inspector.
+
+    This passes chunks to the format inspector while reading. If the inspector
+    fails, it logs the error and stops calling it, but continues proxying data
+    from the source to its user.
+    """
+
+    def __init__(self, source, fmt):
+        self._source = source
+        self._format = fmt
+        self._error = False
+
+    def __iter__(self):
+        return self
+
+    def _process_chunk(self, chunk):
+        if not self._error:
+            try:
+                self._format.eat_chunk(chunk)
+            except Exception as e:
+                # Absolutely do not allow the format inspector to break
+                # our streaming of the image. If we failed, just stop
+                # trying, log and keep going.
+                LOG.error('Format inspector failed, aborting: %s', e)
+                self._error = True
+
+    def __next__(self):
+        try:
+            chunk = next(self._source)
+        except StopIteration:
+            raise
+        self._process_chunk(chunk)
+        return chunk
+
+    def read(self, size):
+        chunk = self._source.read(size)
+        self._process_chunk(chunk)
+        return chunk
+
+    def close(self):
+        if hasattr(self._source, 'close'):
+            self._source.close()
+
+
+def get_inspector(format_name):
+    """Returns a FormatInspector class based on the given name.
+
+    :param format_name: The name of the disk_format (raw, qcow2, etc).
+    :returns: A FormatInspector or None if unsupported.
+    """
+    formats = {
+        'raw': FileInspector,
+        'qcow2': QcowInspector,
+        'vhd': VHDInspector,
+        'vhdx': VHDXInspector,
+        'vmdk': VMDKInspector,
+        'vdi': VDIInspector,
+    }
+
+    return formats.get(format_name)
diff --git a/nova/tests/unit/virt/libvirt/test_utils.py b/nova/tests/unit/virt/libvirt/test_utils.py
index 797480bdc00..d95cc488723 100644
--- a/nova/tests/unit/virt/libvirt/test_utils.py
+++ b/nova/tests/unit/virt/libvirt/test_utils.py
@@ -29,6 +29,7 @@
 from nova.compute import utils as compute_utils
 from nova import context
 from nova import exception
+from nova.image import format_inspector
 from nova import objects
 from nova.objects import fields as obj_fields
 import nova.privsep.fs
@@ -321,11 +322,13 @@ def test_fetch_initrd_image(self, mock_images):
         mock_images.assert_called_once_with(
             _context, image_id, target, trusted_certs)
 
+    @mock.patch.object(images, 'IMAGE_API')
+    @mock.patch.object(format_inspector, 'get_inspector')
     @mock.patch.object(compute_utils, 'disk_ops_semaphore')
     @mock.patch('nova.privsep.utils.supports_direct_io', return_value=True)
     @mock.patch('nova.privsep.qemu.unprivileged_convert_image')
     def test_fetch_raw_image(self, mock_convert_image, mock_direct_io,
-                             mock_disk_op_sema):
+                             mock_disk_op_sema, mock_gi, mock_glance):
 
         def fake_rename(old, new):
             self.executes.append(('mv', old, new))
@@ -336,7 +339,7 @@ def fake_unlink(path):
         def fake_rm_on_error(path, remove=None):
             self.executes.append(('rm', '-f', path))
 
-        def fake_qemu_img_info(path):
+        def fake_qemu_img_info(path, format=None):
             class FakeImgInfo(object):
                 pass
 
@@ -365,6 +368,8 @@ class FakeImgInfo(object):
         self.stub_out('oslo_utils.fileutils.delete_if_exists',
                       fake_rm_on_error)
 
+        mock_inspector = mock_gi.return_value.from_file.return_value
+
         # Since the remove param of fileutils.remove_path_on_error()
         # is initialized at load time, we must provide a wrapper
         # that explicitly resets it to our fake delete_if_exists()
@@ -375,6 +380,9 @@ class FakeImgInfo(object):
         context = 'opaque context'
         image_id = '4'
 
+        # Make sure qcow2 gets converted to raw
+        mock_inspector.safety_check.return_value = True
+        mock_glance.get.return_value = {'disk_format': 'qcow2'}
         target = 't.qcow2'
         self.executes = []
         expected_commands = [('rm', 't.qcow2.part'),
@@ -386,14 +394,44 @@ class FakeImgInfo(object):
             't.qcow2.part', 't.qcow2.converted', 'qcow2', 'raw',
             CONF.instances_path, False)
         mock_convert_image.reset_mock()
-
+        mock_inspector.safety_check.assert_called_once_with()
+        mock_gi.assert_called_once_with('qcow2')
+
+        # Make sure raw does not get converted
+        mock_gi.reset_mock()
+        mock_inspector.safety_check.reset_mock()
+        mock_inspector.safety_check.return_value = True
+        mock_glance.get.return_value = {'disk_format': 'raw'}
         target = 't.raw'
         self.executes = []
         expected_commands = [('mv', 't.raw.part', 't.raw')]
         images.fetch_to_raw(context, image_id, target)
         self.assertEqual(self.executes, expected_commands)
         mock_convert_image.assert_not_called()
-
+        mock_inspector.safety_check.assert_called_once_with()
+        mock_gi.assert_called_once_with('raw')
+
+        # Make sure safety check failure prevents us from proceeding
+        mock_gi.reset_mock()
+        mock_inspector.safety_check.reset_mock()
+        mock_inspector.safety_check.return_value = False
+        mock_glance.get.return_value = {'disk_format': 'qcow2'}
+        target = 'backing.qcow2'
+        self.executes = []
+        expected_commands = [('rm', '-f', 'backing.qcow2.part')]
+        self.assertRaises(exception.ImageUnacceptable,
+                          images.fetch_to_raw, context, image_id, target)
+        self.assertEqual(self.executes, expected_commands)
+        mock_convert_image.assert_not_called()
+        mock_inspector.safety_check.assert_called_once_with()
+        mock_gi.assert_called_once_with('qcow2')
+
+        # Make sure a format mismatch prevents us from proceeding
+        mock_gi.reset_mock()
+        mock_inspector.safety_check.reset_mock()
+        mock_inspector.safety_check.side_effect = (
+            format_inspector.ImageFormatError)
+        mock_glance.get.return_value = {'disk_format': 'qcow2'}
         target = 'backing.qcow2'
         self.executes = []
         expected_commands = [('rm', '-f', 'backing.qcow2.part')]
@@ -401,6 +439,8 @@ class FakeImgInfo(object):
                           images.fetch_to_raw, context, image_id, target)
         self.assertEqual(self.executes, expected_commands)
         mock_convert_image.assert_not_called()
+        mock_inspector.safety_check.assert_called_once_with()
+        mock_gi.assert_called_once_with('qcow2')
 
         del self.executes
 
diff --git a/nova/tests/unit/virt/test_images.py b/nova/tests/unit/virt/test_images.py
index e0b08bb9f59..29cb6994e44 100644
--- a/nova/tests/unit/virt/test_images.py
+++ b/nova/tests/unit/virt/test_images.py
@@ -21,6 +21,7 @@
 
 from nova.compute import utils as compute_utils
 from nova import exception
+from nova.image import format_inspector
 from nova import test
 from nova.virt import images
 
@@ -99,11 +100,17 @@ def test_qemu_img_info_with_disk_not_found(self, exists, mocked_execute):
         exists.assert_called_once_with(path)
         mocked_execute.assert_called_once()
 
+    @mock.patch.object(images, 'IMAGE_API')
+    @mock.patch('nova.image.format_inspector.get_inspector')
     @mock.patch.object(images, 'convert_image',
                        side_effect=exception.ImageUnacceptable)
     @mock.patch.object(images, 'qemu_img_info')
     @mock.patch.object(images, 'fetch')
-    def test_fetch_to_raw_errors(self, convert_image, qemu_img_info, fetch):
+    def test_fetch_to_raw_errors(self, convert_image, qemu_img_info, fetch,
+                                 get_inspector, glance):
+        inspector = get_inspector.return_value.from_file.return_value
+        inspector.safety_check.return_value = True
+        glance.get.return_value = {'disk_format': 'qcow2'}
         qemu_img_info.backing_file = None
         qemu_img_info.file_format = 'qcow2'
         qemu_img_info.virtual_size = 20
@@ -112,12 +119,17 @@ def test_fetch_to_raw_errors(self, convert_image, qemu_img_info, fetch):
                                images.fetch_to_raw,
                                None, 'href123', '/no/path')
 
+    @mock.patch.object(images, 'IMAGE_API')
+    @mock.patch('nova.image.format_inspector.get_inspector')
     @mock.patch.object(images, 'convert_image',
                        side_effect=exception.ImageUnacceptable)
     @mock.patch.object(images, 'qemu_img_info')
     @mock.patch.object(images, 'fetch')
     def test_fetch_to_raw_data_file(self, convert_image, qemu_img_info_fn,
-                                    fetch):
+                                    fetch, mock_gi, mock_glance):
+        mock_glance.get.return_value = {'disk_format': 'qcow2'}
+        inspector = mock_gi.return_value.from_file.return_value
+        inspector.safety_check.return_value = True
         # NOTE(danms): the above test needs the following line as well, as it
         # is broken without it.
         qemu_img_info = qemu_img_info_fn.return_value
@@ -130,12 +142,16 @@ def test_fetch_to_raw_data_file(self, convert_image, qemu_img_info_fn,
                                images.fetch_to_raw,
                                None, 'href123', '/no/path')
 
+    @mock.patch('nova.image.format_inspector.get_inspector')
+    @mock.patch.object(images, 'IMAGE_API')
     @mock.patch('os.rename')
     @mock.patch.object(images, 'qemu_img_info')
     @mock.patch.object(images, 'fetch')
-    def test_fetch_to_raw_from_raw(self, fetch, qemu_img_info_fn, mock_rename):
+    def test_fetch_to_raw_from_raw(self, fetch, qemu_img_info_fn, mock_rename,
+                                   mock_glance, mock_gi):
         # Make sure we support a case where we fetch an already-raw image and
         # qemu-img returns None for "format_specific".
+        mock_glance.get.return_value = {'disk_format': 'raw'}
         qemu_img_info = qemu_img_info_fn.return_value
         qemu_img_info.file_format = 'raw'
         qemu_img_info.backing_file = None
@@ -198,9 +214,15 @@ def test_convert_image_vmdk_allowed_list_checking(self):
                           imageutils.QemuImgInfo(jsonutils.dumps(info),
                                                  format='json'))
 
+    @mock.patch.object(images, 'IMAGE_API')
+    @mock.patch('nova.image.format_inspector.get_inspector')
     @mock.patch.object(images, 'fetch')
     @mock.patch('nova.privsep.qemu.unprivileged_qemu_img_info')
-    def test_fetch_checks_vmdk_rules(self, mock_info, mock_fetch):
+    def test_fetch_checks_vmdk_rules(self, mock_info, mock_fetch, mock_gi,
+                                     mock_glance):
+        mock_glance.get.return_value = {'disk_format': 'vmdk'}
+        inspector = mock_gi.return_value.from_file.return_value
+        inspector.safety_check.return_value = True
         info = {'format': 'vmdk',
                 'format-specific': {
                     'type': 'vmdk',
@@ -212,3 +234,91 @@ def test_fetch_checks_vmdk_rules(self, mock_info, mock_fetch):
             e = self.assertRaises(exception.ImageUnacceptable,
                                   images.fetch_to_raw, None, 'foo', 'anypath')
             self.assertIn('Invalid VMDK create-type specified', str(e))
+
+    @mock.patch.object(images, 'IMAGE_API')
+    @mock.patch('nova.image.format_inspector.get_inspector')
+    @mock.patch.object(images, 'qemu_img_info')
+    @mock.patch.object(images, 'fetch')
+    def test_fetch_to_raw_inspector(self, fetch, qemu_img_info, mock_gi,
+                                    mock_glance):
+        # Image claims to be qcow2, is qcow2, but fails safety check, so we
+        # abort before qemu-img-info
+        mock_glance.get.return_value = {'disk_format': 'qcow2'}
+        inspector = mock_gi.return_value.from_file.return_value
+        inspector.safety_check.return_value = False
+        self.assertRaises(exception.ImageUnacceptable,
+                          images.fetch_to_raw, None, 'href123', '/no.path')
+        qemu_img_info.assert_not_called()
+        mock_gi.assert_called_once_with('qcow2')
+        mock_gi.return_value.from_file.assert_called_once_with('/no.path.part')
+        inspector.safety_check.assert_called_once_with()
+        mock_glance.get.assert_called_once_with(None, 'href123')
+
+        # Image claims to be qcow2, is qcow2, passes safety check, so we make
+        # it all the way to qemu-img-info
+        inspector.safety_check.return_value = True
+        qemu_img_info.side_effect = test.TestingException
+        self.assertRaises(test.TestingException,
+                          images.fetch_to_raw, None, 'href123', '/no.path')
+
+        # Image claims to be qcow2 in glance, but the image is something else,
+        # so we abort before qemu-img-info
+        qemu_img_info.reset_mock()
+        mock_gi.reset_mock()
+        inspector.safety_check.reset_mock()
+        mock_gi.return_value.from_file.side_effect = (
+            format_inspector.ImageFormatError)
+        self.assertRaises(exception.ImageUnacceptable,
+                          images.fetch_to_raw, None, 'href123', '/no.path')
+        mock_gi.assert_called_once_with('qcow2')
+        inspector.safety_check.assert_not_called()
+        qemu_img_info.assert_not_called()
+
+    @mock.patch.object(images, 'IMAGE_API')
+    @mock.patch('nova.image.format_inspector.get_inspector')
+    @mock.patch.object(images, 'qemu_img_info')
+    @mock.patch.object(images, 'fetch')
+    def test_fetch_to_raw_inspector_disabled(self, fetch, qemu_img_info,
+                                             mock_gi, mock_glance):
+        self.flags(disable_deep_image_inspection=True,
+                   group='workarounds')
+        qemu_img_info.side_effect = test.TestingException
+        self.assertRaises(test.TestingException,
+                          images.fetch_to_raw, None, 'href123', '/no.path')
+        # If deep inspection is disabled, we should never call the inspector
+        mock_gi.assert_not_called()
+        # ... and we let qemu-img detect the format itself.
+        qemu_img_info.assert_called_once_with('/no.path.part',
+                                              format=None)
+        mock_glance.get.assert_not_called()
+
+    @mock.patch.object(images, 'IMAGE_API')
+    @mock.patch.object(images, 'qemu_img_info')
+    def test_fetch_inspect_ami(self, imginfo, glance):
+        glance.get.return_value = {'disk_format': 'ami'}
+        self.assertRaises(exception.ImageUnacceptable,
+                          images.fetch_to_raw, None, 'href123', '/no.path')
+        # Make sure 'ami was translated into 'raw' before we call qemu-img
+        imginfo.assert_called_once_with('/no.path.part', format='raw')
+
+    @mock.patch.object(images, 'IMAGE_API')
+    @mock.patch.object(images, 'qemu_img_info')
+    def test_fetch_inspect_unknown_format(self, imginfo, glance):
+        glance.get.return_value = {'disk_format': 'commodore-64-disk'}
+        self.assertRaises(exception.ImageUnacceptable,
+                          images.fetch_to_raw, None, 'href123', '/no.path')
+        # Unsupported formats do not make it past deep inspection
+        imginfo.assert_not_called()
+
+    @mock.patch.object(images, 'IMAGE_API')
+    @mock.patch.object(images, 'qemu_img_info')
+    @mock.patch('nova.image.format_inspector.get_inspector')
+    def test_fetch_inspect_disagrees_qemu(self, mock_gi, imginfo, glance):
+        glance.get.return_value = {'disk_format': 'qcow2'}
+        # Glance and inspector think it is a qcow2 file, but qemu-img does not
+        # agree. It was forced to interpret as a qcow2, but returned no
+        # format information as a result.
+        imginfo.return_value.data_file = None
+        self.assertRaises(exception.ImageUnacceptable,
+                          images.fetch_to_raw, None, 'href123', '/no.path')
+        imginfo.assert_called_once_with('/no.path.part', format='qcow2')
diff --git a/nova/virt/images.py b/nova/virt/images.py
index 5f80a1d0758..8376985c111 100644
--- a/nova/virt/images.py
+++ b/nova/virt/images.py
@@ -30,6 +30,7 @@
 import nova.conf
 from nova import exception
 from nova.i18n import _
+from nova.image import format_inspector
 from nova.image import glance
 import nova.privsep.qemu
 
@@ -138,13 +139,57 @@ def check_vmdk_image(image_id, data):
         raise exception.ImageUnacceptable(image_id=image_id, reason=msg)
 
 
+def do_image_deep_inspection(img, image_href, path):
+    disk_format = img['disk_format']
+    try:
+        # NOTE(danms): Use our own cautious inspector module to make sure
+        # the image file passes safety checks.
+        # See https://bugs.launchpad.net/nova/+bug/2059809 for details.
+        inspector_cls = format_inspector.get_inspector(disk_format)
+        if not inspector_cls.from_file(path).safety_check():
+            raise exception.ImageUnacceptable(
+                image_id=image_href,
+                reason=(_('Image does not pass safety check')))
+    except format_inspector.ImageFormatError:
+        # If the inspector we chose based on the image's metadata does not
+        # think the image is the proper format, we refuse to use it.
+        raise exception.ImageUnacceptable(
+            image_id=image_href,
+            reason=_('Image content does not match disk_format'))
+    except AttributeError:
+        # No inspector was found
+        LOG.warning('Unable to perform deep image inspection on type %r',
+                    img['disk_format'])
+        if disk_format == 'ami':
+            # A lot of things can be in a UEC, although it is typically a raw
+            # filesystem. We really have nothing we can do other than treat it
+            # like a 'raw', which is what qemu-img will detect a filesystem as
+            # anyway. If someone puts a qcow2 inside, we should fail because
+            # we won't do our inspection.
+            disk_format = 'raw'
+        else:
+            raise exception.ImageUnacceptable(
+                image_id=image_href,
+                reason=_('Image not in a supported format'))
+    return disk_format
+
+
 def fetch_to_raw(context, image_href, path, trusted_certs=None):
     path_tmp = "%s.part" % path
     fetch(context, image_href, path_tmp, trusted_certs)
 
     with fileutils.remove_path_on_error(path_tmp):
-        data = qemu_img_info(path_tmp)
+        if not CONF.workarounds.disable_deep_image_inspection:
+            # If we're doing deep inspection, we take the determined format
+            # from it.
+            img = IMAGE_API.get(context, image_href)
+            force_format = do_image_deep_inspection(img, image_href, path_tmp)
+        else:
+            force_format = None
 
+        # Only run qemu-img after we have done deep inspection (if enabled).
+        # If it was not enabled, we will let it detect the format.
+        data = qemu_img_info(path_tmp, format=force_format)
         fmt = data.file_format
         if fmt is None:
             raise exception.ImageUnacceptable(

From fe88152e24408e1c616319fa57115e62d197d1f1 Mon Sep 17 00:00:00 2001
From: Dan Smith <dansmith@redhat.com>
Date: Mon, 24 Jun 2024 09:09:36 -0700
Subject: [PATCH 10/23] Additional qemu safety checking on base images

There is an additional way we can be fooled into using a qcow2 file
with a data-file, which is uploading it as raw to glance and then
booting an instance from it. Because when we go to create the
ephemeral disk from a cached base image, we've lost the information
about the original source's format, we probe the image's file type
without a strict format specified. If a qcow2 file is listed in
glance as a raw, we won't notice it until it is too late.

This brings over another piece of code (proposed against) glance's
format inspector which provides a safe format detection routine. This
patch uses that to detect the format of and run a safety check on the
base image each time we go to use it to create an ephemeral disk
image from it.

This also detects QED files and always marks them as unsafe as we do
not support that format at all. Since we could be fooled into
downloading one and passing it to qemu-img if we don't recognize it,
we need to detect and reject it as unsafe.

Change-Id: I4881c8cbceb30c1ff2d2b859c554e0d02043f1f5
(cherry picked from commit 5d85ffded64b194a447b63042f78960b82c544f7)
(cherry picked from commit a343ed60a3d813b4c8da42cf70a7c1cfd92e6bec)
(cherry picked from commit 5d18a6478dfebebeaaddd8ba54ae0e203948d9b4)
---
 nova/image/format_inspector.py                | 70 ++++++++++++++++---
 nova/tests/unit/virt/libvirt/test_driver.py   |  7 +-
 .../unit/virt/libvirt/test_imagebackend.py    | 45 ++++++++++--
 nova/virt/libvirt/imagebackend.py             | 15 ++++
 nova/virt/libvirt/utils.py                    | 28 ++++++++
 5 files changed, 149 insertions(+), 16 deletions(-)

diff --git a/nova/image/format_inspector.py b/nova/image/format_inspector.py
index 268c98b99cb..8e57d7ed2c4 100644
--- a/nova/image/format_inspector.py
+++ b/nova/image/format_inspector.py
@@ -368,6 +368,23 @@ def safety_check(self):
                 not self.has_unknown_features)
 
 
+class QEDInspector(FileInspector):
+    def __init__(self, tracing=False):
+        super().__init__(tracing)
+        self.new_region('header', CaptureRegion(0, 512))
+
+    @property
+    def format_match(self):
+        if not self.region('header').complete:
+            return False
+        return self.region('header').data.startswith(b'QED\x00')
+
+    def safety_check(self):
+        # QED format is not supported by anyone, but we want to detect it
+        # and mark it as just always unsafe.
+        return False
+
+
 # The VHD (or VPC as QEMU calls it) format consists of a big-endian
 # 512-byte "footer" at the beginning of the file with various
 # information, most of which does not matter to us:
@@ -871,19 +888,52 @@ def close(self):
             self._source.close()
 
 
+ALL_FORMATS = {
+    'raw': FileInspector,
+    'qcow2': QcowInspector,
+    'vhd': VHDInspector,
+    'vhdx': VHDXInspector,
+    'vmdk': VMDKInspector,
+    'vdi': VDIInspector,
+    'qed': QEDInspector,
+}
+
+
 def get_inspector(format_name):
     """Returns a FormatInspector class based on the given name.
 
     :param format_name: The name of the disk_format (raw, qcow2, etc).
     :returns: A FormatInspector or None if unsupported.
     """
-    formats = {
-        'raw': FileInspector,
-        'qcow2': QcowInspector,
-        'vhd': VHDInspector,
-        'vhdx': VHDXInspector,
-        'vmdk': VMDKInspector,
-        'vdi': VDIInspector,
-    }
-
-    return formats.get(format_name)
+
+    return ALL_FORMATS.get(format_name)
+
+
+def detect_file_format(filename):
+    """Attempts to detect the format of a file.
+
+    This runs through a file one time, running all the known inspectors in
+    parallel. It stops reading the file once one of them matches or all of
+    them are sure they don't match.
+
+    Returns the FileInspector that matched, if any. None if 'raw'.
+    """
+    inspectors = {k: v() for k, v in ALL_FORMATS.items()}
+    with open(filename, 'rb') as f:
+        for chunk in chunked_reader(f):
+            for format, inspector in list(inspectors.items()):
+                try:
+                    inspector.eat_chunk(chunk)
+                except ImageFormatError:
+                    # No match, so stop considering this format
+                    inspectors.pop(format)
+                    continue
+                if (inspector.format_match and inspector.complete and
+                        format != 'raw'):
+                    # First complete match (other than raw) wins
+                    return inspector
+            if all(i.complete for i in inspectors.values()):
+                # If all the inspectors are sure they are not a match, avoid
+                # reading to the end of the file to settle on 'raw'.
+                break
+    return inspectors['raw']
diff --git a/nova/tests/unit/virt/libvirt/test_driver.py b/nova/tests/unit/virt/libvirt/test_driver.py
index e41cd740dd9..0e053f7d531 100644
--- a/nova/tests/unit/virt/libvirt/test_driver.py
+++ b/nova/tests/unit/virt/libvirt/test_driver.py
@@ -13793,10 +13793,11 @@ def test_create_images_and_backing_images_exist(
                                             '/fake/instance/dir', disk_info)
         self.assertFalse(mock_fetch_image.called)
 
+    @mock.patch('nova.image.format_inspector.detect_file_format')
     @mock.patch('nova.privsep.path.utime')
     @mock.patch('nova.virt.libvirt.utils.create_cow_image')
     def test_create_images_and_backing_ephemeral_gets_created(
-            self, mock_create_cow_image, mock_utime):
+            self, mock_create_cow_image, mock_utime, mock_detect):
         drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), False)
 
         base_dir = os.path.join(CONF.instances_path,
@@ -15532,11 +15533,13 @@ def test_create_ephemeral_specified_fs(self, fake_mkfs):
         fake_mkfs.assert_has_calls([mock.call('ext4', '/dev/something',
                                               'myVol')])
 
+    @mock.patch('nova.image.format_inspector.detect_file_format')
     @mock.patch('nova.privsep.path.utime')
     @mock.patch('nova.virt.libvirt.utils.fetch_image')
     @mock.patch('nova.virt.libvirt.utils.create_cow_image')
     def test_create_ephemeral_specified_fs_not_valid(
-            self, mock_create_cow_image, mock_fetch_image, mock_utime):
+            self, mock_create_cow_image, mock_fetch_image, mock_utime,
+            mock_detect):
         CONF.set_override('default_ephemeral_format', 'ext4')
         ephemerals = [{'device_type': 'disk',
                        'disk_bus': 'virtio',
diff --git a/nova/tests/unit/virt/libvirt/test_imagebackend.py b/nova/tests/unit/virt/libvirt/test_imagebackend.py
index decb27f9824..ce6cf3909a6 100644
--- a/nova/tests/unit/virt/libvirt/test_imagebackend.py
+++ b/nova/tests/unit/virt/libvirt/test_imagebackend.py
@@ -522,13 +522,15 @@ def test_cache_template_exists(self, mock_exists):
 
         mock_exists.assert_has_calls(exist_calls)
 
+    @mock.patch('nova.image.format_inspector.detect_file_format')
     @mock.patch.object(imagebackend.utils, 'synchronized')
     @mock.patch('nova.virt.libvirt.utils.create_cow_image')
     @mock.patch.object(os.path, 'exists', side_effect=[])
     @mock.patch.object(imagebackend.Image, 'verify_base_size')
     @mock.patch('nova.privsep.path.utime')
     def test_create_image(
-        self, mock_utime, mock_verify, mock_exist, mock_create, mock_sync
+        self, mock_utime, mock_verify, mock_exist, mock_create, mock_sync,
+        mock_detect_format
     ):
         mock_sync.side_effect = lambda *a, **kw: self._fake_deco
         fn = mock.MagicMock()
@@ -549,7 +551,10 @@ def test_create_image(
         mock_exist.assert_has_calls(exist_calls)
         self.assertTrue(mock_sync.called)
         mock_utime.assert_called()
+        mock_detect_format.assert_called_once()
+        mock_detect_format.return_value.safety_check.assert_called_once_with()
 
+    @mock.patch('nova.image.format_inspector.detect_file_format')
     @mock.patch.object(imagebackend.utils, 'synchronized')
     @mock.patch('nova.virt.libvirt.utils.create_cow_image')
     @mock.patch.object(imagebackend.disk, 'extend')
@@ -557,7 +562,8 @@ def test_create_image(
     @mock.patch.object(imagebackend.Qcow2, 'get_disk_size')
     @mock.patch('nova.privsep.path.utime')
     def test_create_image_too_small(self, mock_utime, mock_get, mock_exist,
-                                    mock_extend, mock_create, mock_sync):
+                                    mock_extend, mock_create, mock_sync,
+                                    mock_detect_format):
         mock_sync.side_effect = lambda *a, **kw: self._fake_deco
         mock_get.return_value = self.SIZE
         fn = mock.MagicMock()
@@ -574,7 +580,9 @@ def test_create_image_too_small(self, mock_utime, mock_get, mock_exist,
         self.assertTrue(mock_sync.called)
         self.assertFalse(mock_create.called)
         self.assertFalse(mock_extend.called)
+        mock_detect_format.assert_called_once()
 
+    @mock.patch('nova.image.format_inspector.detect_file_format')
     @mock.patch.object(imagebackend.utils, 'synchronized')
     @mock.patch('nova.virt.libvirt.utils.create_cow_image')
     @mock.patch('nova.virt.libvirt.utils.get_disk_backing_file')
@@ -586,7 +594,8 @@ def test_create_image_too_small(self, mock_utime, mock_get, mock_exist,
     def test_generate_resized_backing_files(self, mock_utime, mock_copy,
                                             mock_verify, mock_exist,
                                             mock_extend, mock_get,
-                                            mock_create, mock_sync):
+                                            mock_create, mock_sync,
+                                            mock_detect_format):
         mock_sync.side_effect = lambda *a, **kw: self._fake_deco
         mock_get.return_value = self.QCOW2_BASE
         fn = mock.MagicMock()
@@ -613,7 +622,9 @@ def test_generate_resized_backing_files(self, mock_utime, mock_copy,
         self.assertTrue(mock_sync.called)
         self.assertFalse(mock_create.called)
         mock_utime.assert_called()
+        mock_detect_format.assert_called_once()
 
+    @mock.patch('nova.image.format_inspector.detect_file_format')
     @mock.patch.object(imagebackend.utils, 'synchronized')
     @mock.patch('nova.virt.libvirt.utils.create_cow_image')
     @mock.patch('nova.virt.libvirt.utils.get_disk_backing_file')
@@ -624,7 +635,8 @@ def test_generate_resized_backing_files(self, mock_utime, mock_copy,
     def test_qcow2_exists_and_has_no_backing_file(self, mock_utime,
                                                   mock_verify, mock_exist,
                                                   mock_extend, mock_get,
-                                                  mock_create, mock_sync):
+                                                  mock_create, mock_sync,
+                                                  mock_detect_format):
         mock_sync.side_effect = lambda *a, **kw: self._fake_deco
         mock_get.return_value = None
         fn = mock.MagicMock()
@@ -645,6 +657,31 @@ def test_qcow2_exists_and_has_no_backing_file(self, mock_utime,
         self.assertTrue(mock_sync.called)
         self.assertFalse(mock_create.called)
         self.assertFalse(mock_extend.called)
+        mock_detect_format.assert_called_once()
+
+    @mock.patch('nova.image.format_inspector.detect_file_format')
+    @mock.patch.object(imagebackend.utils, 'synchronized')
+    @mock.patch('nova.virt.libvirt.utils.create_image')
+    @mock.patch('nova.virt.libvirt.utils.get_disk_backing_file')
+    @mock.patch.object(imagebackend.disk, 'extend')
+    @mock.patch.object(os.path, 'exists', side_effect=[])
+    @mock.patch.object(imagebackend.Image, 'verify_base_size')
+    def test_qcow2_exists_and_fails_safety_check(self,
+                                                 mock_verify, mock_exist,
+                                                 mock_extend, mock_get,
+                                                 mock_create, mock_sync,
+                                                 mock_detect_format):
+        mock_detect_format.return_value.safety_check.return_value = False
+        mock_sync.side_effect = lambda *a, **kw: self._fake_deco
+        mock_get.return_value = None
+        fn = mock.MagicMock()
+        mock_exist.side_effect = [False, True, False, True, True]
+        image = self.image_class(self.INSTANCE, self.NAME)
+
+        self.assertRaises(exception.InvalidDiskInfo,
+                          image.create_image, fn, self.TEMPLATE_PATH,
+                          self.SIZE)
+        mock_verify.assert_not_called()
 
     def test_resolve_driver_format(self):
         image = self.image_class(self.INSTANCE, self.NAME)
diff --git a/nova/virt/libvirt/imagebackend.py b/nova/virt/libvirt/imagebackend.py
index 617adfe0303..6a5252b11b3 100644
--- a/nova/virt/libvirt/imagebackend.py
+++ b/nova/virt/libvirt/imagebackend.py
@@ -34,6 +34,7 @@
 import nova.conf
 from nova import exception
 from nova.i18n import _
+from nova.image import format_inspector
 from nova.image import glance
 import nova.privsep.libvirt
 import nova.privsep.path
@@ -637,6 +638,20 @@ def create_qcow2_image(base, target, size):
         if not os.path.exists(base):
             prepare_template(target=base, *args, **kwargs)
 
+        # NOTE(danms): We need to perform safety checks on the base image
+        # before we inspect it for other attributes. We do this each time
+        # because additional safety checks could have been added since we
+        # downloaded the image.
+        if not CONF.workarounds.disable_deep_image_inspection:
+            inspector = format_inspector.detect_file_format(base)
+            if not inspector.safety_check():
+                LOG.warning('Base image %s failed safety check', base)
+                # NOTE(danms): This is the same exception as would be raised
+                # by qemu_img_info() if the disk format was unreadable or
+                # otherwise unsuitable.
+                raise exception.InvalidDiskInfo(
+                    reason=_('Base image failed safety check'))
+
         # NOTE(ankit): Update the mtime of the base file so the image
         # cache manager knows it is in use.
         _update_utime_ignore_eacces(base)
diff --git a/nova/virt/libvirt/utils.py b/nova/virt/libvirt/utils.py
index a1b9459b7e6..6ff86e9bcac 100644
--- a/nova/virt/libvirt/utils.py
+++ b/nova/virt/libvirt/utils.py
@@ -34,6 +34,7 @@
 from nova import context as nova_context
 from nova import exception
 from nova.i18n import _
+from nova.image import format_inspector
 from nova import objects
 from nova.objects import fields as obj_fields
 import nova.privsep.fs
@@ -139,7 +140,34 @@ def create_cow_image(
     base_cmd = ['qemu-img', 'create', '-f', 'qcow2']
     cow_opts = []
     if backing_file:
+        # NOTE(danms): We need to perform safety checks on the base image
+        # before we inspect it for other attributes. We do this each time
+        # because additional safety checks could have been added since we
+        # downloaded the image.
+        if not CONF.workarounds.disable_deep_image_inspection:
+            inspector = format_inspector.detect_file_format(backing_file)
+            if not inspector.safety_check():
+                LOG.warning('Base image %s failed safety check', backing_file)
+                # NOTE(danms): This is the same exception as would be raised
+                # by qemu_img_info() if the disk format was unreadable or
+                # otherwise unsuitable.
+                raise exception.InvalidDiskInfo(
+                    reason=_('Base image failed safety check'))
+
         base_details = images.qemu_img_info(backing_file)
+        if base_details.backing_file is not None:
+            LOG.warning('Base image %s failed safety check', backing_file)
+            raise exception.InvalidDiskInfo(
+                reason=_('Base image failed safety check'))
+        try:
+            data_file = base_details.format_specific['data']['data-file']
+        except (KeyError, TypeError, AttributeError):
+            data_file = None
+        if data_file is not None:
+            LOG.warning('Base image %s failed safety check', backing_file)
+            raise exception.InvalidDiskInfo(
+                reason=_('Base image failed safety check'))
+
         cow_opts += ['backing_file=%s' % backing_file]
         cow_opts += ['backing_fmt=%s' % base_details.file_format]
     else:

From 7248c46f339b7e321177e24ad7b5fa00a34be7b2 Mon Sep 17 00:00:00 2001
From: Dan Smith <dansmith@redhat.com>
Date: Mon, 1 Jul 2024 09:06:40 -0700
Subject: [PATCH 11/23] Fix vmdk_allowed_types checking

Related-Bug: #2059809
---
 nova/virt/libvirt/utils.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/nova/virt/libvirt/utils.py b/nova/virt/libvirt/utils.py
index 6ff86e9bcac..93c5b38cb77 100644
--- a/nova/virt/libvirt/utils.py
+++ b/nova/virt/libvirt/utils.py
@@ -155,6 +155,8 @@ def create_cow_image(
                     reason=_('Base image failed safety check'))
 
         base_details = images.qemu_img_info(backing_file)
+        if base_details.file_format == 'vmdk':
+            images.check_vmdk_image('base', base_details)
         if base_details.backing_file is not None:
             LOG.warning('Base image %s failed safety check', backing_file)
             raise exception.InvalidDiskInfo(

From 37e0f4b41705407da36385a66a07af152259d411 Mon Sep 17 00:00:00 2001
From: Mark Goddard <mark@stackhpc.com>
Date: Tue, 2 Jul 2024 10:34:19 +0000
Subject: [PATCH 12/23] Fix up merge conflicts in unit tests

---
 nova/tests/unit/virt/libvirt/test_utils.py | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/nova/tests/unit/virt/libvirt/test_utils.py b/nova/tests/unit/virt/libvirt/test_utils.py
index d95cc488723..8d348918ce8 100644
--- a/nova/tests/unit/virt/libvirt/test_utils.py
+++ b/nova/tests/unit/virt/libvirt/test_utils.py
@@ -117,11 +117,15 @@ def test_create_image(self, mock_execute):
     @mock.patch('os.path.exists', return_value=True)
     @mock.patch('oslo_concurrency.processutils.execute')
     @mock.patch('nova.virt.images.qemu_img_info')
-    def test_create_cow_image(self, mock_info, mock_execute, mock_exists):
+    @mock.patch('nova.image.format_inspector.detect_file_format')
+    def test_create_cow_image(self, mock_detect, mock_info, mock_execute,
+                              mock_exists):
         mock_execute.return_value = ('stdout', None)
         mock_info.return_value = mock.Mock(
             file_format=mock.sentinel.backing_fmt,
-            cluster_size=mock.sentinel.cluster_size)
+            cluster_size=mock.sentinel.cluster_size,
+            backing_file=None)
+        mock_detect.return_value.safety_check.return_value = True
         libvirt_utils.create_cow_image(mock.sentinel.backing_path,
                                        mock.sentinel.new_path)
         mock_info.assert_called_once_with(mock.sentinel.backing_path)
@@ -131,6 +135,7 @@ def test_create_cow_image(self, mock_info, mock_execute, mock_exists):
                 mock.sentinel.backing_path, mock.sentinel.backing_fmt,
                 mock.sentinel.cluster_size),
              mock.sentinel.new_path)])
+        mock_detect.return_value.safety_check.assert_called_once_with()
 
     @ddt.unpack
     @ddt.data({'fs_type': 'some_fs_type',

From 79e6840987fbcb8fdaf455fdfa74ecba2e8e56e0 Mon Sep 17 00:00:00 2001
From: Dan Smith <dansmith@redhat.com>
Date: Mon, 1 Apr 2024 07:32:11 -0700
Subject: [PATCH 13/23] Reject qcow files with data-file attributes

Change-Id: Ic3fa16f55acc38cf6c1a4ac1dce4487225e66d04
Closes-Bug: #2059809
(cherry picked from commit ec9c55cbbc91d1f31e42ced289a7c82cf79dc2a2)
(cherry picked from commit 58d933eafb3f7164419000700a305c8f75d5cb6e)
(cherry picked from commit 736328f78fb88b6d567b94b50cd14b3ebef08a5e)
(cherry picked from commit af4d819c606d6662d0b086365a51f5220b596e48)
(cherry picked from commit d69d441cf5d82f69d8ed7d555a6af73624866400)
(cherry picked from commit f844c8fe3ccbf5b477c007ac1d2e290c9d74f2e6)
---
 nova/tests/unit/virt/test_images.py | 118 +---------------------------
 1 file changed, 4 insertions(+), 114 deletions(-)

diff --git a/nova/tests/unit/virt/test_images.py b/nova/tests/unit/virt/test_images.py
index 29cb6994e44..e0b08bb9f59 100644
--- a/nova/tests/unit/virt/test_images.py
+++ b/nova/tests/unit/virt/test_images.py
@@ -21,7 +21,6 @@
 
 from nova.compute import utils as compute_utils
 from nova import exception
-from nova.image import format_inspector
 from nova import test
 from nova.virt import images
 
@@ -100,17 +99,11 @@ def test_qemu_img_info_with_disk_not_found(self, exists, mocked_execute):
         exists.assert_called_once_with(path)
         mocked_execute.assert_called_once()
 
-    @mock.patch.object(images, 'IMAGE_API')
-    @mock.patch('nova.image.format_inspector.get_inspector')
     @mock.patch.object(images, 'convert_image',
                        side_effect=exception.ImageUnacceptable)
     @mock.patch.object(images, 'qemu_img_info')
     @mock.patch.object(images, 'fetch')
-    def test_fetch_to_raw_errors(self, convert_image, qemu_img_info, fetch,
-                                 get_inspector, glance):
-        inspector = get_inspector.return_value.from_file.return_value
-        inspector.safety_check.return_value = True
-        glance.get.return_value = {'disk_format': 'qcow2'}
+    def test_fetch_to_raw_errors(self, convert_image, qemu_img_info, fetch):
         qemu_img_info.backing_file = None
         qemu_img_info.file_format = 'qcow2'
         qemu_img_info.virtual_size = 20
@@ -119,17 +112,12 @@ def test_fetch_to_raw_errors(self, convert_image, qemu_img_info, fetch,
                                images.fetch_to_raw,
                                None, 'href123', '/no/path')
 
-    @mock.patch.object(images, 'IMAGE_API')
-    @mock.patch('nova.image.format_inspector.get_inspector')
     @mock.patch.object(images, 'convert_image',
                        side_effect=exception.ImageUnacceptable)
     @mock.patch.object(images, 'qemu_img_info')
     @mock.patch.object(images, 'fetch')
     def test_fetch_to_raw_data_file(self, convert_image, qemu_img_info_fn,
-                                    fetch, mock_gi, mock_glance):
-        mock_glance.get.return_value = {'disk_format': 'qcow2'}
-        inspector = mock_gi.return_value.from_file.return_value
-        inspector.safety_check.return_value = True
+                                    fetch):
         # NOTE(danms): the above test needs the following line as well, as it
         # is broken without it.
         qemu_img_info = qemu_img_info_fn.return_value
@@ -142,16 +130,12 @@ def test_fetch_to_raw_data_file(self, convert_image, qemu_img_info_fn,
                                images.fetch_to_raw,
                                None, 'href123', '/no/path')
 
-    @mock.patch('nova.image.format_inspector.get_inspector')
-    @mock.patch.object(images, 'IMAGE_API')
     @mock.patch('os.rename')
     @mock.patch.object(images, 'qemu_img_info')
     @mock.patch.object(images, 'fetch')
-    def test_fetch_to_raw_from_raw(self, fetch, qemu_img_info_fn, mock_rename,
-                                   mock_glance, mock_gi):
+    def test_fetch_to_raw_from_raw(self, fetch, qemu_img_info_fn, mock_rename):
         # Make sure we support a case where we fetch an already-raw image and
         # qemu-img returns None for "format_specific".
-        mock_glance.get.return_value = {'disk_format': 'raw'}
         qemu_img_info = qemu_img_info_fn.return_value
         qemu_img_info.file_format = 'raw'
         qemu_img_info.backing_file = None
@@ -214,15 +198,9 @@ def test_convert_image_vmdk_allowed_list_checking(self):
                           imageutils.QemuImgInfo(jsonutils.dumps(info),
                                                  format='json'))
 
-    @mock.patch.object(images, 'IMAGE_API')
-    @mock.patch('nova.image.format_inspector.get_inspector')
     @mock.patch.object(images, 'fetch')
     @mock.patch('nova.privsep.qemu.unprivileged_qemu_img_info')
-    def test_fetch_checks_vmdk_rules(self, mock_info, mock_fetch, mock_gi,
-                                     mock_glance):
-        mock_glance.get.return_value = {'disk_format': 'vmdk'}
-        inspector = mock_gi.return_value.from_file.return_value
-        inspector.safety_check.return_value = True
+    def test_fetch_checks_vmdk_rules(self, mock_info, mock_fetch):
         info = {'format': 'vmdk',
                 'format-specific': {
                     'type': 'vmdk',
@@ -234,91 +212,3 @@ def test_fetch_checks_vmdk_rules(self, mock_info, mock_fetch, mock_gi,
             e = self.assertRaises(exception.ImageUnacceptable,
                                   images.fetch_to_raw, None, 'foo', 'anypath')
             self.assertIn('Invalid VMDK create-type specified', str(e))
-
-    @mock.patch.object(images, 'IMAGE_API')
-    @mock.patch('nova.image.format_inspector.get_inspector')
-    @mock.patch.object(images, 'qemu_img_info')
-    @mock.patch.object(images, 'fetch')
-    def test_fetch_to_raw_inspector(self, fetch, qemu_img_info, mock_gi,
-                                    mock_glance):
-        # Image claims to be qcow2, is qcow2, but fails safety check, so we
-        # abort before qemu-img-info
-        mock_glance.get.return_value = {'disk_format': 'qcow2'}
-        inspector = mock_gi.return_value.from_file.return_value
-        inspector.safety_check.return_value = False
-        self.assertRaises(exception.ImageUnacceptable,
-                          images.fetch_to_raw, None, 'href123', '/no.path')
-        qemu_img_info.assert_not_called()
-        mock_gi.assert_called_once_with('qcow2')
-        mock_gi.return_value.from_file.assert_called_once_with('/no.path.part')
-        inspector.safety_check.assert_called_once_with()
-        mock_glance.get.assert_called_once_with(None, 'href123')
-
-        # Image claims to be qcow2, is qcow2, passes safety check, so we make
-        # it all the way to qemu-img-info
-        inspector.safety_check.return_value = True
-        qemu_img_info.side_effect = test.TestingException
-        self.assertRaises(test.TestingException,
-                          images.fetch_to_raw, None, 'href123', '/no.path')
-
-        # Image claims to be qcow2 in glance, but the image is something else,
-        # so we abort before qemu-img-info
-        qemu_img_info.reset_mock()
-        mock_gi.reset_mock()
-        inspector.safety_check.reset_mock()
-        mock_gi.return_value.from_file.side_effect = (
-            format_inspector.ImageFormatError)
-        self.assertRaises(exception.ImageUnacceptable,
-                          images.fetch_to_raw, None, 'href123', '/no.path')
-        mock_gi.assert_called_once_with('qcow2')
-        inspector.safety_check.assert_not_called()
-        qemu_img_info.assert_not_called()
-
-    @mock.patch.object(images, 'IMAGE_API')
-    @mock.patch('nova.image.format_inspector.get_inspector')
-    @mock.patch.object(images, 'qemu_img_info')
-    @mock.patch.object(images, 'fetch')
-    def test_fetch_to_raw_inspector_disabled(self, fetch, qemu_img_info,
-                                             mock_gi, mock_glance):
-        self.flags(disable_deep_image_inspection=True,
-                   group='workarounds')
-        qemu_img_info.side_effect = test.TestingException
-        self.assertRaises(test.TestingException,
-                          images.fetch_to_raw, None, 'href123', '/no.path')
-        # If deep inspection is disabled, we should never call the inspector
-        mock_gi.assert_not_called()
-        # ... and we let qemu-img detect the format itself.
-        qemu_img_info.assert_called_once_with('/no.path.part',
-                                              format=None)
-        mock_glance.get.assert_not_called()
-
-    @mock.patch.object(images, 'IMAGE_API')
-    @mock.patch.object(images, 'qemu_img_info')
-    def test_fetch_inspect_ami(self, imginfo, glance):
-        glance.get.return_value = {'disk_format': 'ami'}
-        self.assertRaises(exception.ImageUnacceptable,
-                          images.fetch_to_raw, None, 'href123', '/no.path')
-        # Make sure 'ami was translated into 'raw' before we call qemu-img
-        imginfo.assert_called_once_with('/no.path.part', format='raw')
-
-    @mock.patch.object(images, 'IMAGE_API')
-    @mock.patch.object(images, 'qemu_img_info')
-    def test_fetch_inspect_unknown_format(self, imginfo, glance):
-        glance.get.return_value = {'disk_format': 'commodore-64-disk'}
-        self.assertRaises(exception.ImageUnacceptable,
-                          images.fetch_to_raw, None, 'href123', '/no.path')
-        # Unsupported formats do not make it past deep inspection
-        imginfo.assert_not_called()
-
-    @mock.patch.object(images, 'IMAGE_API')
-    @mock.patch.object(images, 'qemu_img_info')
-    @mock.patch('nova.image.format_inspector.get_inspector')
-    def test_fetch_inspect_disagrees_qemu(self, mock_gi, imginfo, glance):
-        glance.get.return_value = {'disk_format': 'qcow2'}
-        # Glance and inspector think it is a qcow2 file, but qemu-img does not
-        # agree. It was forced to interpret as a qcow2, but returned no
-        # format information as a result.
-        imginfo.return_value.data_file = None
-        self.assertRaises(exception.ImageUnacceptable,
-                          images.fetch_to_raw, None, 'href123', '/no.path')
-        imginfo.assert_called_once_with('/no.path.part', format='qcow2')

From c72a8ba5026b7264dfffca755903d8d80401ca23 Mon Sep 17 00:00:00 2001
From: Dan Smith <dansmith@redhat.com>
Date: Wed, 17 Apr 2024 07:06:13 -0700
Subject: [PATCH 14/23] Check images with format_inspector for safety

It has been asserted that we should not be calling qemu-img info
on untrusted files. That means we need to know if they have a
backing_file, data_file or other unsafe configuration *before* we use
qemu-img to probe or convert them.

This grafts glance's format_inspector module into nova/images so we
can use it to check the file early for safety. The expectation is that
this will be moved to oslo.utils (or something) later and thus we will
just delete the file from nova and change our import when that happens.

NOTE: This includes whitespace changes from the glance version of
format_inspector.py because of autopep8 demands.

Conflicts:
  nova/conf/workarounds.py

NOTE(elod.illes): conflict is due to the following patch that is only
present in zed: Iab92124b5776a799c7f90d07281d28fcf191c8fe

Change-Id: Iaefbe41b4c4bf0cf95d8f621653fdf65062aaa59
Closes-Bug: #2059809
(cherry picked from commit 9cdce715945619fc851ab3f43c97fab4bae4e35a)
(cherry picked from commit f07fa55fd86726eeafcd4c0c687bc49dd4df9f4c)
(cherry picked from commit 0acf5ee7b5dfb6ff0f9a9745f5ad2a0ed2bf65bf)
(cherry picked from commit 67e5376dd64407f5aaf1ea5f8c896e356064a2c9)
(cherry picked from commit da352edceb74dbd715268f94516503042b48cc90)
(cherry picked from commit b8a3d56f2e27531cc735606fbe92b648a51e8d62)
---
 nova/image/format_inspector.py      |  70 ++------------
 nova/tests/unit/virt/test_images.py | 136 +++++++++++++++++++++++++++-
 nova/virt/images.py                 |   2 +-
 3 files changed, 143 insertions(+), 65 deletions(-)

diff --git a/nova/image/format_inspector.py b/nova/image/format_inspector.py
index 8e57d7ed2c4..268c98b99cb 100644
--- a/nova/image/format_inspector.py
+++ b/nova/image/format_inspector.py
@@ -368,23 +368,6 @@ def safety_check(self):
                 not self.has_unknown_features)
 
 
-class QEDInspector(FileInspector):
-    def __init__(self, tracing=False):
-        super().__init__(tracing)
-        self.new_region('header', CaptureRegion(0, 512))
-
-    @property
-    def format_match(self):
-        if not self.region('header').complete:
-            return False
-        return self.region('header').data.startswith(b'QED\x00')
-
-    def safety_check(self):
-        # QED format is not supported by anyone, but we want to detect it
-        # and mark it as just always unsafe.
-        return False
-
-
 # The VHD (or VPC as QEMU calls it) format consists of a big-endian
 # 512-byte "footer" at the beginning of the file with various
 # information, most of which does not matter to us:
@@ -888,52 +871,19 @@ def close(self):
             self._source.close()
 
 
-ALL_FORMATS = {
-    'raw': FileInspector,
-    'qcow2': QcowInspector,
-    'vhd': VHDInspector,
-    'vhdx': VHDXInspector,
-    'vmdk': VMDKInspector,
-    'vdi': VDIInspector,
-    'qed': QEDInspector,
-}
-
-
 def get_inspector(format_name):
     """Returns a FormatInspector class based on the given name.
 
     :param format_name: The name of the disk_format (raw, qcow2, etc).
     :returns: A FormatInspector or None if unsupported.
     """
-
-    return ALL_FORMATS.get(format_name)
-
-
-def detect_file_format(filename):
-    """Attempts to detect the format of a file.
-
-    This runs through a file one time, running all the known inspectors in
-    parallel. It stops reading the file once one of them matches or all of
-    them are sure they don't match.
-
-    Returns the FileInspector that matched, if any. None if 'raw'.
-    """
-    inspectors = {k: v() for k, v in ALL_FORMATS.items()}
-    with open(filename, 'rb') as f:
-        for chunk in chunked_reader(f):
-            for format, inspector in list(inspectors.items()):
-                try:
-                    inspector.eat_chunk(chunk)
-                except ImageFormatError:
-                    # No match, so stop considering this format
-                    inspectors.pop(format)
-                    continue
-                if (inspector.format_match and inspector.complete and
-                        format != 'raw'):
-                    # First complete match (other than raw) wins
-                    return inspector
-            if all(i.complete for i in inspectors.values()):
-                # If all the inspectors are sure they are not a match, avoid
-                # reading to the end of the file to settle on 'raw'.
-                break
-    return inspectors['raw']
+    formats = {
+        'raw': FileInspector,
+        'qcow2': QcowInspector,
+        'vhd': VHDInspector,
+        'vhdx': VHDXInspector,
+        'vmdk': VMDKInspector,
+        'vdi': VDIInspector,
+    }
+
+    return formats.get(format_name)
diff --git a/nova/tests/unit/virt/test_images.py b/nova/tests/unit/virt/test_images.py
index e0b08bb9f59..55943f7f308 100644
--- a/nova/tests/unit/virt/test_images.py
+++ b/nova/tests/unit/virt/test_images.py
@@ -21,6 +21,7 @@
 
 from nova.compute import utils as compute_utils
 from nova import exception
+from nova.image import format_inspector
 from nova import test
 from nova.virt import images
 
@@ -99,11 +100,17 @@ def test_qemu_img_info_with_disk_not_found(self, exists, mocked_execute):
         exists.assert_called_once_with(path)
         mocked_execute.assert_called_once()
 
+    @mock.patch.object(images, 'IMAGE_API')
+    @mock.patch('nova.image.format_inspector.get_inspector')
     @mock.patch.object(images, 'convert_image',
                        side_effect=exception.ImageUnacceptable)
     @mock.patch.object(images, 'qemu_img_info')
     @mock.patch.object(images, 'fetch')
-    def test_fetch_to_raw_errors(self, convert_image, qemu_img_info, fetch):
+    def test_fetch_to_raw_errors(self, convert_image, qemu_img_info, fetch,
+                                 get_inspector, glance):
+        inspector = get_inspector.return_value.from_file.return_value
+        inspector.safety_check.return_value = True
+        glance.get.return_value = {'disk_format': 'qcow2'}
         qemu_img_info.backing_file = None
         qemu_img_info.file_format = 'qcow2'
         qemu_img_info.virtual_size = 20
@@ -112,12 +119,17 @@ def test_fetch_to_raw_errors(self, convert_image, qemu_img_info, fetch):
                                images.fetch_to_raw,
                                None, 'href123', '/no/path')
 
+    @mock.patch.object(images, 'IMAGE_API')
+    @mock.patch('nova.image.format_inspector.get_inspector')
     @mock.patch.object(images, 'convert_image',
                        side_effect=exception.ImageUnacceptable)
     @mock.patch.object(images, 'qemu_img_info')
     @mock.patch.object(images, 'fetch')
     def test_fetch_to_raw_data_file(self, convert_image, qemu_img_info_fn,
-                                    fetch):
+                                    fetch, mock_gi, mock_glance):
+        mock_glance.get.return_value = {'disk_format': 'qcow2'}
+        inspector = mock_gi.return_value.from_file.return_value
+        inspector.safety_check.return_value = True
         # NOTE(danms): the above test needs the following line as well, as it
         # is broken without it.
         qemu_img_info = qemu_img_info_fn.return_value
@@ -130,12 +142,16 @@ def test_fetch_to_raw_data_file(self, convert_image, qemu_img_info_fn,
                                images.fetch_to_raw,
                                None, 'href123', '/no/path')
 
+    @mock.patch('nova.image.format_inspector.get_inspector')
+    @mock.patch.object(images, 'IMAGE_API')
     @mock.patch('os.rename')
     @mock.patch.object(images, 'qemu_img_info')
     @mock.patch.object(images, 'fetch')
-    def test_fetch_to_raw_from_raw(self, fetch, qemu_img_info_fn, mock_rename):
+    def test_fetch_to_raw_from_raw(self, fetch, qemu_img_info_fn, mock_rename,
+                                   mock_glance, mock_gi):
         # Make sure we support a case where we fetch an already-raw image and
         # qemu-img returns None for "format_specific".
+        mock_glance.get.return_value = {'disk_format': 'raw'}
         qemu_img_info = qemu_img_info_fn.return_value
         qemu_img_info.file_format = 'raw'
         qemu_img_info.backing_file = None
@@ -198,9 +214,15 @@ def test_convert_image_vmdk_allowed_list_checking(self):
                           imageutils.QemuImgInfo(jsonutils.dumps(info),
                                                  format='json'))
 
+    @mock.patch.object(images, 'IMAGE_API')
+    @mock.patch('nova.image.format_inspector.get_inspector')
     @mock.patch.object(images, 'fetch')
     @mock.patch('nova.privsep.qemu.unprivileged_qemu_img_info')
-    def test_fetch_checks_vmdk_rules(self, mock_info, mock_fetch):
+    def test_fetch_checks_vmdk_rules(self, mock_info, mock_fetch, mock_gi,
+                                     mock_glance):
+        mock_glance.get.return_value = {'disk_format': 'vmdk'}
+        inspector = mock_gi.return_value.from_file.return_value
+        inspector.safety_check.return_value = True
         info = {'format': 'vmdk',
                 'format-specific': {
                     'type': 'vmdk',
@@ -212,3 +234,109 @@ def test_fetch_checks_vmdk_rules(self, mock_info, mock_fetch):
             e = self.assertRaises(exception.ImageUnacceptable,
                                   images.fetch_to_raw, None, 'foo', 'anypath')
             self.assertIn('Invalid VMDK create-type specified', str(e))
+
+    @mock.patch.object(images, 'IMAGE_API')
+    @mock.patch('nova.image.format_inspector.get_inspector')
+    @mock.patch.object(images, 'qemu_img_info')
+    @mock.patch.object(images, 'fetch')
+    def test_fetch_to_raw_inspector(self, fetch, qemu_img_info, mock_gi,
+                                    mock_glance):
+        # Image claims to be qcow2, is qcow2, but fails safety check, so we
+        # abort before qemu-img-info
+        mock_glance.get.return_value = {'disk_format': 'qcow2'}
+        inspector = mock_gi.return_value.from_file.return_value
+        inspector.safety_check.return_value = False
+        self.assertRaises(exception.ImageUnacceptable,
+                          images.fetch_to_raw, None, 'href123', '/no.path')
+        qemu_img_info.assert_not_called()
+        mock_gi.assert_called_once_with('qcow2')
+        mock_gi.return_value.from_file.assert_called_once_with('/no.path.part')
+        inspector.safety_check.assert_called_once_with()
+        mock_glance.get.assert_called_once_with(None, 'href123')
+
+        # Image claims to be qcow2, is qcow2, passes safety check, so we make
+        # it all the way to qemu-img-info
+        inspector.safety_check.return_value = True
+        qemu_img_info.side_effect = test.TestingException
+        self.assertRaises(test.TestingException,
+                          images.fetch_to_raw, None, 'href123', '/no.path')
+
+        # Image claims to be qcow2 in glance, but the image is something else,
+        # so we abort before qemu-img-info
+        qemu_img_info.reset_mock()
+        mock_gi.reset_mock()
+        inspector.safety_check.reset_mock()
+        mock_gi.return_value.from_file.side_effect = (
+            format_inspector.ImageFormatError)
+        self.assertRaises(exception.ImageUnacceptable,
+                          images.fetch_to_raw, None, 'href123', '/no.path')
+        mock_gi.assert_called_once_with('qcow2')
+        inspector.safety_check.assert_not_called()
+        qemu_img_info.assert_not_called()
+
+    @mock.patch.object(images, 'IMAGE_API')
+    @mock.patch('nova.image.format_inspector.get_inspector')
+    @mock.patch.object(images, 'qemu_img_info')
+    @mock.patch.object(images, 'fetch')
+    def test_fetch_to_raw_inspector_disabled(self, fetch, qemu_img_info,
+                                             mock_gi, mock_glance):
+        self.flags(disable_deep_image_inspection=True,
+                   group='workarounds')
+        qemu_img_info.side_effect = test.TestingException
+        self.assertRaises(test.TestingException,
+                          images.fetch_to_raw, None, 'href123', '/no.path')
+        # If deep inspection is disabled, we should never call the inspector
+        mock_gi.assert_not_called()
+        # ... and we let qemu-img detect the format itself.
+        qemu_img_info.assert_called_once_with('/no.path.part',
+                                              format=None)
+        mock_glance.get.assert_not_called()
+
+    @mock.patch.object(images, 'IMAGE_API')
+    @mock.patch.object(images, 'qemu_img_info')
+    def test_fetch_inspect_ami(self, imginfo, glance):
+        glance.get.return_value = {'disk_format': 'ami'}
+        self.assertRaises(exception.ImageUnacceptable,
+                          images.fetch_to_raw, None, 'href123', '/no.path')
+        # Make sure 'ami was translated into 'raw' before we call qemu-img
+        imginfo.assert_called_once_with('/no.path.part', format='raw')
+
+    @mock.patch.object(images, 'IMAGE_API')
+    @mock.patch.object(images, 'qemu_img_info')
+    def test_fetch_inspect_aki(self, imginfo, glance):
+        glance.get.return_value = {'disk_format': 'aki'}
+        self.assertRaises(exception.ImageUnacceptable,
+                          images.fetch_to_raw, None, 'href123', '/no.path')
+        # Make sure 'aki was translated into 'raw' before we call qemu-img
+        imginfo.assert_called_once_with('/no.path.part', format='raw')
+
+    @mock.patch.object(images, 'IMAGE_API')
+    @mock.patch.object(images, 'qemu_img_info')
+    def test_fetch_inspect_ari(self, imginfo, glance):
+        glance.get.return_value = {'disk_format': 'ari'}
+        self.assertRaises(exception.ImageUnacceptable,
+                          images.fetch_to_raw, None, 'href123', '/no.path')
+        # Make sure 'aki was translated into 'raw' before we call qemu-img
+        imginfo.assert_called_once_with('/no.path.part', format='raw')
+
+    @mock.patch.object(images, 'IMAGE_API')
+    @mock.patch.object(images, 'qemu_img_info')
+    def test_fetch_inspect_unknown_format(self, imginfo, glance):
+        glance.get.return_value = {'disk_format': 'commodore-64-disk'}
+        self.assertRaises(exception.ImageUnacceptable,
+                          images.fetch_to_raw, None, 'href123', '/no.path')
+        # Unsupported formats do not make it past deep inspection
+        imginfo.assert_not_called()
+
+    @mock.patch.object(images, 'IMAGE_API')
+    @mock.patch.object(images, 'qemu_img_info')
+    @mock.patch('nova.image.format_inspector.get_inspector')
+    def test_fetch_inspect_disagrees_qemu(self, mock_gi, imginfo, glance):
+        glance.get.return_value = {'disk_format': 'qcow2'}
+        # Glance and inspector think it is a qcow2 file, but qemu-img does not
+        # agree. It was forced to interpret as a qcow2, but returned no
+        # format information as a result.
+        imginfo.return_value.data_file = None
+        self.assertRaises(exception.ImageUnacceptable,
+                          images.fetch_to_raw, None, 'href123', '/no.path')
+        imginfo.assert_called_once_with('/no.path.part', format='qcow2')
diff --git a/nova/virt/images.py b/nova/virt/images.py
index 8376985c111..5ec0dc0b6ba 100644
--- a/nova/virt/images.py
+++ b/nova/virt/images.py
@@ -160,7 +160,7 @@ def do_image_deep_inspection(img, image_href, path):
         # No inspector was found
         LOG.warning('Unable to perform deep image inspection on type %r',
                     img['disk_format'])
-        if disk_format == 'ami':
+        if disk_format in ('ami', 'aki', 'ari'):
             # A lot of things can be in a UEC, although it is typically a raw
             # filesystem. We really have nothing we can do other than treat it
             # like a 'raw', which is what qemu-img will detect a filesystem as

From e9eb0329ebb2aeee6efeb0217adbb55ebfd5a4ec Mon Sep 17 00:00:00 2001
From: Dan Smith <dansmith@redhat.com>
Date: Mon, 24 Jun 2024 09:09:36 -0700
Subject: [PATCH 15/23] Additional qemu safety checking on base images

There is an additional way we can be fooled into using a qcow2 file
with a data-file, which is uploading it as raw to glance and then
booting an instance from it. Because when we go to create the
ephemeral disk from a cached base image, we've lost the information
about the original source's format, we probe the image's file type
without a strict format specified. If a qcow2 file is listed in
glance as a raw, we won't notice it until it is too late.

This brings over another piece of code (proposed against) glance's
format inspector which provides a safe format detection routine. This
patch uses that to detect the format of and run a safety check on the
base image each time we go to use it to create an ephemeral disk
image from it.

This also detects QED files and always marks them as unsafe as we do
not support that format at all. Since we could be fooled into
downloading one and passing it to qemu-img if we don't recognize it,
we need to detect and reject it as unsafe.

Conflicts:
  nova/tests/unit/virt/libvirt/test_utils.py
  nova/virt/libvirt/utils.py

NOTE(elod.illes): conflicts are due to patch to consolidate image
creation functions (I111cfc8a5eae27b15c6312957255fcf973038ddf) is only
introduced in zed.

Change-Id: I4881c8cbceb30c1ff2d2b859c554e0d02043f1f5
(cherry picked from commit b1b88bf001757546fbbea959f4b73cb344407dfb)
(cherry picked from commit 8a0d5f2afaf40c4554419a0b2488ce092eda7a1a)
(cherry picked from commit 0269234dc42fe2c320dc4696123cf5132642f9b7)
(cherry picked from commit 9e10ac25490e7b5353cb01e768d22eb5a1f92825)
(cherry picked from commit 303c2c9644c45d2f04461b6e9e2ef8a3273d3be8)
(cherry picked from commit e7bdaac1b6b14530a9eefb718f32c37f72096c2a)
---
 nova/image/format_inspector.py             | 70 ++++++++++++++++++----
 nova/tests/unit/virt/libvirt/test_utils.py | 48 +++++++++++++--
 nova/virt/libvirt/utils.py                 |  3 +-
 3 files changed, 104 insertions(+), 17 deletions(-)

diff --git a/nova/image/format_inspector.py b/nova/image/format_inspector.py
index 268c98b99cb..8e57d7ed2c4 100644
--- a/nova/image/format_inspector.py
+++ b/nova/image/format_inspector.py
@@ -368,6 +368,23 @@ def safety_check(self):
                 not self.has_unknown_features)
 
 
+class QEDInspector(FileInspector):
+    def __init__(self, tracing=False):
+        super().__init__(tracing)
+        self.new_region('header', CaptureRegion(0, 512))
+
+    @property
+    def format_match(self):
+        if not self.region('header').complete:
+            return False
+        return self.region('header').data.startswith(b'QED\x00')
+
+    def safety_check(self):
+        # QED format is not supported by anyone, but we want to detect it
+        # and mark it as just always unsafe.
+        return False
+
+
 # The VHD (or VPC as QEMU calls it) format consists of a big-endian
 # 512-byte "footer" at the beginning of the file with various
 # information, most of which does not matter to us:
@@ -871,19 +888,52 @@ def close(self):
             self._source.close()
 
 
+ALL_FORMATS = {
+    'raw': FileInspector,
+    'qcow2': QcowInspector,
+    'vhd': VHDInspector,
+    'vhdx': VHDXInspector,
+    'vmdk': VMDKInspector,
+    'vdi': VDIInspector,
+    'qed': QEDInspector,
+}
+
+
 def get_inspector(format_name):
     """Returns a FormatInspector class based on the given name.
 
     :param format_name: The name of the disk_format (raw, qcow2, etc).
     :returns: A FormatInspector or None if unsupported.
     """
-    formats = {
-        'raw': FileInspector,
-        'qcow2': QcowInspector,
-        'vhd': VHDInspector,
-        'vhdx': VHDXInspector,
-        'vmdk': VMDKInspector,
-        'vdi': VDIInspector,
-    }
-
-    return formats.get(format_name)
+
+    return ALL_FORMATS.get(format_name)
+
+
+def detect_file_format(filename):
+    """Attempts to detect the format of a file.
+
+    This runs through a file one time, running all the known inspectors in
+    parallel. It stops reading the file once one of them matches or all of
+    them are sure they don't match.
+
+    Returns the FileInspector that matched, if any. None if 'raw'.
+    """
+    inspectors = {k: v() for k, v in ALL_FORMATS.items()}
+    with open(filename, 'rb') as f:
+        for chunk in chunked_reader(f):
+            for format, inspector in list(inspectors.items()):
+                try:
+                    inspector.eat_chunk(chunk)
+                except ImageFormatError:
+                    # No match, so stop considering this format
+                    inspectors.pop(format)
+                    continue
+                if (inspector.format_match and inspector.complete and
+                        format != 'raw'):
+                    # First complete match (other than raw) wins
+                    return inspector
+            if all(i.complete for i in inspectors.values()):
+                # If all the inspectors are sure they are not a match, avoid
+                # reading to the end of the file to settle on 'raw'.
+                break
+    return inspectors['raw']
diff --git a/nova/tests/unit/virt/libvirt/test_utils.py b/nova/tests/unit/virt/libvirt/test_utils.py
index 8d348918ce8..a49bf723ffa 100644
--- a/nova/tests/unit/virt/libvirt/test_utils.py
+++ b/nova/tests/unit/virt/libvirt/test_utils.py
@@ -118,14 +118,26 @@ def test_create_image(self, mock_execute):
     @mock.patch('oslo_concurrency.processutils.execute')
     @mock.patch('nova.virt.images.qemu_img_info')
     @mock.patch('nova.image.format_inspector.detect_file_format')
-    def test_create_cow_image(self, mock_detect, mock_info, mock_execute,
-                              mock_exists):
+    def _test_create_cow_image(
+        self, mock_detect, mock_info, mock_execute,
+        mock_exists, backing_file=None, safety_check=True
+    ):
+        if isinstance(backing_file, dict):
+            backing_info = backing_file
+            backing_file = backing_info.pop('file', None)
+        else:
+            backing_info = {}
+        backing_backing_file = backing_info.pop('backing_file', None)
+
         mock_execute.return_value = ('stdout', None)
         mock_info.return_value = mock.Mock(
             file_format=mock.sentinel.backing_fmt,
             cluster_size=mock.sentinel.cluster_size,
-            backing_file=None)
-        mock_detect.return_value.safety_check.return_value = True
+            backing_file=backing_backing_file,
+            format_specific=backing_info)
+
+        mock_detect.return_value.safety_check.return_value = safety_check
+
         libvirt_utils.create_cow_image(mock.sentinel.backing_path,
                                        mock.sentinel.new_path)
         mock_info.assert_called_once_with(mock.sentinel.backing_path)
@@ -135,7 +147,33 @@ def test_create_cow_image(self, mock_detect, mock_info, mock_execute,
                 mock.sentinel.backing_path, mock.sentinel.backing_fmt,
                 mock.sentinel.cluster_size),
              mock.sentinel.new_path)])
-        mock_detect.return_value.safety_check.assert_called_once_with()
+        if backing_file:
+            mock_detect.return_value.safety_check.assert_called_once_with()
+
+    def test_create_image_qcow2(self):
+        self._test_create_cow_image()
+
+    def test_create_image_backing_file(self):
+        self._test_create_cow_image(
+            backing_file=mock.sentinel.backing_file
+        )
+
+    def test_create_image_base_has_backing_file(self):
+        self.assertRaises(
+            exception.InvalidDiskInfo,
+            self._test_create_cow_image,
+            backing_file={'file': mock.sentinel.backing_file,
+                          'backing_file': mock.sentinel.backing_backing_file},
+        )
+
+    def test_create_image_base_has_data_file(self):
+        self.assertRaises(
+            exception.InvalidDiskInfo,
+            self._test_create_cow_image,
+            backing_file={'file': mock.sentinel.backing_file,
+                          'backing_file': mock.sentinel.backing_backing_file,
+                          'data': {'data-file': mock.sentinel.data_file}},
+        )
 
     @ddt.unpack
     @ddt.data({'fs_type': 'some_fs_type',
diff --git a/nova/virt/libvirt/utils.py b/nova/virt/libvirt/utils.py
index 93c5b38cb77..1d6f7f4e1d4 100644
--- a/nova/virt/libvirt/utils.py
+++ b/nova/virt/libvirt/utils.py
@@ -155,8 +155,7 @@ def create_cow_image(
                     reason=_('Base image failed safety check'))
 
         base_details = images.qemu_img_info(backing_file)
-        if base_details.file_format == 'vmdk':
-            images.check_vmdk_image('base', base_details)
+
         if base_details.backing_file is not None:
             LOG.warning('Base image %s failed safety check', backing_file)
             raise exception.InvalidDiskInfo(

From f94bbc10db5eb070baa99c3849455da2c8dbb845 Mon Sep 17 00:00:00 2001
From: Dan Smith <dansmith@redhat.com>
Date: Mon, 1 Jul 2024 09:06:40 -0700
Subject: [PATCH 16/23] Fix vmdk_allowed_types checking

This restores the vmdk_allowed_types checking in create_image()
that was unintentionally lost by tightening the
qemu-type-matches-glance code in the fetch patch recently. Since we
are still detecting the format of base images without metadata, we
would have treated a vmdk file that claims to be raw as raw in fetch,
but then read it like a vmdk once it was used as a base image for
something else.

Conflicts:
  nova/tests/unit/virt/libvirt/test_utils.py
  nova/virt/libvirt/utils.py

NOTE(elod.illes): conflicts are due to patch to consolidate image
creation functions (I111cfc8a5eae27b15c6312957255fcf973038ddf) is only
introduced in zed.

Change-Id: I07b332a7edb814f6a91661651d9d24bfd6651ae7
Related-Bug: #2059809
(cherry picked from commit 08be7b2a0dc1d7728d8034bc2aab0428c4fb642e)
(cherry picked from commit 11301e7e3f0d81a3368632f90608e30d9c647111)
(cherry picked from commit 70a435fd519a0ebcc3ac9ad5254fefbf19c93e48)
(cherry picked from commit f732f8476851e6272d8ad9937f54b918795844e8)
(cherry picked from commit a2acb31d790e6cb41c067bfc0343bde274c9428c)
(cherry picked from commit 3ba8ee16116e6a721413a382bbd4bcb68355cdf0)
---
 nova/tests/unit/virt/libvirt/test_utils.py | 28 ++++++++++++++++++++--
 nova/virt/libvirt/utils.py                 |  3 ++-
 2 files changed, 28 insertions(+), 3 deletions(-)

diff --git a/nova/tests/unit/virt/libvirt/test_utils.py b/nova/tests/unit/virt/libvirt/test_utils.py
index a49bf723ffa..8a43c45f34b 100644
--- a/nova/tests/unit/virt/libvirt/test_utils.py
+++ b/nova/tests/unit/virt/libvirt/test_utils.py
@@ -128,10 +128,12 @@ def _test_create_cow_image(
         else:
             backing_info = {}
         backing_backing_file = backing_info.pop('backing_file', None)
+        backing_fmt = backing_info.pop('backing_fmt',
+                                       mock.sentinel.backing_fmt)
 
         mock_execute.return_value = ('stdout', None)
         mock_info.return_value = mock.Mock(
-            file_format=mock.sentinel.backing_fmt,
+            file_format=backing_fmt,
             cluster_size=mock.sentinel.cluster_size,
             backing_file=backing_backing_file,
             format_specific=backing_info)
@@ -144,7 +146,7 @@ def _test_create_cow_image(
         mock_execute.assert_has_calls([mock.call(
             'qemu-img', 'create', '-f', 'qcow2', '-o',
             'backing_file=%s,backing_fmt=%s,cluster_size=%s' % (
-                mock.sentinel.backing_path, mock.sentinel.backing_fmt,
+                mock.sentinel.backing_path, backing_fmt,
                 mock.sentinel.cluster_size),
              mock.sentinel.new_path)])
         if backing_file:
@@ -175,6 +177,28 @@ def test_create_image_base_has_data_file(self):
                           'data': {'data-file': mock.sentinel.data_file}},
         )
 
+    def test_create_image_size_none(self):
+        self._test_create_cow_image(
+            backing_file=mock.sentinel.backing_file,
+        )
+
+    def test_create_image_vmdk(self):
+        self._test_create_cow_image(
+            backing_file={'file': mock.sentinel.backing_file,
+                          'backing_fmt': 'vmdk',
+                          'backing_file': None,
+                          'data': {'create-type': 'monolithicSparse'}}
+        )
+
+    def test_create_image_vmdk_invalid_type(self):
+        self.assertRaises(exception.ImageUnacceptable,
+            self._test_create_cow_image,
+            backing_file={'file': mock.sentinel.backing_file,
+                          'backing_fmt': 'vmdk',
+                          'backing_file': None,
+                          'data': {'create-type': 'monolithicFlat'}}
+        )
+
     @ddt.unpack
     @ddt.data({'fs_type': 'some_fs_type',
                'default_eph_format': None,
diff --git a/nova/virt/libvirt/utils.py b/nova/virt/libvirt/utils.py
index 1d6f7f4e1d4..93c5b38cb77 100644
--- a/nova/virt/libvirt/utils.py
+++ b/nova/virt/libvirt/utils.py
@@ -155,7 +155,8 @@ def create_cow_image(
                     reason=_('Base image failed safety check'))
 
         base_details = images.qemu_img_info(backing_file)
-
+        if base_details.file_format == 'vmdk':
+            images.check_vmdk_image('base', base_details)
         if base_details.backing_file is not None:
             LOG.warning('Base image %s failed safety check', backing_file)
             raise exception.InvalidDiskInfo(

From bc900dce0d1dfd249e3b7c57fcb889374de1c4b9 Mon Sep 17 00:00:00 2001
From: Sean Mooney <work@seanmooney.info>
Date: Thu, 4 Jul 2024 12:38:39 +0100
Subject: [PATCH 17/23] port format inspector tests from glance

This commit is a direct port of the format inspector
unit tests from glance as of commit
0d8e79b713bc31a78f0f4eac14ee594ca8520999

the only changes to the test are as follows

"from glance.common import format_inspector" was updated to
"from nova.image import format_inspector"

"from glance.tests import utils as test_utils"
was replaced with "from nova import test"

"test_utils.BaseTestCase" was replaced with "test.NoDBTestCase"

"glance-unittest-formatinspector-" was replaced with
"nova-unittest-formatinspector-"

This makes the test funtional in nova.

TestFormatInspectors requries qemu-img to be installed on the
host which would be a new depency for executing unit tests.
to avoid that we skip TestFormatInspectors if qemu-img
is not installed.
TestFormatInspectorInfra and TestFormatInspectorsTargeted
do not have a qemu-img dependency so
no changes to the test assertions were required.

Change-Id: Ia34203f246f0bc574e11476287dfb33fda7954fe
(cherry picked from commit 838daa3cad5fb3cdd10fb7aa76c647330a66939e)
(cherry picked from commit 66205be426028f8b7d16163ca6901bc181d703b6)
(cherry picked from commit 497abea5a189cc7043766273e9d17571f722190a)
(cherry picked from commit 58cd955c7d4848ed8da71f3c0352a5303cae6200)
(cherry picked from commit d7e3d722cd6c59968cbfe1d7a3bd7021c90165e5)
---
 .../tests/unit/image/test_format_inspector.py | 517 ++++++++++++++++++
 1 file changed, 517 insertions(+)
 create mode 100644 nova/tests/unit/image/test_format_inspector.py

diff --git a/nova/tests/unit/image/test_format_inspector.py b/nova/tests/unit/image/test_format_inspector.py
new file mode 100644
index 00000000000..4bda796ea42
--- /dev/null
+++ b/nova/tests/unit/image/test_format_inspector.py
@@ -0,0 +1,517 @@
+# Copyright 2020 Red Hat, Inc
+# All Rights Reserved.
+#
+#    Licensed under the Apache License, Version 2.0 (the "License"); you may
+#    not use this file except in compliance with the License. You may obtain
+#    a copy of the License at
+#
+#         http://www.apache.org/licenses/LICENSE-2.0
+#
+#    Unless required by applicable law or agreed to in writing, software
+#    distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+#    WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+#    License for the specific language governing permissions and limitations
+#    under the License.
+
+import io
+import os
+import re
+import struct
+import subprocess
+import tempfile
+from unittest import mock
+
+from oslo_utils import units
+
+from nova.image import format_inspector
+from nova import test
+
+
+def get_size_from_qemu_img(filename):
+    output = subprocess.check_output('qemu-img info "%s"' % filename,
+                                     shell=True)
+    for line in output.split(b'\n'):
+        m = re.search(b'^virtual size: .* .([0-9]+) bytes', line.strip())
+        if m:
+            return int(m.group(1))
+
+    raise Exception('Could not find virtual size with qemu-img')
+
+
+class TestFormatInspectors(test.NoDBTestCase):
+    def setUp(self):
+        super(TestFormatInspectors, self).setUp()
+        # these tests depend on qemu-img being installed
+        # and in the path, if it is not installed, skip
+        try:
+            subprocess.check_output('qemu-img --version', shell=True)
+        except Exception:
+            self.skipTest('qemu-img not installed')
+
+        self._created_files = []
+
+    def tearDown(self):
+        super(TestFormatInspectors, self).tearDown()
+        for fn in self._created_files:
+            try:
+                os.remove(fn)
+            except Exception:
+                pass
+
+    def _create_img(self, fmt, size, subformat=None, options=None,
+                    backing_file=None):
+        if fmt == 'vhd':
+            # QEMU calls the vhd format vpc
+            fmt = 'vpc'
+
+        if options is None:
+            options = {}
+        opt = ''
+        prefix = 'nova-unittest-formatinspector-'
+
+        if subformat:
+            options['subformat'] = subformat
+            prefix += subformat + '-'
+
+        if options:
+            opt += '-o ' + ','.join('%s=%s' % (k, v)
+                                    for k, v in options.items())
+
+        if backing_file is not None:
+            opt += ' -b %s -F raw' % backing_file
+
+        fn = tempfile.mktemp(prefix=prefix,
+                             suffix='.%s' % fmt)
+        self._created_files.append(fn)
+        subprocess.check_output(
+            'qemu-img create -f %s %s %s %i' % (fmt, opt, fn, size),
+            shell=True)
+        return fn
+
+    def _create_allocated_vmdk(self, size_mb, subformat=None):
+        # We need a "big" VMDK file to exercise some parts of the code of the
+        # format_inspector. A way to create one is to first create an empty
+        # file, and then to convert it with the -S 0 option.
+
+        if subformat is None:
+            # Matches qemu-img default, see `qemu-img convert -O vmdk -o help`
+            subformat = 'monolithicSparse'
+
+        prefix = 'nova-unittest-formatinspector-%s-' % subformat
+        fn = tempfile.mktemp(prefix=prefix, suffix='.vmdk')
+        self._created_files.append(fn)
+        raw = tempfile.mktemp(prefix=prefix, suffix='.raw')
+        self._created_files.append(raw)
+
+        # Create a file with pseudo-random data, otherwise it will get
+        # compressed in the streamOptimized format
+        subprocess.check_output(
+            'dd if=/dev/urandom of=%s bs=1M count=%i' % (raw, size_mb),
+            shell=True)
+
+        # Convert it to VMDK
+        subprocess.check_output(
+            'qemu-img convert -f raw -O vmdk -o subformat=%s -S 0 %s %s' % (
+                subformat, raw, fn),
+            shell=True)
+        return fn
+
+    def _test_format_at_block_size(self, format_name, img, block_size):
+        fmt = format_inspector.get_inspector(format_name)()
+        self.assertIsNotNone(fmt,
+                             'Did not get format inspector for %s' % (
+                                 format_name))
+        wrapper = format_inspector.InfoWrapper(open(img, 'rb'), fmt)
+
+        while True:
+            chunk = wrapper.read(block_size)
+            if not chunk:
+                break
+
+        wrapper.close()
+        return fmt
+
+    def _test_format_at_image_size(self, format_name, image_size,
+                                   subformat=None):
+        img = self._create_img(format_name, image_size, subformat=subformat)
+
+        # Some formats have internal alignment restrictions making this not
+        # always exactly like image_size, so get the real value for comparison
+        virtual_size = get_size_from_qemu_img(img)
+
+        # Read the format in various sizes, some of which will read whole
+        # sections in a single read, others will be completely unaligned, etc.
+        for block_size in (64 * units.Ki, 512, 17, 1 * units.Mi):
+            fmt = self._test_format_at_block_size(format_name, img, block_size)
+            self.assertTrue(fmt.format_match,
+                            'Failed to match %s at size %i block %i' % (
+                                format_name, image_size, block_size))
+            self.assertEqual(virtual_size, fmt.virtual_size,
+                             ('Failed to calculate size for %s at size %i '
+                              'block %i') % (format_name, image_size,
+                                             block_size))
+            memory = sum(fmt.context_info.values())
+            self.assertLess(memory, 512 * units.Ki,
+                            'Format used more than 512KiB of memory: %s' % (
+                                fmt.context_info))
+
+    def _test_format(self, format_name, subformat=None):
+        # Try a few different image sizes, including some odd and very small
+        # sizes
+        for image_size in (512, 513, 2057, 7):
+            self._test_format_at_image_size(format_name, image_size * units.Mi,
+                                            subformat=subformat)
+
+    def test_qcow2(self):
+        self._test_format('qcow2')
+
+    def test_vhd(self):
+        self._test_format('vhd')
+
+    def test_vhdx(self):
+        self._test_format('vhdx')
+
+    def test_vmdk(self):
+        self._test_format('vmdk')
+
+    def test_vmdk_stream_optimized(self):
+        self._test_format('vmdk', 'streamOptimized')
+
+    def test_from_file_reads_minimum(self):
+        img = self._create_img('qcow2', 10 * units.Mi)
+        file_size = os.stat(img).st_size
+        fmt = format_inspector.QcowInspector.from_file(img)
+        # We know everything we need from the first 512 bytes of a QCOW image,
+        # so make sure that we did not read the whole thing when we inspect
+        # a local file.
+        self.assertLess(fmt.actual_size, file_size)
+
+    def test_qed_always_unsafe(self):
+        img = self._create_img('qed', 10 * units.Mi)
+        fmt = format_inspector.get_inspector('qed').from_file(img)
+        self.assertTrue(fmt.format_match)
+        self.assertFalse(fmt.safety_check())
+
+    def _test_vmdk_bad_descriptor_offset(self, subformat=None):
+        format_name = 'vmdk'
+        image_size = 10 * units.Mi
+        descriptorOffsetAddr = 0x1c
+        BAD_ADDRESS = 0x400
+        img = self._create_img(format_name, image_size, subformat=subformat)
+
+        # Corrupt the header
+        fd = open(img, 'r+b')
+        fd.seek(descriptorOffsetAddr)
+        fd.write(struct.pack('<Q', BAD_ADDRESS // 512))
+        fd.close()
+
+        # Read the format in various sizes, some of which will read whole
+        # sections in a single read, others will be completely unaligned, etc.
+        for block_size in (64 * units.Ki, 512, 17, 1 * units.Mi):
+            fmt = self._test_format_at_block_size(format_name, img, block_size)
+            self.assertTrue(fmt.format_match,
+                            'Failed to match %s at size %i block %i' % (
+                                format_name, image_size, block_size))
+            self.assertEqual(0, fmt.virtual_size,
+                             ('Calculated a virtual size for a corrupt %s at '
+                              'size %i block %i') % (format_name, image_size,
+                                                     block_size))
+
+    def test_vmdk_bad_descriptor_offset(self):
+        self._test_vmdk_bad_descriptor_offset()
+
+    def test_vmdk_bad_descriptor_offset_stream_optimized(self):
+        self._test_vmdk_bad_descriptor_offset(subformat='streamOptimized')
+
+    def _test_vmdk_bad_descriptor_mem_limit(self, subformat=None):
+        format_name = 'vmdk'
+        image_size = 5 * units.Mi
+        virtual_size = 5 * units.Mi
+        descriptorOffsetAddr = 0x1c
+        descriptorSizeAddr = descriptorOffsetAddr + 8
+        twoMBInSectors = (2 << 20) // 512
+        # We need a big VMDK because otherwise we will not have enough data to
+        # fill-up the CaptureRegion.
+        img = self._create_allocated_vmdk(image_size // units.Mi,
+                                          subformat=subformat)
+
+        # Corrupt the end of descriptor address so it "ends" at 2MB
+        fd = open(img, 'r+b')
+        fd.seek(descriptorSizeAddr)
+        fd.write(struct.pack('<Q', twoMBInSectors))
+        fd.close()
+
+        # Read the format in various sizes, some of which will read whole
+        # sections in a single read, others will be completely unaligned, etc.
+        for block_size in (64 * units.Ki, 512, 17, 1 * units.Mi):
+            fmt = self._test_format_at_block_size(format_name, img, block_size)
+            self.assertTrue(fmt.format_match,
+                            'Failed to match %s at size %i block %i' % (
+                                format_name, image_size, block_size))
+            self.assertEqual(virtual_size, fmt.virtual_size,
+                             ('Failed to calculate size for %s at size %i '
+                              'block %i') % (format_name, image_size,
+                                             block_size))
+            memory = sum(fmt.context_info.values())
+            self.assertLess(memory, 1.5 * units.Mi,
+                            'Format used more than 1.5MiB of memory: %s' % (
+                                fmt.context_info))
+
+    def test_vmdk_bad_descriptor_mem_limit(self):
+        self._test_vmdk_bad_descriptor_mem_limit()
+
+    def test_vmdk_bad_descriptor_mem_limit_stream_optimized(self):
+        self._test_vmdk_bad_descriptor_mem_limit(subformat='streamOptimized')
+
+    def test_qcow2_safety_checks(self):
+        # Create backing and data-file names (and initialize the backing file)
+        backing_fn = tempfile.mktemp(prefix='backing')
+        self._created_files.append(backing_fn)
+        with open(backing_fn, 'w') as f:
+            f.write('foobar')
+        data_fn = tempfile.mktemp(prefix='data')
+        self._created_files.append(data_fn)
+
+        # A qcow with no backing or data file is safe
+        fn = self._create_img('qcow2', 5 * units.Mi, None)
+        inspector = format_inspector.QcowInspector.from_file(fn)
+        self.assertTrue(inspector.safety_check())
+
+        # A backing file makes it unsafe
+        fn = self._create_img('qcow2', 5 * units.Mi, None,
+                              backing_file=backing_fn)
+        inspector = format_inspector.QcowInspector.from_file(fn)
+        self.assertFalse(inspector.safety_check())
+
+        # A data-file makes it unsafe
+        fn = self._create_img('qcow2', 5 * units.Mi,
+                              options={'data_file': data_fn,
+                                       'data_file_raw': 'on'})
+        inspector = format_inspector.QcowInspector.from_file(fn)
+        self.assertFalse(inspector.safety_check())
+
+        # Trying to load a non-QCOW file is an error
+        self.assertRaises(format_inspector.ImageFormatError,
+                          format_inspector.QcowInspector.from_file,
+                          backing_fn)
+
+    def test_qcow2_feature_flag_checks(self):
+        data = bytearray(512)
+        data[0:4] = b'QFI\xFB'
+        inspector = format_inspector.QcowInspector()
+        inspector.region('header').data = data
+
+        # All zeros, no feature flags - all good
+        self.assertFalse(inspector.has_unknown_features)
+
+        # A feature flag set in the first byte (highest-order) is not
+        # something we know about, so fail.
+        data[0x48] = 0x01
+        self.assertTrue(inspector.has_unknown_features)
+
+        # The first bit in the last byte (lowest-order) is known (the dirty
+        # bit) so that should pass
+        data[0x48] = 0x00
+        data[0x4F] = 0x01
+        self.assertFalse(inspector.has_unknown_features)
+
+        # Currently (as of 2024), the high-order feature flag bit in the low-
+        # order byte is not assigned, so make sure we reject it.
+        data[0x4F] = 0x80
+        self.assertTrue(inspector.has_unknown_features)
+
+    def test_vdi(self):
+        self._test_format('vdi')
+
+    def _test_format_with_invalid_data(self, format_name):
+        fmt = format_inspector.get_inspector(format_name)()
+        wrapper = format_inspector.InfoWrapper(open(__file__, 'rb'), fmt)
+        while True:
+            chunk = wrapper.read(32)
+            if not chunk:
+                break
+
+        wrapper.close()
+        self.assertFalse(fmt.format_match)
+        self.assertEqual(0, fmt.virtual_size)
+        memory = sum(fmt.context_info.values())
+        self.assertLess(memory, 512 * units.Ki,
+                        'Format used more than 512KiB of memory: %s' % (
+                            fmt.context_info))
+
+    def test_qcow2_invalid(self):
+        self._test_format_with_invalid_data('qcow2')
+
+    def test_vhd_invalid(self):
+        self._test_format_with_invalid_data('vhd')
+
+    def test_vhdx_invalid(self):
+        self._test_format_with_invalid_data('vhdx')
+
+    def test_vmdk_invalid(self):
+        self._test_format_with_invalid_data('vmdk')
+
+    def test_vdi_invalid(self):
+        self._test_format_with_invalid_data('vdi')
+
+    def test_vmdk_invalid_type(self):
+        fmt = format_inspector.get_inspector('vmdk')()
+        wrapper = format_inspector.InfoWrapper(open(__file__, 'rb'), fmt)
+        while True:
+            chunk = wrapper.read(32)
+            if not chunk:
+                break
+
+        wrapper.close()
+
+        fake_rgn = mock.MagicMock()
+        fake_rgn.complete = True
+        fake_rgn.data = b'foocreateType="someunknownformat"bar'
+
+        with mock.patch.object(fmt, 'has_region', return_value=True):
+            with mock.patch.object(fmt, 'region', return_value=fake_rgn):
+                self.assertEqual(0, fmt.virtual_size)
+
+
+class TestFormatInspectorInfra(test.NoDBTestCase):
+    def _test_capture_region_bs(self, bs):
+        data = b''.join(chr(x).encode() for x in range(ord('A'), ord('z')))
+
+        regions = [
+            format_inspector.CaptureRegion(3, 9),
+            format_inspector.CaptureRegion(0, 256),
+            format_inspector.CaptureRegion(32, 8),
+        ]
+
+        for region in regions:
+            # None of them should be complete yet
+            self.assertFalse(region.complete)
+
+        pos = 0
+        for i in range(0, len(data), bs):
+            chunk = data[i:i + bs]
+            pos += len(chunk)
+            for region in regions:
+                region.capture(chunk, pos)
+
+        self.assertEqual(data[3:12], regions[0].data)
+        self.assertEqual(data[0:256], regions[1].data)
+        self.assertEqual(data[32:40], regions[2].data)
+
+        # The small regions should be complete
+        self.assertTrue(regions[0].complete)
+        self.assertTrue(regions[2].complete)
+
+        # This region extended past the available data, so not complete
+        self.assertFalse(regions[1].complete)
+
+    def test_capture_region(self):
+        for block_size in (1, 3, 7, 13, 32, 64):
+            self._test_capture_region_bs(block_size)
+
+    def _get_wrapper(self, data):
+        source = io.BytesIO(data)
+        fake_fmt = mock.create_autospec(format_inspector.get_inspector('raw'))
+        return format_inspector.InfoWrapper(source, fake_fmt)
+
+    def test_info_wrapper_file_like(self):
+        data = b''.join(chr(x).encode() for x in range(ord('A'), ord('z')))
+        wrapper = self._get_wrapper(data)
+
+        read_data = b''
+        while True:
+            chunk = wrapper.read(8)
+            if not chunk:
+                break
+            read_data += chunk
+
+        self.assertEqual(data, read_data)
+
+    def test_info_wrapper_iter_like(self):
+        data = b''.join(chr(x).encode() for x in range(ord('A'), ord('z')))
+        wrapper = self._get_wrapper(data)
+
+        read_data = b''
+        for chunk in wrapper:
+            read_data += chunk
+
+        self.assertEqual(data, read_data)
+
+    def test_info_wrapper_file_like_eats_error(self):
+        wrapper = self._get_wrapper(b'123456')
+        wrapper._format.eat_chunk.side_effect = Exception('fail')
+
+        data = b''
+        while True:
+            chunk = wrapper.read(3)
+            if not chunk:
+                break
+            data += chunk
+
+        # Make sure we got all the data despite the error
+        self.assertEqual(b'123456', data)
+
+        # Make sure we only called this once and never again after
+        # the error was raised
+        wrapper._format.eat_chunk.assert_called_once_with(b'123')
+
+    def test_info_wrapper_iter_like_eats_error(self):
+        fake_fmt = mock.create_autospec(format_inspector.get_inspector('raw'))
+        wrapper = format_inspector.InfoWrapper(iter([b'123', b'456']),
+                                               fake_fmt)
+        fake_fmt.eat_chunk.side_effect = Exception('fail')
+
+        data = b''
+        for chunk in wrapper:
+            data += chunk
+
+        # Make sure we got all the data despite the error
+        self.assertEqual(b'123456', data)
+
+        # Make sure we only called this once and never again after
+        # the error was raised
+        fake_fmt.eat_chunk.assert_called_once_with(b'123')
+
+    def test_get_inspector(self):
+        self.assertEqual(format_inspector.QcowInspector,
+                         format_inspector.get_inspector('qcow2'))
+        self.assertIsNone(format_inspector.get_inspector('foo'))
+
+
+class TestFormatInspectorsTargeted(test.NoDBTestCase):
+    def _make_vhd_meta(self, guid_raw, item_length):
+        # Meta region header, padded to 32 bytes
+        data = struct.pack('<8sHH', b'metadata', 0, 1)
+        data += b'0' * 20
+
+        # Metadata table entry, 16-byte GUID, 12-byte information,
+        # padded to 32-bytes
+        data += guid_raw
+        data += struct.pack('<III', 256, item_length, 0)
+        data += b'0' * 6
+
+        return data
+
+    def test_vhd_table_over_limit(self):
+        ins = format_inspector.VHDXInspector()
+        meta = format_inspector.CaptureRegion(0, 0)
+        desired = b'012345678ABCDEF0'
+        # This is a poorly-crafted image that specifies a larger table size
+        # than is allowed
+        meta.data = self._make_vhd_meta(desired, 33 * 2048)
+        ins.new_region('metadata', meta)
+        new_region = ins._find_meta_entry(ins._guid(desired))
+        # Make sure we clamp to our limit of 32 * 2048
+        self.assertEqual(
+            format_inspector.VHDXInspector.VHDX_METADATA_TABLE_MAX_SIZE,
+            new_region.length)
+
+    def test_vhd_table_under_limit(self):
+        ins = format_inspector.VHDXInspector()
+        meta = format_inspector.CaptureRegion(0, 0)
+        desired = b'012345678ABCDEF0'
+        meta.data = self._make_vhd_meta(desired, 16 * 2048)
+        ins.new_region('metadata', meta)
+        new_region = ins._find_meta_entry(ins._guid(desired))
+        # Table size was under the limit, make sure we get it back
+        self.assertEqual(16 * 2048, new_region.length)

From ae7b92d428be72bdb3e03aee001914f98061546e Mon Sep 17 00:00:00 2001
From: Sean Mooney <work@seanmooney.info>
Date: Thu, 4 Jul 2024 13:55:41 +0100
Subject: [PATCH 18/23] Reproduce iso regression with deep format inspection

This change adds a reproducer for the regression in iso
file support when
workarounds.disable_deep_image_inspection = False

Change-Id: I56d8b9980b4871941ba5de91e60a7df6a40106a8
(cherry picked from commit b5a1d3b4b2d0aaa351479b1d7e41a3895c28fab0)
(cherry picked from commit 3a6d9a038fad2bd58bdf4fb87af04158301a6929)
(cherry picked from commit 000b435a44e905122a45d3b137a576c60bf42a58)
(cherry picked from commit 1233d7b935c018e79728c5691216fa2569affe08)
(cherry picked from commit fb86ca6cf02d93810cc5503765c3b707f70bd5c0)
---
 .../tests/unit/image/test_format_inspector.py | 72 ++++++++++++++++---
 1 file changed, 63 insertions(+), 9 deletions(-)

diff --git a/nova/tests/unit/image/test_format_inspector.py b/nova/tests/unit/image/test_format_inspector.py
index 4bda796ea42..9bd99c03cca 100644
--- a/nova/tests/unit/image/test_format_inspector.py
+++ b/nova/tests/unit/image/test_format_inspector.py
@@ -27,6 +27,9 @@
 from nova import test
 
 
+TEST_IMAGE_PREFIX = 'nova-unittest-formatinspector-'
+
+
 def get_size_from_qemu_img(filename):
     output = subprocess.check_output('qemu-img info "%s"' % filename,
                                      shell=True)
@@ -41,13 +44,6 @@ def get_size_from_qemu_img(filename):
 class TestFormatInspectors(test.NoDBTestCase):
     def setUp(self):
         super(TestFormatInspectors, self).setUp()
-        # these tests depend on qemu-img being installed
-        # and in the path, if it is not installed, skip
-        try:
-            subprocess.check_output('qemu-img --version', shell=True)
-        except Exception:
-            self.skipTest('qemu-img not installed')
-
         self._created_files = []
 
     def tearDown(self):
@@ -58,8 +54,55 @@ def tearDown(self):
             except Exception:
                 pass
 
+    def _create_iso(self, image_size, subformat='iso-9660'):
+        # these tests depend on mkisofs
+        # being installed and in the path,
+        # if it is not installed, skip
+        try:
+            subprocess.check_output('mkisofs --version', shell=True)
+        except Exception:
+            self.skipTest('mkisofs not installed')
+
+        size = image_size // units.Mi
+        base_cmd = "mkisofs"
+        if subformat == 'udf':
+            # depending on the distribution mkisofs may not support udf
+            # and may be provided by genisoimage instead. As a result we
+            # need to check if the command supports udf via help
+            # instead of checking the installed version.
+            # mkisofs --help outputs to stderr so we need to
+            # redirect it to stdout to use grep.
+            try:
+                subprocess.check_output(
+                    'mkisofs --help 2>&1 | grep udf', shell=True)
+            except Exception:
+                self.skipTest('mkisofs does not support udf format')
+            base_cmd += " -udf"
+        prefix = TEST_IMAGE_PREFIX
+        prefix += '-%s-' % subformat
+        fn = tempfile.mktemp(prefix=prefix, suffix='.iso')
+        self._created_files.append(fn)
+        subprocess.check_output(
+            'dd if=/dev/zero of=%s bs=1M count=%i' % (fn, size),
+            shell=True)
+        subprocess.check_output(
+            '%s -o %s -V "TEST" -J -r %s' % (base_cmd, fn, fn),
+            shell=True)
+        return fn
+
     def _create_img(self, fmt, size, subformat=None, options=None,
                     backing_file=None):
+        if fmt == 'iso':
+            return self._create_iso(size, subformat)
+
+        # these tests depend on qemu-img
+        # being installed and in the path,
+        # if it is not installed, skip
+        try:
+            subprocess.check_output('qemu-img --version', shell=True)
+        except Exception:
+            self.skipTest('qemu-img not installed')
+
         if fmt == 'vhd':
             # QEMU calls the vhd format vpc
             fmt = 'vpc'
@@ -67,7 +110,7 @@ def _create_img(self, fmt, size, subformat=None, options=None,
         if options is None:
             options = {}
         opt = ''
-        prefix = 'nova-unittest-formatinspector-'
+        prefix = TEST_IMAGE_PREFIX
 
         if subformat:
             options['subformat'] = subformat
@@ -97,7 +140,8 @@ def _create_allocated_vmdk(self, size_mb, subformat=None):
             # Matches qemu-img default, see `qemu-img convert -O vmdk -o help`
             subformat = 'monolithicSparse'
 
-        prefix = 'nova-unittest-formatinspector-%s-' % subformat
+        prefix = TEST_IMAGE_PREFIX
+        prefix += '-%s-' % subformat
         fn = tempfile.mktemp(prefix=prefix, suffix='.vmdk')
         self._created_files.append(fn)
         raw = tempfile.mktemp(prefix=prefix, suffix='.raw')
@@ -165,6 +209,16 @@ def _test_format(self, format_name, subformat=None):
     def test_qcow2(self):
         self._test_format('qcow2')
 
+    def test_iso_9660(self):
+        # reproduce iso-9660 format regression
+        self.assertRaises(
+            TypeError, self._test_format, 'iso', subformat='iso-9660')
+
+    def test_udf(self):
+        # reproduce udf format regression
+        self.assertRaises(
+            TypeError, self._test_format, 'iso', subformat='udf')
+
     def test_vhd(self):
         self._test_format('vhd')
 

From 63758759e750dc3a072e08353e7450f4b1b3135b Mon Sep 17 00:00:00 2001
From: Sean Mooney <work@seanmooney.info>
Date: Thu, 4 Jul 2024 20:09:31 +0100
Subject: [PATCH 19/23] Add iso file format inspector

This change includes unit tests for the ISO
format inspector using mkisofs to generate
the iso files.

A test for stashing qcow content in the system_area
of an iso file is also included.

This change modifies format_inspector.detect_file_format
to evaluate all inspectors until they are complete and
raise an InvalidDiskInfo exception if multiple formats
match.

Related-Bug: #2059809
Change-Id: I7e12718fb3e1f77eb8d1cfcb9fa64e8ddeb9e712
(cherry picked from commit b1cc39848ebe9b9cb63141a647bda52a2842ee4b)
(cherry picked from commit eeda7c333c773216c216159926673874ce4843ba)
(cherry picked from commit 24628ecbbe9d5fdd4fe6767ca92395f0d3da9e48)
(cherry picked from commit 65f0789df05e2ba7f11c0eaf2c6959367acbced2)
(cherry picked from commit e8f00617ed319aa37f6946cf10883eef6d180612)
---
 nova/image/format_inspector.py                | 109 +++++++++++++++++-
 .../tests/unit/image/test_format_inspector.py | 106 ++++++++++++++---
 nova/tests/unit/virt/test_images.py           |  28 +++++
 nova/virt/images.py                           |   5 +
 4 files changed, 230 insertions(+), 18 deletions(-)

diff --git a/nova/image/format_inspector.py b/nova/image/format_inspector.py
index 8e57d7ed2c4..49cb75930a9 100644
--- a/nova/image/format_inspector.py
+++ b/nova/image/format_inspector.py
@@ -24,6 +24,7 @@
 import struct
 
 from oslo_log import log as logging
+from oslo_utils import units
 
 LOG = logging.getLogger(__name__)
 
@@ -843,6 +844,93 @@ def __str__(self):
         return 'vdi'
 
 
+class ISOInspector(FileInspector):
+    """ISO 9660 and UDF format
+
+    we need to check the first 32KB + descriptor size
+    to look for the ISO 9660 or UDF signature.
+
+    http://wiki.osdev.org/ISO_9660
+    http://wiki.osdev.org/UDF
+    mkisofs --help  | grep udf
+
+    The Universal Disc Format or UDF is the filesystem used on DVDs and
+    Blu-Ray discs.UDF is an extension of ISO 9660 and shares the same
+    header structure and initial layout.
+
+    Like the CDFS(ISO 9660) file system,
+    the UDF file system uses a 2048 byte sector size,
+    and it designates that the first 16 sectors can be used by the OS
+    to store proprietary data or boot logic.
+
+    That means we need to check the first 32KB + descriptor size
+    to look for the ISO 9660 or UDF signature.
+    both formats have an extent based layout, so we can't determine
+    ahead of time where the descriptor will be located.
+
+    fortunately, the ISO 9660 and UDF formats have a Primary Volume Descriptor
+    located at the beginning of the image, which contains the volume size.
+
+    """
+
+    def __init__(self, *a, **k):
+        super(ISOInspector, self).__init__(*a, **k)
+        self.new_region('system_area', CaptureRegion(0, 32 * units.Ki))
+        self.new_region('header', CaptureRegion(32 * units.Ki, 2 * units.Ki))
+
+    @property
+    def format_match(self):
+        if not self.complete:
+            return False
+        signature = self.region('header').data[1:6]
+        assert len(signature) == 5
+        return signature in (b'CD001', b'NSR02', b'NSR03')
+
+    @property
+    def virtual_size(self):
+        if not self.complete:
+            return 0
+        if not self.format_match:
+            return 0
+
+        # the header size is 2KB or 1 sector
+        # the first header field is the descriptor type which is 1 byte
+        # the second field is the standard identifier which is 5 bytes
+        # the third field is the version which is 1 byte
+        # the rest of the header contains type specific data is 2041 bytes
+        # see http://wiki.osdev.org/ISO_9660#The_Primary_Volume_Descriptor
+
+        # we need to check that the descriptor type is 1
+        # to ensure that this is a primary volume descriptor
+        descriptor_type = self.region('header').data[0]
+        if descriptor_type != 1:
+            return 0
+        # The size in bytes of a logical block is stored at offset 128
+        # and is 2 bytes long encoded in both little and big endian
+        # int16_LSB-MSB so the field is 4 bytes long
+        logical_block_size_data = self.region('header').data[128:132]
+        assert len(logical_block_size_data) == 4
+        # given the encoding we only need to read half the field so we
+        # can use the first 2 bytes which are the little endian part
+        # this is normally 2048 or 2KB but we need to check as it can be
+        # different according to the ISO 9660 standard.
+        logical_block_size, = struct.unpack('<H', logical_block_size_data[:2])
+        # The volume space size is the total number of logical blocks
+        # and is stored at offset 80 and is 8 bytes long
+        # as with the logical block size the field is encoded in both
+        # little and big endian as an int32_LSB-MSB
+        volume_space_size_data = self.region('header').data[80:88]
+        assert len(volume_space_size_data) == 8
+        # given the encoding we only need to read half the field so we
+        # can use the first 4 bytes which are the little endian part
+        volume_space_size, = struct.unpack('<L', volume_space_size_data[:4])
+        # the virtual size is the volume space size * logical block size
+        return volume_space_size * logical_block_size
+
+    def __str__(self):
+        return 'iso'
+
+
 class InfoWrapper(object):
     """A file-like object that wraps another and updates a format inspector.
 
@@ -896,6 +984,7 @@ def close(self):
     'vmdk': VMDKInspector,
     'vdi': VDIInspector,
     'qed': QEDInspector,
+    'iso': ISOInspector,
 }
 
 
@@ -913,12 +1002,15 @@ def detect_file_format(filename):
     """Attempts to detect the format of a file.
 
     This runs through a file one time, running all the known inspectors in
-    parallel. It stops reading the file once one of them matches or all of
+    parallel. It stops reading the file once all of them matches or all of
     them are sure they don't match.
 
-    Returns the FileInspector that matched, if any. None if 'raw'.
+    :param filename: The path to the file to inspect.
+    :returns: A FormatInspector instance matching the file.
+    :raises: ImageFormatError if multiple formats are detected.
     """
     inspectors = {k: v() for k, v in ALL_FORMATS.items()}
+    detections = []
     with open(filename, 'rb') as f:
         for chunk in chunked_reader(f):
             for format, inspector in list(inspectors.items()):
@@ -930,10 +1022,17 @@ def detect_file_format(filename):
                     continue
                 if (inspector.format_match and inspector.complete and
                         format != 'raw'):
-                    # First complete match (other than raw) wins
-                    return inspector
+                    # record all match (other than raw)
+                    detections.append(inspector)
+                    inspectors.pop(format)
             if all(i.complete for i in inspectors.values()):
                 # If all the inspectors are sure they are not a match, avoid
                 # reading to the end of the file to settle on 'raw'.
                 break
-    return inspectors['raw']
+
+    if len(detections) > 1:
+        all_formats = [str(inspector) for inspector in detections]
+        raise ImageFormatError(
+            'Multiple formats detected: %s' % ', '.join(all_formats))
+
+    return inspectors['raw'] if not detections else detections[0]
diff --git a/nova/tests/unit/image/test_format_inspector.py b/nova/tests/unit/image/test_format_inspector.py
index 9bd99c03cca..40012503207 100644
--- a/nova/tests/unit/image/test_format_inspector.py
+++ b/nova/tests/unit/image/test_format_inspector.py
@@ -54,7 +54,13 @@ def tearDown(self):
             except Exception:
                 pass
 
-    def _create_iso(self, image_size, subformat='iso-9660'):
+    def _create_iso(self, image_size, subformat='9660'):
+        """Create an ISO file of the given size.
+
+        :param image_size: The size of the image to create in bytes
+        :param subformat: The subformat to use, if any
+        """
+
         # these tests depend on mkisofs
         # being installed and in the path,
         # if it is not installed, skip
@@ -86,12 +92,22 @@ def _create_iso(self, image_size, subformat='iso-9660'):
             'dd if=/dev/zero of=%s bs=1M count=%i' % (fn, size),
             shell=True)
         subprocess.check_output(
-            '%s -o %s -V "TEST" -J -r %s' % (base_cmd, fn, fn),
+            '%s -V "TEST" -o %s  %s' % (base_cmd, fn, fn),
             shell=True)
         return fn
 
-    def _create_img(self, fmt, size, subformat=None, options=None,
-                    backing_file=None):
+    def _create_img(
+            self, fmt, size, subformat=None, options=None,
+            backing_file=None):
+        """Create an image file of the given format and size.
+
+        :param fmt: The format to create
+        :param size: The size of the image to create in bytes
+        :param subformat: The subformat to use, if any
+        :param options: A dictionary of options to pass to the format
+        :param backing_file: The backing file to use, if any
+        """
+
         if fmt == 'iso':
             return self._create_iso(size, subformat)
 
@@ -177,6 +193,13 @@ def _test_format_at_block_size(self, format_name, img, block_size):
 
     def _test_format_at_image_size(self, format_name, image_size,
                                    subformat=None):
+        """Test the format inspector for the given format at the
+        given image size.
+
+        :param format_name: The format to test
+        :param image_size: The size of the image to create in bytes
+        :param subformat: The subformat to use, if any
+        """
         img = self._create_img(format_name, image_size, subformat=subformat)
 
         # Some formats have internal alignment restrictions making this not
@@ -185,7 +208,15 @@ def _test_format_at_image_size(self, format_name, image_size,
 
         # Read the format in various sizes, some of which will read whole
         # sections in a single read, others will be completely unaligned, etc.
-        for block_size in (64 * units.Ki, 512, 17, 1 * units.Mi):
+        block_sizes = [64 * units.Ki, 1 * units.Mi]
+        # ISO images have a 32KB system area at the beginning of the image
+        # as a result reading that in 17 or 512 byte blocks takes too long,
+        # causing the test to fail. The 64KiB block size is enough to read
+        # the system area and header in a single read. the 1MiB block size
+        # adds very little time to the test so we include it.
+        if format_name != 'iso':
+            block_sizes.extend([17, 512])
+        for block_size in block_sizes:
             fmt = self._test_format_at_block_size(format_name, img, block_size)
             self.assertTrue(fmt.format_match,
                             'Failed to match %s at size %i block %i' % (
@@ -210,14 +241,63 @@ def test_qcow2(self):
         self._test_format('qcow2')
 
     def test_iso_9660(self):
-        # reproduce iso-9660 format regression
-        self.assertRaises(
-            TypeError, self._test_format, 'iso', subformat='iso-9660')
-
-    def test_udf(self):
-        # reproduce udf format regression
-        self.assertRaises(
-            TypeError, self._test_format, 'iso', subformat='udf')
+        self._test_format('iso', subformat='9660')
+
+    def test_iso_udf(self):
+        self._test_format('iso', subformat='udf')
+
+    def _generate_bad_iso(self):
+        # we want to emulate a malicious user who uploads a an
+        # ISO file has a qcow2 header in the system area
+        # of the ISO file
+        # we will create a qcow2 image and an ISO file
+        # and then copy the qcow2 header to the ISO file
+        # e.g.
+        #   mkisofs -o orig.iso /etc/resolv.conf
+        #   qemu-img create orig.qcow2 -f qcow2 64M
+        #   dd if=orig.qcow2 of=outcome bs=32K count=1
+        #   dd if=orig.iso of=outcome bs=32K skip=1 seek=1
+
+        qcow = self._create_img('qcow2', 10 * units.Mi)
+        iso = self._create_iso(64 * units.Mi, subformat='9660')
+        # first ensure the files are valid
+        iso_fmt = self._test_format_at_block_size('iso', iso, 4 * units.Ki)
+        self.assertTrue(iso_fmt.format_match)
+        qcow_fmt = self._test_format_at_block_size('qcow2', qcow, 4 * units.Ki)
+        self.assertTrue(qcow_fmt.format_match)
+        # now copy the qcow2 header to an ISO file
+        prefix = TEST_IMAGE_PREFIX
+        prefix += '-bad-'
+        fn = tempfile.mktemp(prefix=prefix, suffix='.iso')
+        self._created_files.append(fn)
+        subprocess.check_output(
+            'dd if=%s of=%s bs=32K count=1' % (qcow, fn),
+            shell=True)
+        subprocess.check_output(
+            'dd if=%s of=%s bs=32K skip=1 seek=1' % (iso, fn),
+            shell=True)
+        return qcow, iso, fn
+
+    def test_bad_iso_qcow2(self):
+
+        _, _, fn = self._generate_bad_iso()
+
+        iso_check = self._test_format_at_block_size('iso', fn, 4 * units.Ki)
+        qcow_check = self._test_format_at_block_size('qcow2', fn, 4 * units.Ki)
+        # this system area of the ISO file is not considered part of the format
+        # the qcow2 header is in the system area of the ISO file
+        # so the ISO file is still valid
+        self.assertTrue(iso_check.format_match)
+        # the qcow2 header is in the system area of the ISO file
+        # but that will be parsed by the qcow2 format inspector
+        # and it will match
+        self.assertTrue(qcow_check.format_match)
+        # if we call format_inspector.detect_file_format it should detect
+        # and raise an exception because both match internally.
+        e = self.assertRaises(
+            format_inspector.ImageFormatError,
+            format_inspector.detect_file_format, fn)
+        self.assertIn('Multiple formats detected', str(e))
 
     def test_vhd(self):
         self._test_format('vhd')
diff --git a/nova/tests/unit/virt/test_images.py b/nova/tests/unit/virt/test_images.py
index 55943f7f308..8649644eb5f 100644
--- a/nova/tests/unit/virt/test_images.py
+++ b/nova/tests/unit/virt/test_images.py
@@ -235,6 +235,34 @@ def test_fetch_checks_vmdk_rules(self, mock_info, mock_fetch, mock_gi,
                                   images.fetch_to_raw, None, 'foo', 'anypath')
             self.assertIn('Invalid VMDK create-type specified', str(e))
 
+    @mock.patch('os.rename')
+    @mock.patch.object(images, 'IMAGE_API')
+    @mock.patch('nova.image.format_inspector.get_inspector')
+    @mock.patch.object(images, 'fetch')
+    @mock.patch('nova.privsep.qemu.unprivileged_qemu_img_info')
+    def test_fetch_iso_is_raw(self, mock_info, mock_fetch, mock_gi,
+                              mock_glance, mock_rename):
+        mock_glance.get.return_value = {'disk_format': 'iso'}
+        inspector = mock_gi.return_value.from_file.return_value
+        inspector.safety_check.return_value = True
+        # qemu-img does not have a parser for iso so it is treated as raw
+        info = {
+            "virtual-size": 356352,
+            "filename": "foo.iso",
+            "format": "raw",
+            "actual-size": 356352,
+            "dirty-flag": False
+        }
+        mock_info.return_value = jsonutils.dumps(info)
+        with mock.patch('os.path.exists', return_value=True):
+            images.fetch_to_raw(None, 'foo', 'anypath')
+        # Make sure we called info with -f raw for an iso, since qemu-img does
+        # not support iso
+        mock_info.assert_called_once_with('anypath.part', format='raw')
+        # Make sure that since we considered this to be a raw file, we did the
+        # just-rename-don't-convert path
+        mock_rename.assert_called_once_with('anypath.part', 'anypath')
+
     @mock.patch.object(images, 'IMAGE_API')
     @mock.patch('nova.image.format_inspector.get_inspector')
     @mock.patch.object(images, 'qemu_img_info')
diff --git a/nova/virt/images.py b/nova/virt/images.py
index 5ec0dc0b6ba..813696ed7d7 100644
--- a/nova/virt/images.py
+++ b/nova/virt/images.py
@@ -171,6 +171,11 @@ def do_image_deep_inspection(img, image_href, path):
             raise exception.ImageUnacceptable(
                 image_id=image_href,
                 reason=_('Image not in a supported format'))
+
+    if disk_format == 'iso':
+        # ISO image passed safety check; qemu will treat this as raw from here
+        disk_format = 'raw'
+
     return disk_format
 
 

From d9b8dad4367a947ced0718b8d90636abd2b8e6fd Mon Sep 17 00:00:00 2001
From: Sean Mooney <work@seanmooney.info>
Date: Tue, 9 Jul 2024 15:09:09 +0100
Subject: [PATCH 20/23] fix qemu-img version dependent tests

while backporting Ia34203f246f0bc574e11476287dfb33fda7954fe

We observed that several of the tests showed distro specific
behavior depending on if qemu was installed in the test env,
what version is installed and how it was compiled

This change ensures that if qemu is present that it
supprot the required formats otherwise it skips the test.

Change-Id: I131996cdd7aaf1f52d4caac33b153753ff6db869
(cherry picked from commit cc2514d02e0b0ebaf60a46d02732f7f8facc3191)
(cherry picked from commit ae10fde55b113bc0a34bc69ff63bab809bc98ef3)
(cherry picked from commit bb2645e92c98da0e02d650dab5ab90cafcbb824b)
(cherry picked from commit 673103fd63a516dad3f6da14b95d34f9dd605c21)
(cherry picked from commit dae4230fcc1c5539ecab52eb5f7755cc844420cd)
---
 .../tests/unit/image/test_format_inspector.py | 20 +++++++++++--------
 1 file changed, 12 insertions(+), 8 deletions(-)

diff --git a/nova/tests/unit/image/test_format_inspector.py b/nova/tests/unit/image/test_format_inspector.py
index 40012503207..a8e688b8eb3 100644
--- a/nova/tests/unit/image/test_format_inspector.py
+++ b/nova/tests/unit/image/test_format_inspector.py
@@ -111,18 +111,22 @@ def _create_img(
         if fmt == 'iso':
             return self._create_iso(size, subformat)
 
-        # these tests depend on qemu-img
-        # being installed and in the path,
-        # if it is not installed, skip
-        try:
-            subprocess.check_output('qemu-img --version', shell=True)
-        except Exception:
-            self.skipTest('qemu-img not installed')
-
         if fmt == 'vhd':
             # QEMU calls the vhd format vpc
             fmt = 'vpc'
 
+        # these tests depend on qemu-img being installed and in the path,
+        # if it is not installed, skip. we also need to ensure that the
+        # format is supported by qemu-img, this can vary depending on the
+        # distribution so we need to check if the format is supported via
+        # the help output.
+        try:
+            subprocess.check_output(
+                'qemu-img --help | grep %s' % fmt, shell=True)
+        except Exception:
+            self.skipTest(
+                'qemu-img not installed or does not support %s format' % fmt)
+
         if options is None:
             options = {}
         opt = ''

From ea57c1d7fb5fbb895229958f4a61df9333aa3a88 Mon Sep 17 00:00:00 2001
From: Balazs Gibizer <gibi@redhat.com>
Date: Thu, 11 Jul 2024 07:29:40 +0200
Subject: [PATCH 21/23] Stabilize iso format unit tests

Some version of mkisofs does not properly handle if both the input and
the output file of the command are the same. So this commit changes the
unit tests depending on that binary to use a different files.

Related-Bug: #2059809
Change-Id: I6924eb23ff5804c22a48ec6fabcec25f061906bb
(cherry picked from commit c6d8c6972d52845774b36acb84cd08a4b2e4dcde)
(cherry picked from commit a8783a767551df3dd943bd862cdba35c51cdb7a6)
(cherry picked from commit 02147b36d35e1e462e1405c36a2e67a33de806de)
(cherry picked from commit 47428f6caf503b94583dac614b59971f60a0ba9c)
(cherry picked from commit 11613e7b3244958fa8d0b5253a185287d1ade2d8)
---
 nova/tests/unit/image/test_format_inspector.py | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/nova/tests/unit/image/test_format_inspector.py b/nova/tests/unit/image/test_format_inspector.py
index a8e688b8eb3..8406dfca378 100644
--- a/nova/tests/unit/image/test_format_inspector.py
+++ b/nova/tests/unit/image/test_format_inspector.py
@@ -91,10 +91,15 @@ def _create_iso(self, image_size, subformat='9660'):
         subprocess.check_output(
             'dd if=/dev/zero of=%s bs=1M count=%i' % (fn, size),
             shell=True)
+        # We need to use different file as input and output as the behavior
+        # of mkisofs is version dependent if both the input and the output
+        # are the same and can cause test failures
+        out_fn = "%s.iso" % fn
         subprocess.check_output(
-            '%s -V "TEST" -o %s  %s' % (base_cmd, fn, fn),
+            '%s -V "TEST" -o %s  %s' % (base_cmd, out_fn, fn),
             shell=True)
-        return fn
+        self._created_files.append(out_fn)
+        return out_fn
 
     def _create_img(
             self, fmt, size, subformat=None, options=None,

From 416ac36b8dcbeb3d65a6271d4ff3696e379c0ec7 Mon Sep 17 00:00:00 2001
From: Pierre Riteau <pierre@stackhpc.com>
Date: Tue, 23 Jul 2024 18:17:41 +0200
Subject: [PATCH 22/23] CI: Install qemu-img for unit tests

Change-Id: I3b38344a127764b4fa62fe062e825074d6deab60
---
 bindep.txt | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/bindep.txt b/bindep.txt
index 3a4d7bef806..f774113bc26 100644
--- a/bindep.txt
+++ b/bindep.txt
@@ -55,3 +55,5 @@ pcre-devel [platform:rpm test]
 # runtime and in unit tests. Net result is the same that lsscsi will be
 # installed for any nova installation.
 lsscsi
+# NOTE(priteau): Install qemu-img for format inspector tests
+qemu-utils [platform:dpkg test]

From c6936bb6c9f0c5e45520d6bbaecb10812f91f954 Mon Sep 17 00:00:00 2001
From: Dan Smith <dansmith@redhat.com>
Date: Wed, 10 Jul 2024 14:23:33 +0100
Subject: [PATCH 23/23] Change force_format strategy to catch mismatches

When we moved the qemu-img command in fetch_to_raw() to force the
format to what we expect, we lost the ability to identify and react
to situations where qemu-img detected a file as a format that is not
supported by us (i.e. identfied and safety-checked by
format_inspector). In the case of some of the other VMDK variants
that we don't support, we need to be sure to catch any case where
qemu-img thinks it's something other than raw when we think it is,
which will be the case for those formats we don't support.

Note this also moves us from explicitly using the format_inspector
that we're told by glance is appropriate, to using our own detection.
We assert that we agree with glance and as above, qemu agrees with
us. This helps us avoid cases where the uploader lies about the
image format, causing us to not run the appropriate safety check.
AMI formats are a liability here since we have a very hard time
asserting what they are and what they will be detected as later in
the pipeline, so there is still special-casing for those.

Closes-Bug: #2071734
Change-Id: I4b792c5bc959a904854c21565682ed3a687baa1a
(cherry picked from commit 8b4c522f6699514e7d1f20ac25cf426af6ea588f)
(cherry picked from commit 8ef5ec9716c9edbd662ca27b6e39b7848b14f492)
(cherry picked from commit 45d948938314997ba400a5fc2bb48bc821c260ab)
(cherry picked from commit fbe429051e1fbcd494c71525870651e92e121449)
---
 nova/tests/unit/virt/libvirt/test_utils.py | 23 +++---
 nova/tests/unit/virt/test_images.py        | 96 +++++++++++++---------
 nova/virt/images.py                        | 62 +++++++++-----
 3 files changed, 108 insertions(+), 73 deletions(-)

diff --git a/nova/tests/unit/virt/libvirt/test_utils.py b/nova/tests/unit/virt/libvirt/test_utils.py
index 8a43c45f34b..0d0fad30b33 100644
--- a/nova/tests/unit/virt/libvirt/test_utils.py
+++ b/nova/tests/unit/virt/libvirt/test_utils.py
@@ -390,12 +390,12 @@ def test_fetch_initrd_image(self, mock_images):
             _context, image_id, target, trusted_certs)
 
     @mock.patch.object(images, 'IMAGE_API')
-    @mock.patch.object(format_inspector, 'get_inspector')
+    @mock.patch.object(format_inspector, 'detect_file_format')
     @mock.patch.object(compute_utils, 'disk_ops_semaphore')
     @mock.patch('nova.privsep.utils.supports_direct_io', return_value=True)
     @mock.patch('nova.privsep.qemu.unprivileged_convert_image')
     def test_fetch_raw_image(self, mock_convert_image, mock_direct_io,
-                             mock_disk_op_sema, mock_gi, mock_glance):
+                             mock_disk_op_sema, mock_detect, mock_glance):
 
         def fake_rename(old, new):
             self.executes.append(('mv', old, new))
@@ -435,7 +435,7 @@ class FakeImgInfo(object):
         self.stub_out('oslo_utils.fileutils.delete_if_exists',
                       fake_rm_on_error)
 
-        mock_inspector = mock_gi.return_value.from_file.return_value
+        mock_inspector = mock_detect.return_value
 
         # Since the remove param of fileutils.remove_path_on_error()
         # is initialized at load time, we must provide a wrapper
@@ -449,6 +449,7 @@ class FakeImgInfo(object):
 
         # Make sure qcow2 gets converted to raw
         mock_inspector.safety_check.return_value = True
+        mock_inspector.__str__.return_value = 'qcow2'
         mock_glance.get.return_value = {'disk_format': 'qcow2'}
         target = 't.qcow2'
         self.executes = []
@@ -462,12 +463,13 @@ class FakeImgInfo(object):
             CONF.instances_path, False)
         mock_convert_image.reset_mock()
         mock_inspector.safety_check.assert_called_once_with()
-        mock_gi.assert_called_once_with('qcow2')
+        mock_detect.assert_called_once_with('t.qcow2.part')
 
         # Make sure raw does not get converted
-        mock_gi.reset_mock()
+        mock_detect.reset_mock()
         mock_inspector.safety_check.reset_mock()
         mock_inspector.safety_check.return_value = True
+        mock_inspector.__str__.return_value = 'raw'
         mock_glance.get.return_value = {'disk_format': 'raw'}
         target = 't.raw'
         self.executes = []
@@ -476,12 +478,13 @@ class FakeImgInfo(object):
         self.assertEqual(self.executes, expected_commands)
         mock_convert_image.assert_not_called()
         mock_inspector.safety_check.assert_called_once_with()
-        mock_gi.assert_called_once_with('raw')
+        mock_detect.assert_called_once_with('t.raw.part')
 
         # Make sure safety check failure prevents us from proceeding
-        mock_gi.reset_mock()
+        mock_detect.reset_mock()
         mock_inspector.safety_check.reset_mock()
         mock_inspector.safety_check.return_value = False
+        mock_inspector.__str__.return_value = 'qcow2'
         mock_glance.get.return_value = {'disk_format': 'qcow2'}
         target = 'backing.qcow2'
         self.executes = []
@@ -491,10 +494,10 @@ class FakeImgInfo(object):
         self.assertEqual(self.executes, expected_commands)
         mock_convert_image.assert_not_called()
         mock_inspector.safety_check.assert_called_once_with()
-        mock_gi.assert_called_once_with('qcow2')
+        mock_detect.assert_called_once_with('backing.qcow2.part')
 
         # Make sure a format mismatch prevents us from proceeding
-        mock_gi.reset_mock()
+        mock_detect.reset_mock()
         mock_inspector.safety_check.reset_mock()
         mock_inspector.safety_check.side_effect = (
             format_inspector.ImageFormatError)
@@ -507,7 +510,7 @@ class FakeImgInfo(object):
         self.assertEqual(self.executes, expected_commands)
         mock_convert_image.assert_not_called()
         mock_inspector.safety_check.assert_called_once_with()
-        mock_gi.assert_called_once_with('qcow2')
+        mock_detect.assert_called_once_with('backing.qcow2.part')
 
         del self.executes
 
diff --git a/nova/tests/unit/virt/test_images.py b/nova/tests/unit/virt/test_images.py
index 8649644eb5f..0f5cb7c1928 100644
--- a/nova/tests/unit/virt/test_images.py
+++ b/nova/tests/unit/virt/test_images.py
@@ -21,7 +21,6 @@
 
 from nova.compute import utils as compute_utils
 from nova import exception
-from nova.image import format_inspector
 from nova import test
 from nova.virt import images
 
@@ -101,15 +100,16 @@ def test_qemu_img_info_with_disk_not_found(self, exists, mocked_execute):
         mocked_execute.assert_called_once()
 
     @mock.patch.object(images, 'IMAGE_API')
-    @mock.patch('nova.image.format_inspector.get_inspector')
+    @mock.patch('nova.image.format_inspector.detect_file_format')
     @mock.patch.object(images, 'convert_image',
                        side_effect=exception.ImageUnacceptable)
     @mock.patch.object(images, 'qemu_img_info')
     @mock.patch.object(images, 'fetch')
     def test_fetch_to_raw_errors(self, convert_image, qemu_img_info, fetch,
-                                 get_inspector, glance):
-        inspector = get_inspector.return_value.from_file.return_value
+                                 mock_detect, glance):
+        inspector = mock_detect.return_value
         inspector.safety_check.return_value = True
+        inspector.__str__.return_value = 'qcow2'
         glance.get.return_value = {'disk_format': 'qcow2'}
         qemu_img_info.backing_file = None
         qemu_img_info.file_format = 'qcow2'
@@ -120,16 +120,17 @@ def test_fetch_to_raw_errors(self, convert_image, qemu_img_info, fetch,
                                None, 'href123', '/no/path')
 
     @mock.patch.object(images, 'IMAGE_API')
-    @mock.patch('nova.image.format_inspector.get_inspector')
+    @mock.patch('nova.image.format_inspector.detect_file_format')
     @mock.patch.object(images, 'convert_image',
                        side_effect=exception.ImageUnacceptable)
     @mock.patch.object(images, 'qemu_img_info')
     @mock.patch.object(images, 'fetch')
     def test_fetch_to_raw_data_file(self, convert_image, qemu_img_info_fn,
-                                    fetch, mock_gi, mock_glance):
+                                    fetch, mock_detect, mock_glance):
         mock_glance.get.return_value = {'disk_format': 'qcow2'}
-        inspector = mock_gi.return_value.from_file.return_value
+        inspector = mock_detect.return_value
         inspector.safety_check.return_value = True
+        inspector.__str__.return_value = 'qcow2'
         # NOTE(danms): the above test needs the following line as well, as it
         # is broken without it.
         qemu_img_info = qemu_img_info_fn.return_value
@@ -142,16 +143,17 @@ def test_fetch_to_raw_data_file(self, convert_image, qemu_img_info_fn,
                                images.fetch_to_raw,
                                None, 'href123', '/no/path')
 
-    @mock.patch('nova.image.format_inspector.get_inspector')
+    @mock.patch('nova.image.format_inspector.detect_file_format')
     @mock.patch.object(images, 'IMAGE_API')
     @mock.patch('os.rename')
     @mock.patch.object(images, 'qemu_img_info')
     @mock.patch.object(images, 'fetch')
     def test_fetch_to_raw_from_raw(self, fetch, qemu_img_info_fn, mock_rename,
-                                   mock_glance, mock_gi):
+                                   mock_glance, mock_detect):
         # Make sure we support a case where we fetch an already-raw image and
         # qemu-img returns None for "format_specific".
         mock_glance.get.return_value = {'disk_format': 'raw'}
+        mock_detect.return_value.__str__.return_value = 'raw'
         qemu_img_info = qemu_img_info_fn.return_value
         qemu_img_info.file_format = 'raw'
         qemu_img_info.backing_file = None
@@ -215,14 +217,15 @@ def test_convert_image_vmdk_allowed_list_checking(self):
                                                  format='json'))
 
     @mock.patch.object(images, 'IMAGE_API')
-    @mock.patch('nova.image.format_inspector.get_inspector')
+    @mock.patch('nova.image.format_inspector.detect_file_format')
     @mock.patch.object(images, 'fetch')
     @mock.patch('nova.privsep.qemu.unprivileged_qemu_img_info')
-    def test_fetch_checks_vmdk_rules(self, mock_info, mock_fetch, mock_gi,
+    def test_fetch_checks_vmdk_rules(self, mock_info, mock_fetch, mock_detect,
                                      mock_glance):
         mock_glance.get.return_value = {'disk_format': 'vmdk'}
-        inspector = mock_gi.return_value.from_file.return_value
+        inspector = mock_detect.return_value
         inspector.safety_check.return_value = True
+        inspector.__str__.return_value = 'vmdk'
         info = {'format': 'vmdk',
                 'format-specific': {
                     'type': 'vmdk',
@@ -238,13 +241,17 @@ def test_fetch_checks_vmdk_rules(self, mock_info, mock_fetch, mock_gi,
     @mock.patch('os.rename')
     @mock.patch.object(images, 'IMAGE_API')
     @mock.patch('nova.image.format_inspector.get_inspector')
+    @mock.patch('nova.image.format_inspector.detect_file_format')
     @mock.patch.object(images, 'fetch')
     @mock.patch('nova.privsep.qemu.unprivileged_qemu_img_info')
-    def test_fetch_iso_is_raw(self, mock_info, mock_fetch, mock_gi,
-                              mock_glance, mock_rename):
+    def test_fetch_iso_is_raw(
+        self, mock_info, mock_fetch, mock_detect_file_format, mock_gi,
+        mock_glance, mock_rename):
         mock_glance.get.return_value = {'disk_format': 'iso'}
         inspector = mock_gi.return_value.from_file.return_value
         inspector.safety_check.return_value = True
+        inspector.__str__.return_value = 'iso'
+        mock_detect_file_format.return_value = inspector
         # qemu-img does not have a parser for iso so it is treated as raw
         info = {
             "virtual-size": 356352,
@@ -258,27 +265,27 @@ def test_fetch_iso_is_raw(self, mock_info, mock_fetch, mock_gi,
             images.fetch_to_raw(None, 'foo', 'anypath')
         # Make sure we called info with -f raw for an iso, since qemu-img does
         # not support iso
-        mock_info.assert_called_once_with('anypath.part', format='raw')
+        mock_info.assert_called_once_with('anypath.part', format=None)
         # Make sure that since we considered this to be a raw file, we did the
         # just-rename-don't-convert path
         mock_rename.assert_called_once_with('anypath.part', 'anypath')
 
     @mock.patch.object(images, 'IMAGE_API')
-    @mock.patch('nova.image.format_inspector.get_inspector')
+    @mock.patch('nova.image.format_inspector.detect_file_format')
     @mock.patch.object(images, 'qemu_img_info')
     @mock.patch.object(images, 'fetch')
-    def test_fetch_to_raw_inspector(self, fetch, qemu_img_info, mock_gi,
+    def test_fetch_to_raw_inspector(self, fetch, qemu_img_info, mock_detect,
                                     mock_glance):
         # Image claims to be qcow2, is qcow2, but fails safety check, so we
         # abort before qemu-img-info
         mock_glance.get.return_value = {'disk_format': 'qcow2'}
-        inspector = mock_gi.return_value.from_file.return_value
+        inspector = mock_detect.return_value
         inspector.safety_check.return_value = False
+        inspector.__str__.return_value = 'qcow2'
         self.assertRaises(exception.ImageUnacceptable,
                           images.fetch_to_raw, None, 'href123', '/no.path')
         qemu_img_info.assert_not_called()
-        mock_gi.assert_called_once_with('qcow2')
-        mock_gi.return_value.from_file.assert_called_once_with('/no.path.part')
+        mock_detect.assert_called_once_with('/no.path.part')
         inspector.safety_check.assert_called_once_with()
         mock_glance.get.assert_called_once_with(None, 'href123')
 
@@ -292,18 +299,17 @@ def test_fetch_to_raw_inspector(self, fetch, qemu_img_info, mock_gi,
         # Image claims to be qcow2 in glance, but the image is something else,
         # so we abort before qemu-img-info
         qemu_img_info.reset_mock()
-        mock_gi.reset_mock()
+        mock_detect.reset_mock()
         inspector.safety_check.reset_mock()
-        mock_gi.return_value.from_file.side_effect = (
-            format_inspector.ImageFormatError)
+        mock_detect.return_value.__str__.return_value = 'vmdk'
         self.assertRaises(exception.ImageUnacceptable,
                           images.fetch_to_raw, None, 'href123', '/no.path')
-        mock_gi.assert_called_once_with('qcow2')
-        inspector.safety_check.assert_not_called()
+        mock_detect.assert_called_once_with('/no.path.part')
+        inspector.safety_check.assert_called_once_with()
         qemu_img_info.assert_not_called()
 
     @mock.patch.object(images, 'IMAGE_API')
-    @mock.patch('nova.image.format_inspector.get_inspector')
+    @mock.patch('nova.image.format_inspector.detect_file_format')
     @mock.patch.object(images, 'qemu_img_info')
     @mock.patch.object(images, 'fetch')
     def test_fetch_to_raw_inspector_disabled(self, fetch, qemu_img_info,
@@ -316,36 +322,41 @@ def test_fetch_to_raw_inspector_disabled(self, fetch, qemu_img_info,
         # If deep inspection is disabled, we should never call the inspector
         mock_gi.assert_not_called()
         # ... and we let qemu-img detect the format itself.
-        qemu_img_info.assert_called_once_with('/no.path.part',
-                                              format=None)
+        qemu_img_info.assert_called_once_with('/no.path.part')
         mock_glance.get.assert_not_called()
 
     @mock.patch.object(images, 'IMAGE_API')
     @mock.patch.object(images, 'qemu_img_info')
-    def test_fetch_inspect_ami(self, imginfo, glance):
+    @mock.patch('nova.image.format_inspector.detect_file_format')
+    def test_fetch_inspect_ami(self, detect, imginfo, glance):
         glance.get.return_value = {'disk_format': 'ami'}
+        detect.return_value.__str__.return_value = 'raw'
         self.assertRaises(exception.ImageUnacceptable,
                           images.fetch_to_raw, None, 'href123', '/no.path')
         # Make sure 'ami was translated into 'raw' before we call qemu-img
-        imginfo.assert_called_once_with('/no.path.part', format='raw')
+        imginfo.assert_called_once_with('/no.path.part')
 
     @mock.patch.object(images, 'IMAGE_API')
     @mock.patch.object(images, 'qemu_img_info')
-    def test_fetch_inspect_aki(self, imginfo, glance):
+    @mock.patch('nova.image.format_inspector.detect_file_format')
+    def test_fetch_inspect_aki(self, detect, imginfo, glance):
         glance.get.return_value = {'disk_format': 'aki'}
+        detect.return_value.__str__.return_value = 'raw'
         self.assertRaises(exception.ImageUnacceptable,
                           images.fetch_to_raw, None, 'href123', '/no.path')
         # Make sure 'aki was translated into 'raw' before we call qemu-img
-        imginfo.assert_called_once_with('/no.path.part', format='raw')
+        imginfo.assert_called_once_with('/no.path.part')
 
     @mock.patch.object(images, 'IMAGE_API')
     @mock.patch.object(images, 'qemu_img_info')
-    def test_fetch_inspect_ari(self, imginfo, glance):
+    @mock.patch('nova.image.format_inspector.detect_file_format')
+    def test_fetch_inspect_ari(self, detect, imginfo, glance):
         glance.get.return_value = {'disk_format': 'ari'}
+        detect.return_value.__str__.return_value = 'raw'
         self.assertRaises(exception.ImageUnacceptable,
                           images.fetch_to_raw, None, 'href123', '/no.path')
         # Make sure 'aki was translated into 'raw' before we call qemu-img
-        imginfo.assert_called_once_with('/no.path.part', format='raw')
+        imginfo.assert_called_once_with('/no.path.part')
 
     @mock.patch.object(images, 'IMAGE_API')
     @mock.patch.object(images, 'qemu_img_info')
@@ -358,13 +369,16 @@ def test_fetch_inspect_unknown_format(self, imginfo, glance):
 
     @mock.patch.object(images, 'IMAGE_API')
     @mock.patch.object(images, 'qemu_img_info')
-    @mock.patch('nova.image.format_inspector.get_inspector')
-    def test_fetch_inspect_disagrees_qemu(self, mock_gi, imginfo, glance):
+    @mock.patch('nova.image.format_inspector.detect_file_format')
+    def test_fetch_inspect_disagrees_qemu(self, mock_detect, imginfo, glance):
         glance.get.return_value = {'disk_format': 'qcow2'}
+        mock_detect.return_value.__str__.return_value = 'qcow2'
         # Glance and inspector think it is a qcow2 file, but qemu-img does not
-        # agree. It was forced to interpret as a qcow2, but returned no
-        # format information as a result.
+        # agree.
         imginfo.return_value.data_file = None
-        self.assertRaises(exception.ImageUnacceptable,
-                          images.fetch_to_raw, None, 'href123', '/no.path')
-        imginfo.assert_called_once_with('/no.path.part', format='qcow2')
+        imginfo.return_value.file_format = 'vmdk'
+        ex = self.assertRaises(exception.ImageUnacceptable,
+                               images.fetch_to_raw,
+                               None, 'href123', '/no.path')
+        self.assertIn('content does not match disk_format', str(ex))
+        imginfo.assert_called_once_with('/no.path.part')
diff --git a/nova/virt/images.py b/nova/virt/images.py
index 813696ed7d7..193c80fb636 100644
--- a/nova/virt/images.py
+++ b/nova/virt/images.py
@@ -140,42 +140,50 @@ def check_vmdk_image(image_id, data):
 
 
 def do_image_deep_inspection(img, image_href, path):
+    ami_formats = ('ami', 'aki', 'ari')
     disk_format = img['disk_format']
     try:
         # NOTE(danms): Use our own cautious inspector module to make sure
         # the image file passes safety checks.
         # See https://bugs.launchpad.net/nova/+bug/2059809 for details.
-        inspector_cls = format_inspector.get_inspector(disk_format)
-        if not inspector_cls.from_file(path).safety_check():
+
+        # Make sure we have a format inspector for the claimed format, else
+        # it is something we do not support and must reject. AMI is excluded.
+        if (disk_format not in ami_formats and
+                not format_inspector.get_inspector(disk_format)):
+            raise exception.ImageUnacceptable(
+                image_id=image_href,
+                reason=_('Image not in a supported format'))
+
+        inspector = format_inspector.detect_file_format(path)
+        if not inspector.safety_check():
             raise exception.ImageUnacceptable(
                 image_id=image_href,
                 reason=(_('Image does not pass safety check')))
+
+        # AMI formats can be other things, so don't obsess over this
+        # requirement for them. Otherwise, make sure our detection agrees
+        # with glance.
+        if disk_format not in ami_formats and str(inspector) != disk_format:
+            # If we detected the image as something other than glance claimed,
+            # we abort.
+            raise exception.ImageUnacceptable(
+                image_id=image_href,
+                reason=_('Image content does not match disk_format'))
     except format_inspector.ImageFormatError:
         # If the inspector we chose based on the image's metadata does not
         # think the image is the proper format, we refuse to use it.
         raise exception.ImageUnacceptable(
             image_id=image_href,
             reason=_('Image content does not match disk_format'))
-    except AttributeError:
-        # No inspector was found
-        LOG.warning('Unable to perform deep image inspection on type %r',
-                    img['disk_format'])
-        if disk_format in ('ami', 'aki', 'ari'):
-            # A lot of things can be in a UEC, although it is typically a raw
-            # filesystem. We really have nothing we can do other than treat it
-            # like a 'raw', which is what qemu-img will detect a filesystem as
-            # anyway. If someone puts a qcow2 inside, we should fail because
-            # we won't do our inspection.
-            disk_format = 'raw'
-        else:
-            raise exception.ImageUnacceptable(
-                image_id=image_href,
-                reason=_('Image not in a supported format'))
-
-    if disk_format == 'iso':
-        # ISO image passed safety check; qemu will treat this as raw from here
+    except Exception:
+        raise exception.ImageUnacceptable(
+            image_id=image_href,
+            reason=_('Image not in a supported format'))
+    if disk_format in ('iso',) + ami_formats:
+        # ISO or AMI image passed safety check; qemu will treat this as raw
+        # from here so return the expected formats it will find.
         disk_format = 'raw'
-
     return disk_format
 
 
@@ -194,12 +202,22 @@ def fetch_to_raw(context, image_href, path, trusted_certs=None):
 
         # Only run qemu-img after we have done deep inspection (if enabled).
         # If it was not enabled, we will let it detect the format.
-        data = qemu_img_info(path_tmp, format=force_format)
+        data = qemu_img_info(path_tmp)
         fmt = data.file_format
         if fmt is None:
             raise exception.ImageUnacceptable(
                 reason=_("'qemu-img info' parsing failed."),
                 image_id=image_href)
+        elif force_format is not None and fmt != force_format:
+            # Format inspector and qemu-img must agree on the format, else
+            # we reject. This will catch VMDK some variants that we don't
+            # explicitly support because qemu will identify them as such
+            # and we will not.
+            LOG.warning('Image %s detected by qemu as %s but we expected %s',
+                        image_href, fmt, force_format)
+            raise exception.ImageUnacceptable(
+                image_id=image_href,
+                reason=_('Image content does not match disk_format'))
 
         backing_file = data.backing_file
         if backing_file is not None: