From c534be7e007aafd0f64748ad9e8118db8dd73fa4 Mon Sep 17 00:00:00 2001 From: Amit Uniyal Date: Wed, 15 Mar 2023 05:07:28 +0000 Subject: [PATCH] Disconnecting volume from the compute host cmd nova-manage volume_attachment refresh vm-id vol-id connetor There were cases where the instance said to live in compute#1 but the connection_info in the BDM record was for compute#2, and when the script called `remove_volume_connection` then nova would call os-brick on compute#1 (the wrong node) and try to detach it. In some case os-brick would mistakenly think that the volume was attached (because the target and lun matched an existing volume on the host) and would try to disconnect, resulting in errors on the compute logs. - Added HostConflict exception - Fixes dedent in cmd/manange.py - Updates nova-manage doc Conflicts: stable/2023.2 - nova/exception.py changes: (I052441076c677c0fe76a8d9421af70b0ffa1d400) Removed 2 new EphemeralEncryption exceptions as feature is not backportable. Closes-Bug: #2012365 Change-Id: I21109752ff1c56d3cefa58fcd36c68bf468e0a73 (cherry picked from commit a8f81d5f08692c16d538af3197460d9426cc00a1) (cherry picked from commit ac45029f15ad36a8471a8cfcdde8a16701236787) --- doc/source/cli/nova-manage.rst | 4 ++- nova/cmd/manage.py | 26 +++++++++++------ nova/exception.py | 4 +++ nova/tests/unit/cmd/test_manage.py | 46 +++++++++++++++++++++++++++++- 4 files changed, 70 insertions(+), 10 deletions(-) diff --git a/doc/source/cli/nova-manage.rst b/doc/source/cli/nova-manage.rst index 53152a0a6fd..082f8e88528 100644 --- a/doc/source/cli/nova-manage.rst +++ b/doc/source/cli/nova-manage.rst @@ -1531,7 +1531,9 @@ command. * - 5 - Instance state invalid (must be stopped and unlocked) * - 6 - - Instance is not attached to volume + - Volume is not attached to the instance + * - 7 + - Connector host is not correct Libvirt Commands diff --git a/nova/cmd/manage.py b/nova/cmd/manage.py index 8f4e845ff2b..27d371a6a0a 100644 --- a/nova/cmd/manage.py +++ b/nova/cmd/manage.py @@ -161,18 +161,14 @@ def locked_instance(cell_mapping, instance, reason): initial_state = 'locked' if instance.locked else 'unlocked' if not instance.locked: with context.target_cell( - context.get_admin_context(), - cell_mapping - ) as cctxt: + context.get_admin_context(), cell_mapping) as cctxt: compute_api.lock(cctxt, instance, reason=reason) try: yield finally: if initial_state == 'unlocked': with context.target_cell( - context.get_admin_context(), - cell_mapping - ) as cctxt: + context.get_admin_context(), cell_mapping) as cctxt: compute_api.unlock(cctxt, instance) @@ -3078,8 +3074,15 @@ def _do_refresh(self, cctxt, instance, # TODO(lyarwood): Add delete_attachment as a kwarg to # remove_volume_connection as is available in the private # method within the manager. - compute_rpcapi.remove_volume_connection( - cctxt, instance, volume_id, instance.host) + if instance.host == connector['host']: + compute_rpcapi.remove_volume_connection( + cctxt, instance, volume_id, instance.host) + else: + msg = ( + f"The compute host '{connector['host']}' in the " + f"connector does not match the instance host " + f"'{instance.host}'.") + raise exception.HostConflict(_(msg)) # Delete the existing volume attachment if present in the bdm. # This isn't present when the original attachment was made @@ -3161,6 +3164,7 @@ def refresh(self, instance_uuid=None, volume_id=None, connector_path=None): * 4: Instance does not exist. * 5: Instance state invalid. * 6: Volume is not attached to instance. + * 7: Connector host is not correct. """ try: # TODO(lyarwood): Make this optional and provide a rpcapi capable @@ -3176,6 +3180,12 @@ def refresh(self, instance_uuid=None, volume_id=None, connector_path=None): # Refresh the volume attachment return self._refresh(instance_uuid, volume_id, connector) + except exception.HostConflict as e: + print( + f"The command 'nova-manage volume_attachment get_connector' " + f"may have been run on the wrong compute host. Or the " + f"instance host may be wrong and in need of repair.\n{e}") + return 7 except exception.VolumeBDMNotFound as e: print(str(e)) return 6 diff --git a/nova/exception.py b/nova/exception.py index c3ada86c1e2..85e0a9c85dd 100644 --- a/nova/exception.py +++ b/nova/exception.py @@ -2530,3 +2530,7 @@ class NotSupportedComputeForEvacuateV295(NotSupported): "instance on destination. To evacuate before upgrades are " "complete please use an older microversion. Required version " "for compute %(expected), current version %(currently)s") + + +class HostConflict(Exception): + pass diff --git a/nova/tests/unit/cmd/test_manage.py b/nova/tests/unit/cmd/test_manage.py index bb2efc82f1d..fca97180e4d 100644 --- a/nova/tests/unit/cmd/test_manage.py +++ b/nova/tests/unit/cmd/test_manage.py @@ -3622,6 +3622,50 @@ def test_refresh_attachment_unknown_failure( mock_action_start.assert_called_once() mock_action.finish.assert_called_once() + @mock.patch('nova.compute.rpcapi.ComputeAPI', autospec=True) + @mock.patch('nova.volume.cinder.API', autospec=True) + @mock.patch('nova.compute.api.API', autospec=True) + @mock.patch.object(objects.BlockDeviceMapping, 'save') + @mock.patch.object( + objects.BlockDeviceMapping, 'get_by_volume_and_instance') + @mock.patch.object(objects.Instance, 'get_by_uuid') + @mock.patch.object(objects.InstanceAction, 'action_start') + def test_refresh_invalid_connector_host( + self, mock_action_start, mock_get_instance, + mock_get_bdm, mock_save_bdm, mock_compute_api, mock_volume_api, + mock_compute_rpcapi + ): + """Test refresh with a old host not disconnected properly + and connector host info is not correct, a fake-host is + passed. + """ + + fake_volume_api = mock_volume_api.return_value + device_name = '/dev/vda' + + mock_get_instance.return_value = objects.Instance( + uuid=uuidsentinel.instance, + vm_state=obj_fields.InstanceState.STOPPED, + host='old-host', locked=False) + mock_get_bdm.return_value = objects.BlockDeviceMapping( + uuid=uuidsentinel.bdm, volume_id=uuidsentinel.volume, + attachment_id=uuidsentinel.instance, + device_name=device_name) + mock_action = mock.Mock(spec=objects.InstanceAction) + mock_action_start.return_value = mock_action + + fake_volume_api.attachment_create.return_value = { + 'id': uuidsentinel.new_attachment, + } + # in instance we have host as 'old-host' + # but here 'fake-host' is passed in connector info. + fake_volume_api.attachment_update.return_value = { + 'connection_info': self._get_fake_connector_info(), + } + + ret = self._test_refresh() + self.assertEqual(7, ret) + @mock.patch('nova.compute.rpcapi.ComputeAPI', autospec=True) @mock.patch('nova.volume.cinder.API', autospec=True) @mock.patch('nova.compute.api.API', autospec=True) @@ -3644,7 +3688,7 @@ def test_refresh( mock_get_instance.return_value = objects.Instance( uuid=uuidsentinel.instance, vm_state=obj_fields.InstanceState.STOPPED, - host='foo', locked=False) + host='fake-host', locked=False) mock_get_bdm.return_value = objects.BlockDeviceMapping( uuid=uuidsentinel.bdm, volume_id=uuidsentinel.volume, attachment_id=uuidsentinel.instance,