Skip to content

Commit

Permalink
Disconnecting volume from the compute host
Browse files Browse the repository at this point in the history
cmd nova-manage volume_attachment refresh vm-id vol-id connetor

There were cases where the instance said to live in compute#1 but the
connection_info in the BDM record was for compute#2, and when the script
called `remove_volume_connection` then nova would call os-brick on
compute#1 (the wrong node) and try to detach it.

In some case os-brick would mistakenly think that the volume was
attached (because the target and lun matched an existing volume on the
host) and would try to disconnect, resulting in errors on the compute
logs.

- Added HostConflict exception
- Fixes dedent in cmd/manange.py
- Updates nova-manage doc

Conflicts: stable/2023.2
       - nova/exception.py
       changes:
       (I052441076c677c0fe76a8d9421af70b0ffa1d400)
       Removed 2 new EphemeralEncryption exceptions as feature is not backportable.

Closes-Bug: #2012365
Change-Id: I21109752ff1c56d3cefa58fcd36c68bf468e0a73
(cherry picked from commit a8f81d5)
(cherry picked from commit ac45029)
  • Loading branch information
Amit Uniyal committed Mar 12, 2024
1 parent fc54bc9 commit c534be7
Show file tree
Hide file tree
Showing 4 changed files with 70 additions and 10 deletions.
4 changes: 3 additions & 1 deletion doc/source/cli/nova-manage.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1531,7 +1531,9 @@ command.
* - 5
- Instance state invalid (must be stopped and unlocked)
* - 6
- Instance is not attached to volume
- Volume is not attached to the instance
* - 7
- Connector host is not correct


Libvirt Commands
Expand Down
26 changes: 18 additions & 8 deletions nova/cmd/manage.py
Original file line number Diff line number Diff line change
Expand Up @@ -161,18 +161,14 @@ def locked_instance(cell_mapping, instance, reason):
initial_state = 'locked' if instance.locked else 'unlocked'
if not instance.locked:
with context.target_cell(
context.get_admin_context(),
cell_mapping
) as cctxt:
context.get_admin_context(), cell_mapping) as cctxt:
compute_api.lock(cctxt, instance, reason=reason)
try:
yield
finally:
if initial_state == 'unlocked':
with context.target_cell(
context.get_admin_context(),
cell_mapping
) as cctxt:
context.get_admin_context(), cell_mapping) as cctxt:
compute_api.unlock(cctxt, instance)


Expand Down Expand Up @@ -3078,8 +3074,15 @@ def _do_refresh(self, cctxt, instance,
# TODO(lyarwood): Add delete_attachment as a kwarg to
# remove_volume_connection as is available in the private
# method within the manager.
compute_rpcapi.remove_volume_connection(
cctxt, instance, volume_id, instance.host)
if instance.host == connector['host']:
compute_rpcapi.remove_volume_connection(
cctxt, instance, volume_id, instance.host)
else:
msg = (
f"The compute host '{connector['host']}' in the "
f"connector does not match the instance host "
f"'{instance.host}'.")
raise exception.HostConflict(_(msg))

# Delete the existing volume attachment if present in the bdm.
# This isn't present when the original attachment was made
Expand Down Expand Up @@ -3161,6 +3164,7 @@ def refresh(self, instance_uuid=None, volume_id=None, connector_path=None):
* 4: Instance does not exist.
* 5: Instance state invalid.
* 6: Volume is not attached to instance.
* 7: Connector host is not correct.
"""
try:
# TODO(lyarwood): Make this optional and provide a rpcapi capable
Expand All @@ -3176,6 +3180,12 @@ def refresh(self, instance_uuid=None, volume_id=None, connector_path=None):
# Refresh the volume attachment
return self._refresh(instance_uuid, volume_id, connector)

except exception.HostConflict as e:
print(
f"The command 'nova-manage volume_attachment get_connector' "
f"may have been run on the wrong compute host. Or the "
f"instance host may be wrong and in need of repair.\n{e}")
return 7
except exception.VolumeBDMNotFound as e:
print(str(e))
return 6
Expand Down
4 changes: 4 additions & 0 deletions nova/exception.py
Original file line number Diff line number Diff line change
Expand Up @@ -2530,3 +2530,7 @@ class NotSupportedComputeForEvacuateV295(NotSupported):
"instance on destination. To evacuate before upgrades are "
"complete please use an older microversion. Required version "
"for compute %(expected), current version %(currently)s")


class HostConflict(Exception):
pass
46 changes: 45 additions & 1 deletion nova/tests/unit/cmd/test_manage.py
Original file line number Diff line number Diff line change
Expand Up @@ -3622,6 +3622,50 @@ def test_refresh_attachment_unknown_failure(
mock_action_start.assert_called_once()
mock_action.finish.assert_called_once()

@mock.patch('nova.compute.rpcapi.ComputeAPI', autospec=True)
@mock.patch('nova.volume.cinder.API', autospec=True)
@mock.patch('nova.compute.api.API', autospec=True)
@mock.patch.object(objects.BlockDeviceMapping, 'save')
@mock.patch.object(
objects.BlockDeviceMapping, 'get_by_volume_and_instance')
@mock.patch.object(objects.Instance, 'get_by_uuid')
@mock.patch.object(objects.InstanceAction, 'action_start')
def test_refresh_invalid_connector_host(
self, mock_action_start, mock_get_instance,
mock_get_bdm, mock_save_bdm, mock_compute_api, mock_volume_api,
mock_compute_rpcapi
):
"""Test refresh with a old host not disconnected properly
and connector host info is not correct, a fake-host is
passed.
"""

fake_volume_api = mock_volume_api.return_value
device_name = '/dev/vda'

mock_get_instance.return_value = objects.Instance(
uuid=uuidsentinel.instance,
vm_state=obj_fields.InstanceState.STOPPED,
host='old-host', locked=False)
mock_get_bdm.return_value = objects.BlockDeviceMapping(
uuid=uuidsentinel.bdm, volume_id=uuidsentinel.volume,
attachment_id=uuidsentinel.instance,
device_name=device_name)
mock_action = mock.Mock(spec=objects.InstanceAction)
mock_action_start.return_value = mock_action

fake_volume_api.attachment_create.return_value = {
'id': uuidsentinel.new_attachment,
}
# in instance we have host as 'old-host'
# but here 'fake-host' is passed in connector info.
fake_volume_api.attachment_update.return_value = {
'connection_info': self._get_fake_connector_info(),
}

ret = self._test_refresh()
self.assertEqual(7, ret)

@mock.patch('nova.compute.rpcapi.ComputeAPI', autospec=True)
@mock.patch('nova.volume.cinder.API', autospec=True)
@mock.patch('nova.compute.api.API', autospec=True)
Expand All @@ -3644,7 +3688,7 @@ def test_refresh(
mock_get_instance.return_value = objects.Instance(
uuid=uuidsentinel.instance,
vm_state=obj_fields.InstanceState.STOPPED,
host='foo', locked=False)
host='fake-host', locked=False)
mock_get_bdm.return_value = objects.BlockDeviceMapping(
uuid=uuidsentinel.bdm, volume_id=uuidsentinel.volume,
attachment_id=uuidsentinel.instance,
Expand Down

0 comments on commit c534be7

Please sign in to comment.