Skip to content

Commit

Permalink
Merge branch 'stackhpc/yoga' into upstream/yoga-2024-10-07
Browse files Browse the repository at this point in the history
  • Loading branch information
priteau authored Oct 7, 2024
2 parents d86bb10 + 651fed4 commit 084b4d3
Show file tree
Hide file tree
Showing 8 changed files with 111 additions and 11 deletions.
1 change: 1 addition & 0 deletions .github/CODEOWNERS
Validating CODEOWNERS rules …
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
* @stackhpc/openstack
12 changes: 12 additions & 0 deletions .github/workflows/tag-and-release.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
---
name: Tag & Release
'on':
push:
branches:
- stackhpc/yoga
permissions:
actions: read
contents: write
jobs:
tag-and-release:
uses: stackhpc/.github/.github/workflows/tag-and-release.yml@main
7 changes: 7 additions & 0 deletions .github/workflows/tox.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
---
name: Tox Continuous Integration
'on':
pull_request:
jobs:
tox:
uses: stackhpc/.github/.github/workflows/tox.yml@main
2 changes: 2 additions & 0 deletions bindep.txt
Original file line number Diff line number Diff line change
Expand Up @@ -55,3 +55,5 @@ pcre-devel [platform:rpm test]
# runtime and in unit tests. Net result is the same that lsscsi will be
# installed for any nova installation.
lsscsi
# NOTE(priteau): Install qemu-img for format inspector tests
qemu-utils [platform:dpkg test]
15 changes: 15 additions & 0 deletions nova/conf/workarounds.py
Original file line number Diff line number Diff line change
Expand Up @@ -426,6 +426,21 @@
known-features detection *before* passing the image to qemu-img. Generally,
this inspection should be enabled for maximum safety, but this workaround
option allows disabling it if there is a compatibility concern.
"""),
cfg.BoolOpt(
'skip_reserve_in_use_ironic_nodes',
default=False,
help="""
This may be useful if you use the Ironic driver, but don't have
automatic cleaning enabled in Ironic. Nova, by default, will mark
Ironic nodes as reserved as soon as they are in use. When you free
the Ironic node (by deleting the nova instance) it takes a while
for Nova to un-reserve that Ironic node in placement. Usually this
is a good idea, because it avoids placement providing an Ironic
as a valid candidate when it is still being cleaned.
Howerver, if you don't use automatic cleaning, it can cause an
extra delay before and Ironic node is available for building a
new Nova instance.
"""),
]

Expand Down
48 changes: 45 additions & 3 deletions nova/tests/unit/virt/ironic/test_driver.py
Original file line number Diff line number Diff line change
Expand Up @@ -931,6 +931,48 @@ def test_update_provider_tree_with_rc_occupied(self, mock_nfc, mock_nr,

self.driver.update_provider_tree(self.ptree, mock.sentinel.nodename)

expected = {
'CUSTOM_IRON_NFV': {
'total': 1,
'reserved': 1,
'min_unit': 1,
'max_unit': 1,
'step_size': 1,
'allocation_ratio': 1.0,
},
}
mock_nfc.assert_called_once_with(mock.sentinel.nodename)
mock_nr.assert_called_once_with(mock_nfc.return_value)
mock_res_used.assert_called_once_with(mock_nfc.return_value)
mock_res_unavail.assert_called_once_with(mock_nfc.return_value)
result = self.ptree.data(mock.sentinel.nodename).inventory
self.assertEqual(expected, result)

@mock.patch.object(ironic_driver.IronicDriver,
'_node_resources_used', return_value=True)
@mock.patch.object(ironic_driver.IronicDriver,
'_node_resources_unavailable', return_value=False)
@mock.patch.object(ironic_driver.IronicDriver, '_node_resource')
@mock.patch.object(ironic_driver.IronicDriver, '_node_from_cache')
def test_update_provider_tree_with_rc_occupied_workaround(self,
mock_nfc, mock_nr, mock_res_unavail, mock_res_used):
"""Ensure that when a node is used, we report the inventory matching
the consumed resources.
"""
self.flags(skip_reserve_in_use_ironic_nodes=True,
group="workarounds")
mock_nr.return_value = {
'vcpus': 24,
'vcpus_used': 24,
'memory_mb': 1024,
'memory_mb_used': 1024,
'local_gb': 100,
'local_gb_used': 100,
'resource_class': 'iron-nfv',
}

self.driver.update_provider_tree(self.ptree, mock.sentinel.nodename)

expected = {
'CUSTOM_IRON_NFV': {
'total': 1,
Expand All @@ -944,7 +986,7 @@ def test_update_provider_tree_with_rc_occupied(self, mock_nfc, mock_nr,
mock_nfc.assert_called_once_with(mock.sentinel.nodename)
mock_nr.assert_called_once_with(mock_nfc.return_value)
mock_res_used.assert_called_once_with(mock_nfc.return_value)
self.assertFalse(mock_res_unavail.called)
mock_res_unavail.assert_called_once_with(mock_nfc.return_value)
result = self.ptree.data(mock.sentinel.nodename).inventory
self.assertEqual(expected, result)

Expand Down Expand Up @@ -1015,7 +1057,7 @@ def test_update_provider_tree_no_traits(self, mock_nfc, mock_nr,
mock_nfc.assert_called_once_with(mock.sentinel.nodename)
mock_nr.assert_called_once_with(mock_nfc.return_value)
mock_res_used.assert_called_once_with(mock_nfc.return_value)
self.assertFalse(mock_res_unavail.called)
mock_res_unavail.assert_called_once_with(mock_nfc.return_value)
result = self.ptree.data(mock.sentinel.nodename).traits
self.assertEqual(set(), result)

Expand Down Expand Up @@ -1047,7 +1089,7 @@ def test_update_provider_tree_with_traits(self, mock_nfc, mock_nr,
mock_nfc.assert_called_once_with(mock.sentinel.nodename)
mock_nr.assert_called_once_with(mock_nfc.return_value)
mock_res_used.assert_called_once_with(mock_nfc.return_value)
self.assertFalse(mock_res_unavail.called)
mock_res_unavail.assert_called_once_with(mock_nfc.return_value)
result = self.ptree.data(mock.sentinel.nodename).traits
self.assertEqual(set(traits), result)

Expand Down
26 changes: 18 additions & 8 deletions nova/virt/ironic/driver.py
Original file line number Diff line number Diff line change
Expand Up @@ -886,15 +886,25 @@ def update_provider_tree(self, provider_tree, nodename, allocations=None):
"""
# nodename is the ironic node's UUID.
node = self._node_from_cache(nodename)

reserved = False
if (not self._node_resources_used(node) and
self._node_resources_unavailable(node)):
LOG.debug('Node %(node)s is not ready for a deployment, '
'reporting resources as reserved for it. Node\'s '
'provision state is %(prov)s, power state is '
'%(power)s and maintenance is %(maint)s.',
{'node': node.uuid, 'prov': node.provision_state,
'power': node.power_state, 'maint': node.maintenance})
if self._node_resources_unavailable(node):
# Operators might mark a node as in maintainance,
# even when an instance is on the node,
# either way lets mark this as reserved
reserved = True

if (self._node_resources_used(node) and
not CONF.workarounds.skip_reserve_in_use_ironic_nodes):
# Make resources as reserved once we have
# and instance here.
# When the allocation is deleted, most likely
# automatic clean will start, so we keep the node
# reserved until it becomes available again.
# In the case without automatic clean, once
# the allocation is removed in placement it
# also stays as reserved until we notice on
# the next periodic its actually available.
reserved = True

info = self._node_resource(node)
Expand Down
11 changes: 11 additions & 0 deletions releasenotes/notes/fix-ironic-scheduler-race-08cf8aba0365f512.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
---
fixes:
- |
Fixed when placement returns ironic nodes that have just started automatic
cleaning as possible valid candidates. This is done by marking all ironic
nodes with an instance on them as reserved, such that nova only makes them
available once we have double checked Ironic reports the node as available.
If you don't have automatic cleaning on, this might mean it takes longer
than normal for Ironic nodes to become available for new instances.
If you want the old behaviour use the following workaround config:
`[workarounds]skip_reserve_in_use_ironic_nodes=true`

0 comments on commit 084b4d3

Please sign in to comment.