Skip to content

Commit

Permalink
fix(MachinePool): always update vmssState
Browse files Browse the repository at this point in the history
Updates ScaleSets service to always update the scope's vmss state with
the latest data.
Previously, a long running operation would cause the vmssState to not be
updated and delayed creation of AzureMachinePoolMachines until much
later when that long running operation completed.
This change avoids this and updates the vmssState to what is retrieved
from the API at all times.
The call does not make more API requests because the VMSS GET request
was done anyway but its result ignored.
  • Loading branch information
mweibel committed Nov 26, 2024
1 parent 5b2cbce commit 291d1f1
Show file tree
Hide file tree
Showing 3 changed files with 42 additions and 38 deletions.
1 change: 0 additions & 1 deletion azure/scope/machinepool.go
Original file line number Diff line number Diff line change
Expand Up @@ -971,7 +971,6 @@ func (m *MachinePoolScope) ReconcileReplicas(ctx context.Context, vmss *azure.VM
if capacity := int32(vmss.Capacity); capacity != replicas {
m.UpdateCAPIMachinePoolReplicas(ctx, &capacity)
}

return nil
}

Expand Down
51 changes: 34 additions & 17 deletions azure/services/scalesets/scalesets.go
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ func (s *Service) Reconcile(ctx context.Context) (retErr error) {
return errors.Errorf("%T is not of type ScaleSetSpec", spec)
}

_, err := s.Client.Get(ctx, spec)
result, err := s.Client.Get(ctx, spec)
if err == nil {
// We can only get the existing instances if the VMSS already exists
scaleSetSpec.VMSSInstances, err = s.Client.ListInstances(ctx, spec.ResourceGroupName(), spec.ResourceName())
Expand All @@ -106,34 +106,51 @@ func (s *Service) Reconcile(ctx context.Context) (retErr error) {
s.Scope.UpdatePutStatus(infrav1.BootstrapSucceededCondition, serviceName, err)
return err
}
if result != nil {
if err := s.updateScopeState(ctx, result, scaleSetSpec); err != nil {
return err
}
}
} else if !azure.ResourceNotFound(err) {
return errors.Wrapf(err, "failed to get existing VMSS")
}

result, err := s.CreateOrUpdateResource(ctx, scaleSetSpec, serviceName)
result, err = s.CreateOrUpdateResource(ctx, scaleSetSpec, serviceName)
s.Scope.UpdatePutStatus(infrav1.BootstrapSucceededCondition, serviceName, err)

if err == nil && result != nil {
vmss, ok := result.(armcompute.VirtualMachineScaleSet)
if !ok {
return errors.Errorf("%T is not an armcompute.VirtualMachineScaleSet", result)
if err := s.updateScopeState(ctx, result, scaleSetSpec); err != nil {
return err
}
}

fetchedVMSS := converters.SDKToVMSS(vmss, scaleSetSpec.VMSSInstances)
if err := s.Scope.ReconcileReplicas(ctx, &fetchedVMSS); err != nil {
return errors.Wrap(err, "unable to reconcile VMSS replicas")
}
return err
}

// Transform the VMSS resource representation to conform to the cloud-provider-azure representation
providerID, err := azprovider.ConvertResourceGroupNameToLower(azureutil.ProviderIDPrefix + fetchedVMSS.ID)
if err != nil {
return errors.Wrapf(err, "failed to parse VMSS ID %s", fetchedVMSS.ID)
}
s.Scope.SetProviderID(providerID)
s.Scope.SetVMSSState(&fetchedVMSS)
// updateScopeState updates the scope's VMSS state and provider ID
//
// Code later in the reconciler uses scope's VMSS state for determining scale status and whether to create/delete
// AzureMachinePoolMachines.
// N.B.: before calling this function, make sure scaleSetSpec.VMSSInstances is updated to the latest state.
func (s *Service) updateScopeState(ctx context.Context, result interface{}, scaleSetSpec *ScaleSetSpec) error {
vmss, ok := result.(armcompute.VirtualMachineScaleSet)
if !ok {
return errors.Errorf("%T is not an armcompute.VirtualMachineScaleSet", result)
}

return err
fetchedVMSS := converters.SDKToVMSS(vmss, scaleSetSpec.VMSSInstances)
if err := s.Scope.ReconcileReplicas(ctx, &fetchedVMSS); err != nil {
return errors.Wrap(err, "unable to reconcile VMSS replicas")
}

// Transform the VMSS resource representation to conform to the cloud-provider-azure representation
providerID, err := azprovider.ConvertResourceGroupNameToLower(azureutil.ProviderIDPrefix + fetchedVMSS.ID)
if err != nil {
return errors.Wrapf(err, "failed to parse VMSS ID %s", fetchedVMSS.ID)
}
s.Scope.SetProviderID(providerID)
s.Scope.SetVMSSState(&fetchedVMSS)
return nil
}

// Delete deletes a scale set asynchronously. Delete sends a DELETE request to Azure and if accepted without error,
Expand Down
28 changes: 8 additions & 20 deletions azure/services/scalesets/scalesets_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -120,14 +120,14 @@ func TestReconcileVMSS(t *testing.T) {
spec := getDefaultVMSSSpec()
// Validate spec
s.ScaleSetSpec(gomockinternal.AContext()).Return(spec).AnyTimes()
m.Get(gomockinternal.AContext(), &defaultSpec).Return(&resultVMSS, nil)
m.Get(gomockinternal.AContext(), &defaultSpec).Return(resultVMSS, nil)
m.ListInstances(gomockinternal.AContext(), defaultSpec.ResourceGroup, defaultSpec.Name).Return(defaultInstances, nil)
r.CreateOrUpdateResource(gomockinternal.AContext(), spec, serviceName).Return(getResultVMSS(), nil)
s.UpdatePutStatus(infrav1.BootstrapSucceededCondition, serviceName, nil)

s.ReconcileReplicas(gomockinternal.AContext(), &fetchedVMSS).Return(nil)
s.SetProviderID(azureutil.ProviderIDPrefix + defaultVMSSID)
s.SetVMSSState(&fetchedVMSS)
s.ReconcileReplicas(gomockinternal.AContext(), &fetchedVMSS).Return(nil).Times(2)
s.SetProviderID(azureutil.ProviderIDPrefix + defaultVMSSID).Times(2)
s.SetVMSSState(&fetchedVMSS).Times(2)
},
},
{
Expand Down Expand Up @@ -178,28 +178,16 @@ func TestReconcileVMSS(t *testing.T) {
s.DefaultedAzureServiceReconcileTimeout().Return(reconciler.DefaultAzureServiceReconcileTimeout)
spec := getDefaultVMSSSpec()
s.ScaleSetSpec(gomockinternal.AContext()).Return(spec).AnyTimes()
m.Get(gomockinternal.AContext(), &defaultSpec).Return(&resultVMSS, nil)
m.Get(gomockinternal.AContext(), &defaultSpec).Return(resultVMSS, nil)
m.ListInstances(gomockinternal.AContext(), defaultSpec.ResourceGroup, defaultSpec.Name).Return(defaultInstances, nil)

r.CreateOrUpdateResource(gomockinternal.AContext(), spec, serviceName).
Return(nil, internalError())
s.UpdatePutStatus(infrav1.BootstrapSucceededCondition, serviceName, internalError())
},
},
{
name: "failed to reconcile replicas",
expectedError: "unable to reconcile VMSS replicas:.*#: Internal Server Error: StatusCode=500",
expect: func(g *WithT, s *mock_scalesets.MockScaleSetScopeMockRecorder, r *mock_async.MockReconcilerMockRecorder, m *mock_scalesets.MockClientMockRecorder) {
s.DefaultedAzureServiceReconcileTimeout().Return(reconciler.DefaultAzureServiceReconcileTimeout)
spec := getDefaultVMSSSpec()
s.ScaleSetSpec(gomockinternal.AContext()).Return(spec).AnyTimes()
m.Get(gomockinternal.AContext(), &defaultSpec).Return(&resultVMSS, nil)
m.ListInstances(gomockinternal.AContext(), defaultSpec.ResourceGroup, defaultSpec.Name).Return(defaultInstances, nil)

r.CreateOrUpdateResource(gomockinternal.AContext(), spec, serviceName).Return(getResultVMSS(), nil)
s.UpdatePutStatus(infrav1.BootstrapSucceededCondition, serviceName, nil)

s.ReconcileReplicas(gomockinternal.AContext(), &fetchedVMSS).Return(internalError())
s.ReconcileReplicas(gomockinternal.AContext(), &fetchedVMSS).Return(nil)
s.SetProviderID(azureutil.ProviderIDPrefix + defaultVMSSID)
s.SetVMSSState(&fetchedVMSS)
},
},
{
Expand Down

0 comments on commit 291d1f1

Please sign in to comment.