From 4f65eacff268ac036e23bf9bcba137bbd7525212 Mon Sep 17 00:00:00 2001 From: Michael Weibel Date: Tue, 19 Nov 2024 17:18:44 +0100 Subject: [PATCH] fix(MachinePool): always update vmssState Updates ScaleSets service to always update the scope's vmss state with the latest data. Previously, a long running operation would cause the vmssState to not be updated and delayed creation of AzureMachinePoolMachines until much later when that long running operation completed. This change avoids this and updates the vmssState to what is retrieved from the API at all times. The call does not make more API requests because the VMSS GET request was done anyway but its result ignored. --- azure/services/scalesets/scalesets.go | 51 ++++++++++++++-------- azure/services/scalesets/scalesets_test.go | 28 ++++-------- 2 files changed, 42 insertions(+), 37 deletions(-) diff --git a/azure/services/scalesets/scalesets.go b/azure/services/scalesets/scalesets.go index 4311b43283e..068e4489d2f 100644 --- a/azure/services/scalesets/scalesets.go +++ b/azure/services/scalesets/scalesets.go @@ -97,7 +97,7 @@ func (s *Service) Reconcile(ctx context.Context) (retErr error) { return errors.Errorf("%T is not of type ScaleSetSpec", spec) } - _, err := s.Client.Get(ctx, spec) + result, err := s.Client.Get(ctx, spec) if err == nil { // We can only get the existing instances if the VMSS already exists scaleSetSpec.VMSSInstances, err = s.Client.ListInstances(ctx, spec.ResourceGroupName(), spec.ResourceName()) @@ -106,34 +106,51 @@ func (s *Service) Reconcile(ctx context.Context) (retErr error) { s.Scope.UpdatePutStatus(infrav1.BootstrapSucceededCondition, serviceName, err) return err } + if result != nil { + if err := s.updateScopeState(ctx, result, scaleSetSpec); err != nil { + return err + } + } } else if !azure.ResourceNotFound(err) { return errors.Wrapf(err, "failed to get existing VMSS") } - result, err := s.CreateOrUpdateResource(ctx, scaleSetSpec, serviceName) + result, err = s.CreateOrUpdateResource(ctx, scaleSetSpec, serviceName) s.Scope.UpdatePutStatus(infrav1.BootstrapSucceededCondition, serviceName, err) if err == nil && result != nil { - vmss, ok := result.(armcompute.VirtualMachineScaleSet) - if !ok { - return errors.Errorf("%T is not an armcompute.VirtualMachineScaleSet", result) + if err := s.updateScopeState(ctx, result, scaleSetSpec); err != nil { + return err } + } - fetchedVMSS := converters.SDKToVMSS(vmss, scaleSetSpec.VMSSInstances) - if err := s.Scope.ReconcileReplicas(ctx, &fetchedVMSS); err != nil { - return errors.Wrap(err, "unable to reconcile VMSS replicas") - } + return err +} - // Transform the VMSS resource representation to conform to the cloud-provider-azure representation - providerID, err := azprovider.ConvertResourceGroupNameToLower(azureutil.ProviderIDPrefix + fetchedVMSS.ID) - if err != nil { - return errors.Wrapf(err, "failed to parse VMSS ID %s", fetchedVMSS.ID) - } - s.Scope.SetProviderID(providerID) - s.Scope.SetVMSSState(&fetchedVMSS) +// updateScopeState updates the scope's VMSS state and provider ID +// +// Code later in the reconciler uses scope's VMSS state for determining scale status and whether to create/delete +// AzureMachinePoolMachines. +// N.B.: before calling this function, make sure scaleSetSpec.VMSSInstances is updated to the latest state. +func (s *Service) updateScopeState(ctx context.Context, result interface{}, scaleSetSpec *ScaleSetSpec) error { + vmss, ok := result.(armcompute.VirtualMachineScaleSet) + if !ok { + return errors.Errorf("%T is not an armcompute.VirtualMachineScaleSet", result) } - return err + fetchedVMSS := converters.SDKToVMSS(vmss, scaleSetSpec.VMSSInstances) + if err := s.Scope.ReconcileReplicas(ctx, &fetchedVMSS); err != nil { + return errors.Wrap(err, "unable to reconcile VMSS replicas") + } + + // Transform the VMSS resource representation to conform to the cloud-provider-azure representation + providerID, err := azprovider.ConvertResourceGroupNameToLower(azureutil.ProviderIDPrefix + fetchedVMSS.ID) + if err != nil { + return errors.Wrapf(err, "failed to parse VMSS ID %s", fetchedVMSS.ID) + } + s.Scope.SetProviderID(providerID) + s.Scope.SetVMSSState(&fetchedVMSS) + return nil } // Delete deletes a scale set asynchronously. Delete sends a DELETE request to Azure and if accepted without error, diff --git a/azure/services/scalesets/scalesets_test.go b/azure/services/scalesets/scalesets_test.go index 15cb9e5e43b..8dbc770f8a4 100644 --- a/azure/services/scalesets/scalesets_test.go +++ b/azure/services/scalesets/scalesets_test.go @@ -120,14 +120,14 @@ func TestReconcileVMSS(t *testing.T) { spec := getDefaultVMSSSpec() // Validate spec s.ScaleSetSpec(gomockinternal.AContext()).Return(spec).AnyTimes() - m.Get(gomockinternal.AContext(), &defaultSpec).Return(&resultVMSS, nil) + m.Get(gomockinternal.AContext(), &defaultSpec).Return(resultVMSS, nil) m.ListInstances(gomockinternal.AContext(), defaultSpec.ResourceGroup, defaultSpec.Name).Return(defaultInstances, nil) r.CreateOrUpdateResource(gomockinternal.AContext(), spec, serviceName).Return(getResultVMSS(), nil) s.UpdatePutStatus(infrav1.BootstrapSucceededCondition, serviceName, nil) - s.ReconcileReplicas(gomockinternal.AContext(), &fetchedVMSS).Return(nil) - s.SetProviderID(azureutil.ProviderIDPrefix + defaultVMSSID) - s.SetVMSSState(&fetchedVMSS) + s.ReconcileReplicas(gomockinternal.AContext(), &fetchedVMSS).Return(nil).Times(2) + s.SetProviderID(azureutil.ProviderIDPrefix + defaultVMSSID).Times(2) + s.SetVMSSState(&fetchedVMSS).Times(2) }, }, { @@ -178,28 +178,16 @@ func TestReconcileVMSS(t *testing.T) { s.DefaultedAzureServiceReconcileTimeout().Return(reconciler.DefaultAzureServiceReconcileTimeout) spec := getDefaultVMSSSpec() s.ScaleSetSpec(gomockinternal.AContext()).Return(spec).AnyTimes() - m.Get(gomockinternal.AContext(), &defaultSpec).Return(&resultVMSS, nil) + m.Get(gomockinternal.AContext(), &defaultSpec).Return(resultVMSS, nil) m.ListInstances(gomockinternal.AContext(), defaultSpec.ResourceGroup, defaultSpec.Name).Return(defaultInstances, nil) r.CreateOrUpdateResource(gomockinternal.AContext(), spec, serviceName). Return(nil, internalError()) s.UpdatePutStatus(infrav1.BootstrapSucceededCondition, serviceName, internalError()) - }, - }, - { - name: "failed to reconcile replicas", - expectedError: "unable to reconcile VMSS replicas:.*#: Internal Server Error: StatusCode=500", - expect: func(g *WithT, s *mock_scalesets.MockScaleSetScopeMockRecorder, r *mock_async.MockReconcilerMockRecorder, m *mock_scalesets.MockClientMockRecorder) { - s.DefaultedAzureServiceReconcileTimeout().Return(reconciler.DefaultAzureServiceReconcileTimeout) - spec := getDefaultVMSSSpec() - s.ScaleSetSpec(gomockinternal.AContext()).Return(spec).AnyTimes() - m.Get(gomockinternal.AContext(), &defaultSpec).Return(&resultVMSS, nil) - m.ListInstances(gomockinternal.AContext(), defaultSpec.ResourceGroup, defaultSpec.Name).Return(defaultInstances, nil) - r.CreateOrUpdateResource(gomockinternal.AContext(), spec, serviceName).Return(getResultVMSS(), nil) - s.UpdatePutStatus(infrav1.BootstrapSucceededCondition, serviceName, nil) - - s.ReconcileReplicas(gomockinternal.AContext(), &fetchedVMSS).Return(internalError()) + s.ReconcileReplicas(gomockinternal.AContext(), &fetchedVMSS).Return(nil) + s.SetProviderID(azureutil.ProviderIDPrefix + defaultVMSSID) + s.SetVMSSState(&fetchedVMSS) }, }, {