diff --git a/controller/linodemachine_controller.go b/controller/linodemachine_controller.go
index 419f0f9f8..8e4c4fac5 100644
--- a/controller/linodemachine_controller.go
+++ b/controller/linodemachine_controller.go
@@ -55,6 +55,7 @@ import (
 
 const (
 	linodeBusyCode        = 400
+	linodeTooManyRequests = 429
 	defaultDiskFilesystem = string(linodego.FilesystemExt4)
 
 	// conditions for preflight instance creation
@@ -309,6 +310,13 @@ func (r *LinodeMachineReconciler) reconcileCreate(
 
 		linodeInstance, err = machineScope.LinodeClient.CreateInstance(ctx, *createOpts)
 		if err != nil {
+			// Rate limiting (HTTP 429) and errors that linodego wrapped from a plain
+			// Go error (code ErrorFromError, e.g. a context deadline) are treated as
+			// transient: requeue after a fixed delay instead of failing the reconcile.
+			if linodego.ErrHasStatus(err, linodeTooManyRequests, linodego.ErrorFromError) {
+				logger.Error(err, "Failed to create Linode instance due to API error, requeuing")
+				return ctrl.Result{RequeueAfter: reconciler.DefaultMachineControllerRetryDelay}, nil
+			}
 			logger.Error(err, "Failed to create Linode machine instance")
 
 			if reconciler.RecordDecayingCondition(machineScope.LinodeMachine,
diff --git a/controller/linodemachine_controller_test.go b/controller/linodemachine_controller_test.go
index acce4cde1..ea040619e 100644
--- a/controller/linodemachine_controller_test.go
+++ b/controller/linodemachine_controller_test.go
@@ -257,6 +257,43 @@ var _ = Describe("create", Label("machine", "create"), func() {
 		})
 	})
 
+	Context("when a known error occurs", func() {
+		It("requeues due to context deadline exceeded error", func(ctx SpecContext) {
+			mockLinodeClient := mock.NewMockLinodeClient(mockCtrl)
+			listInst := mockLinodeClient.EXPECT().
+				ListInstances(ctx, gomock.Any()).
+				Return([]linodego.Instance{}, nil)
+			getRegion := mockLinodeClient.EXPECT().
+				GetRegion(ctx, gomock.Any()).
+				After(listInst).
+				Return(&linodego.Region{Capabilities: []string{"Metadata"}}, nil)
+			getImage := mockLinodeClient.EXPECT().
+				GetImage(ctx, gomock.Any()).
+				After(getRegion).
+				Return(&linodego.Image{Capabilities: []string{"cloud-init"}}, nil)
+			// linodego.NewError wraps a plain error with the ErrorFromError code,
+			// which reconcileCreate is expected to treat as retryable.
+			mockLinodeClient.EXPECT().
+				CreateInstance(ctx, gomock.Any()).
+				After(getImage).
+				DoAndReturn(func(_, _ any) (*linodego.Instance, error) {
+					return nil, linodego.NewError(errors.New("context deadline exceeded"))
+				})
+			mScope := scope.MachineScope{
+				Client:        k8sClient,
+				LinodeClient:  mockLinodeClient,
+				Cluster:       &cluster,
+				Machine:       &machine,
+				LinodeCluster: &linodeCluster,
+				LinodeMachine: &linodeMachine,
+			}
+
+			res, err := reconciler.reconcileCreate(ctx, logger, &mScope)
+			Expect(err).NotTo(HaveOccurred())
+			Expect(res.RequeueAfter).To(Equal(rutil.DefaultMachineControllerRetryDelay))
+		})
+	})
+
 	Context("creates a instance with disks", func() {
 		It("in a single call when disks aren't delayed", func(ctx SpecContext) {
 			machine.Labels[clusterv1.MachineControlPlaneLabel] = "true"
diff --git a/util/reconciler/defaults.go b/util/reconciler/defaults.go
index 7530cfc2d..bcb73e01b 100644
--- a/util/reconciler/defaults.go
+++ b/util/reconciler/defaults.go
@@ -36,6 +36,8 @@ const (
 	DefaultMachineControllerWaitForPreflightTimeout = 5 * time.Minute
 	// DefaultMachineControllerWaitForRunningTimeout is the default timeout if instance is not running.
 	DefaultMachineControllerWaitForRunningTimeout = 20 * time.Minute
+	// DefaultMachineControllerRetryDelay is the default requeue delay if there is an error.
+	DefaultMachineControllerRetryDelay = 10 * time.Second
 
 	// DefaultVPCControllerReconcileDelay is the default requeue delay when a reconcile operation fails.
 	DefaultVPCControllerReconcileDelay = 5 * time.Second