Skip to content

Commit

Permalink
feat: add config to recover from previously failed attempt
Browse files Browse the repository at this point in the history
Signed-off-by: Lukas Wöhrl <[email protected]>

sync crd

Signed-off-by: Lukas Wöhrl <[email protected]>

add stable itter for tests

Signed-off-by: Lukas Wöhrl <[email protected]>

test: more debug output to find out what's wrong

Signed-off-by: Lukas Wöhrl <[email protected]>

fix: invalid helm default

Signed-off-by: Lukas Wöhrl <[email protected]>

fix: remove default helm value for maxRetryDuration

Signed-off-by: Lukas Wöhrl <[email protected]>
  • Loading branch information
woehrl01 committed Oct 22, 2024
1 parent b282951 commit 1d3d319
Show file tree
Hide file tree
Showing 7 changed files with 235 additions and 35 deletions.
3 changes: 3 additions & 0 deletions charts/aws-pca-issuer/templates/deployment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,9 @@ spec:
{{- if .Values.disableApprovedCheck }}
- -disable-approved-check
{{- end }}
{{- if .Values.maxRetryDuration }}
- -max-retry-duration={{ .Values.maxRetryDuration }}
{{- end }}
ports:
- containerPort: 8080
name: http
Expand Down
10 changes: 9 additions & 1 deletion charts/aws-pca-issuer/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,14 @@ image:
# Disable waiting for CertificateRequests to be Approved before signing
disableApprovedCheck: false

# Maxium duration to retry fullfilling a CertificateRequest
#maxRetryDuration: 180s

# Optional secrets used for pulling the container image
#
# For example:
# imagePullSecrets:
# - name: secret-name
imagePullSecrets: []
nameOverride: ""
fullnameOverride: ""
Expand All @@ -31,7 +39,6 @@ service:
type: ClusterIP
port: 8080


# Options for configuring a target ServiceAccount with the role to approve
# all awspca.cert-manager.io requests.
approverRole:
Expand All @@ -43,6 +50,7 @@ serviceMonitor:
create: false
annotations: {}

# Annotations to add to the issuer Pod
podAnnotations: {}

podSecurityContext:
Expand Down
16 changes: 16 additions & 0 deletions e2e/k8_helpers.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,11 @@ func waitForIssuerReady(ctx context.Context, client *clientV1beta1.Client, name
return true, nil
}
}

fmt.Println("Issuer not ready yet - Current conditions:")
for _, condition := range issuer.Status.Conditions {
fmt.Printf("Type: %s, Status: %s, Reason: %s, Message: %s\n", condition.Type, condition.Status, condition.Reason, condition.Message)
}
return false, nil
})
}
Expand All @@ -52,6 +57,11 @@ func waitForClusterIssuerReady(ctx context.Context, client *clientV1beta1.Client
}
}

fmt.Println("ClusterIssuer not ready yet - Current conditions:")
for _, condition := range issuer.Status.Conditions {
fmt.Printf("Type: %s, Status: %s, Reason: %s, Message: %s\n", condition.Type, condition.Status, condition.Reason, condition.Message)
}

return false, nil
})
}
Expand All @@ -71,6 +81,12 @@ func waitForCertificateReady(ctx context.Context, client *cmclientv1.Certmanager
return true, nil
}
}

fmt.Println("Certifiate not ready yet - Current conditions:")
for _, condition := range certificate.Status.Conditions {
fmt.Printf("Type: %s, Status: %s, Reason: %s, Message: %s\n", condition.Type, condition.Status, condition.Reason, condition.Message)
}

return false, nil
})
}
5 changes: 5 additions & 0 deletions main.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ package main
import (
"flag"
"os"
"time"

certmanager "github.com/cert-manager/cert-manager/pkg/apis/certmanager/v1"

Expand Down Expand Up @@ -56,6 +57,7 @@ func main() {
var enableLeaderElection bool
var probeAddr string
var disableApprovedCheck bool
var maxRetryDuration time.Duration

flag.StringVar(&metricsAddr, "metrics-bind-address", ":8080", "The address the metric endpoint binds to.")
flag.StringVar(&probeAddr, "health-probe-bind-address", ":8081", "The address the probe endpoint binds to.")
Expand All @@ -64,6 +66,7 @@ func main() {
"Enabling this will ensure there is only one active controller manager.")
flag.BoolVar(&disableApprovedCheck, "disable-approved-check", false,
"Disables waiting for CertificateRequests to have an approved condition before signing.")
flag.DurationVar(&maxRetryDuration, "max-retry-duration", 180, "Maximum duration to retry failed CertificateRequests. Set to 0 to disable.")

opts := zap.Options{
Development: false,
Expand Down Expand Up @@ -118,7 +121,9 @@ func main() {
Recorder: mgr.GetEventRecorderFor("awspcaissuer-controller"),

Clock: clock.RealClock{},
RequeueItter: controllers.NewRequeueItter(),
CheckApprovedCondition: !disableApprovedCheck,
MaxRetryDuration: maxRetryDuration,
}).SetupWithManager(mgr); err != nil {
setupLog.Error(err, "unable to create controller", "controller", "CertificateRequest")
os.Exit(1)
Expand Down
3 changes: 3 additions & 0 deletions pkg/api/v1beta1/awspcaissuer_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,9 @@ type AWSPCAIssuerStatus struct {
// ConditionTypeReady is the default condition type for the CRs
const ConditionTypeReady = "Ready"

// ConditionTypeIssuing is the condition type for the CRs when they are issuing a certificate
const ConditionTypeIssuing = "Issuing"

// +kubebuilder:object:root=true
// +kubebuilder:subresource:status

Expand Down
83 changes: 65 additions & 18 deletions pkg/controllers/certificaterequest_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@ package controllers
import (
"context"
"fmt"
"math/rand"
"time"

"github.com/cert-manager/aws-privateca-issuer/pkg/aws"
"github.com/cert-manager/aws-privateca-issuer/pkg/util"
Expand All @@ -40,6 +42,21 @@ import (
cmmeta "github.com/cert-manager/cert-manager/pkg/apis/meta/v1"
)

type RequeueItter interface {
RequeueAfter() time.Duration
}

type requeueItter struct {
}

func (r *requeueItter) RequeueAfter() time.Duration {
return 1*time.Minute + time.Duration(rand.Intn(60))*time.Second
}

func NewRequeueItter() RequeueItter {
return &requeueItter{}
}

// CertificateRequestReconciler reconciles a AWSPCAIssuer object
type CertificateRequestReconciler struct {
client.Client
Expand All @@ -48,7 +65,9 @@ type CertificateRequestReconciler struct {
Recorder record.EventRecorder

Clock clock.Clock
RequeueItter RequeueItter
CheckApprovedCondition bool
MaxRetryDuration time.Duration
}

// +kubebuilder:rbac:groups=cert-manager.io,resources=certificaterequests,verbs=get;list;watch;update
Expand Down Expand Up @@ -116,7 +135,7 @@ func (r *CertificateRequestReconciler) Reconcile(ctx context.Context, req ctrl.R
}

message := "The CertificateRequest was denied by an approval controller"
return ctrl.Result{}, r.setStatus(ctx, cr, cmmeta.ConditionFalse, cmapi.CertificateRequestReasonDenied, message)
return ctrl.Result{}, r.setPermanentStatus(ctx, cr, cmmeta.ConditionFalse, cmapi.CertificateRequestReasonDenied, message)
}

if r.CheckApprovedCondition {
Expand All @@ -140,36 +159,47 @@ func (r *CertificateRequestReconciler) Reconcile(ctx context.Context, req ctrl.R
issuerName.Namespace = ""
}

retry, err := r.HandleSignRequest(ctx, log, issuerName, cr)

if err != nil {
now := r.Clock.Now()
creationTime := cr.GetCreationTimestamp()
pastMaxRetryDuration := now.After(creationTime.Add(r.MaxRetryDuration))

if pastMaxRetryDuration || !retry {
return ctrl.Result{}, r.setPermanentStatus(ctx, cr, cmmeta.ConditionFalse, cmapi.CertificateRequestReasonFailed, "Permanent error signing certificate: %s", err.Error())
}

return ctrl.Result{
RequeueAfter: r.RequeueItter.RequeueAfter(),
}, r.setTemporaryStatus(ctx, cr, cmmeta.ConditionFalse, cmapi.CertificateRequestReasonFailed, "Temporary error signing certificate, retry again: %s", err.Error())
}

return ctrl.Result{}, r.setPermanentStatus(ctx, cr, cmmeta.ConditionTrue, cmapi.CertificateRequestReasonIssued, "Certificate issued")
}

func (r *CertificateRequestReconciler) HandleSignRequest(ctx context.Context, log logr.Logger, issuerName types.NamespacedName, cr *cmapi.CertificateRequest) (bool, error) {
iss, err := util.GetIssuer(ctx, r.Client, issuerName)
if err != nil {
log.Error(err, "failed to retrieve Issuer resource")
_ = r.setStatus(ctx, cr, cmmeta.ConditionFalse, cmapi.CertificateRequestReasonFailed, "issuer could not be found")
return ctrl.Result{}, err
return true, fmt.Errorf("failed to retrieve Issuer resource: %w", err)
}

if !isReady(iss) {
err := fmt.Errorf("issuer %s is not ready", iss.GetName())
_ = r.setStatus(ctx, cr, cmmeta.ConditionFalse, cmapi.CertificateRequestReasonFailed, "issuer is not ready")
return ctrl.Result{}, err
return true, fmt.Errorf("issuer %s is not ready", iss.GetName())
}

provisioner, ok := aws.GetProvisioner(issuerName)
if !ok {
err := fmt.Errorf("provisioner for %s not found", issuerName)
log.Error(err, "failed to retrieve provisioner")
_ = r.setStatus(ctx, cr, cmmeta.ConditionFalse, cmapi.CertificateRequestReasonFailed, "failed to retrieve provisioner")
return ctrl.Result{}, err
return true, fmt.Errorf("provisioner for %s not found (name: %s, namespace: %s)", issuerName, issuerName.Name, issuerName.Namespace)
}

pem, ca, err := provisioner.Sign(ctx, cr, log)
if err != nil {
log.Error(err, "failed to request certificate from PCA")
return ctrl.Result{}, r.setStatus(ctx, cr, cmmeta.ConditionFalse, cmapi.CertificateRequestReasonFailed, "failed to request certificate from PCA: "+err.Error())
return false, fmt.Errorf("failed to sign certificat from PCA: %w", err)
}
cr.Status.Certificate = pem
cr.Status.CA = ca

return ctrl.Result{}, r.setStatus(ctx, cr, cmmeta.ConditionTrue, cmapi.CertificateRequestReasonIssued, "certificate issued")
return false, nil
}

// SetupWithManager sets up the controller with the Manager.
Expand All @@ -188,15 +218,32 @@ func isReady(issuer api.GenericIssuer) bool {
return false
}

func (r *CertificateRequestReconciler) setStatus(ctx context.Context, cr *cmapi.CertificateRequest, status cmmeta.ConditionStatus, reason, message string, args ...interface{}) error {
func (r *CertificateRequestReconciler) setStatusInternal(ctx context.Context, cr *cmapi.CertificateRequest, permanent bool, status cmmeta.ConditionStatus, reason, message string, args ...interface{}) error {
completeMessage := fmt.Sprintf(message, args...)
cmutil.SetCertificateRequestCondition(cr, "Ready", status, reason, completeMessage)

eventType := core.EventTypeNormal
if status == cmmeta.ConditionFalse {
eventType = core.EventTypeWarning
}
r.Recorder.Event(cr, eventType, reason, completeMessage)

return r.Client.Status().Update(ctx, cr)
cmutil.SetCertificateRequestCondition(cr, api.ConditionTypeIssuing, status, reason, completeMessage)
if permanent {
cmutil.SetCertificateRequestCondition(cr, api.ConditionTypeReady, status, reason, completeMessage)
}
r.Client.Status().Update(ctx, cr)

if reason == cmapi.CertificateRequestReasonFailed {
return fmt.Errorf(completeMessage)
}

return nil
}

func (r *CertificateRequestReconciler) setPermanentStatus(ctx context.Context, cr *cmapi.CertificateRequest, status cmmeta.ConditionStatus, reason, message string, args ...interface{}) error {
return r.setStatusInternal(ctx, cr, true, status, reason, message, args...)
}

func (r *CertificateRequestReconciler) setTemporaryStatus(ctx context.Context, cr *cmapi.CertificateRequest, status cmmeta.ConditionStatus, reason, message string, args ...interface{}) error {
return r.setStatusInternal(ctx, cr, false, status, reason, message, args...)
}
Loading

0 comments on commit 1d3d319

Please sign in to comment.