From 69b897623fbcabdd7fe790e1db9946888050ca73 Mon Sep 17 00:00:00 2001 From: Serge Logvinov Date: Wed, 28 Aug 2024 16:48:17 +0300 Subject: [PATCH] refactor: csr approval controller Move CSR Approval to separate controller. Signed-off-by: Serge Logvinov --- Makefile | 4 +- .../talos-cloud-controller-manager/Chart.yaml | 2 +- .../talos-cloud-controller-manager/README.md | 10 +-- .../README.md.gotmpl | 5 +- .../templates/configmap.yaml | 3 - .../templates/deployment.yaml | 6 +- .../templates/service.yaml | 2 +- .../values-example.yaml | 3 +- .../values.yaml | 11 ++-- cmd/talos-cloud-controller-manager/main.go | 12 +++- .../nodecsrapprovalcontroller.go | 61 +++++++++++++++++++ docs/deploy/cloud-controller-manager-edge.yml | 25 ++++---- hack/ccm-config.yaml | 1 - pkg/certificatesigningrequest/controller.go | 7 +++ pkg/talos/cloud.go | 11 +--- pkg/talos/helper.go | 3 +- pkg/talos/helper_test.go | 4 +- 17 files changed, 113 insertions(+), 57 deletions(-) create mode 100644 cmd/talos-cloud-controller-manager/nodecsrapprovalcontroller.go diff --git a/Makefile b/Makefile index 818fcc3..329d2fa 100644 --- a/Makefile +++ b/Makefile @@ -72,11 +72,11 @@ build: ## Build .PHONY: run run: build - ./talos-cloud-controller-manager-$(ARCH) --v=5 --kubeconfig=kubeconfig --cloud-config=hack/ccm-config.yaml --controllers=cloud-node,node-ipam-controller \ + ./talos-cloud-controller-manager-$(ARCH) --v=5 --kubeconfig=kubeconfig --cloud-config=hack/ccm-config.yaml --controllers=cloud-node,node-csr-approval,node-ipam-controller \ --allocate-node-cidrs \ --node-cidr-mask-size-ipv4=24 --node-cidr-mask-size-ipv6=80 \ --cidr-allocator-type=CloudAllocator \ - --use-service-account-credentials --leader-elect=false --bind-address=127.0.0.1 --secure-port=0 --authorization-always-allow-paths=/healthz,/livez,/readyz,/metrics + --use-service-account-credentials --leader-elect=false --bind-address=127.0.0.1 --secure-port=8443 --authorization-always-allow-paths=/healthz,/livez,/readyz,/metrics .PHONY: lint lint: ## Lint Code diff --git a/charts/talos-cloud-controller-manager/Chart.yaml b/charts/talos-cloud-controller-manager/Chart.yaml index 9eaf4d9..9504c99 100644 --- a/charts/talos-cloud-controller-manager/Chart.yaml +++ b/charts/talos-cloud-controller-manager/Chart.yaml @@ -11,5 +11,5 @@ keywords: maintainers: - name: sergelogvinov url: https://github.com/sergelogvinov -version: 0.3.1 +version: 0.4.0 appVersion: "v1.6.0" diff --git a/charts/talos-cloud-controller-manager/README.md b/charts/talos-cloud-controller-manager/README.md index 93e7c99..33f8d5a 100644 --- a/charts/talos-cloud-controller-manager/README.md +++ b/charts/talos-cloud-controller-manager/README.md @@ -1,6 +1,6 @@ # talos-cloud-controller-manager -![Version: 0.3.0](https://img.shields.io/badge/Version-0.3.0-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) ![AppVersion: v1.6.0](https://img.shields.io/badge/AppVersion-v1.6.0-informational?style=flat-square) +![Version: 0.4.0](https://img.shields.io/badge/Version-0.4.0-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) ![AppVersion: v1.6.0](https://img.shields.io/badge/AppVersion-v1.6.0-informational?style=flat-square) Talos Cloud Controller Manager Helm Chart @@ -27,12 +27,9 @@ Kubernetes: `>= 1.24.0` replicaCount: 2 -features: - # `approveNodeCSR` - check and approve node CSR. - approveNodeCSR: true - enabledControllers: - cloud-node + - node-csr-approval # Deploy CCM only on control-plane nodes nodeSelector: @@ -54,9 +51,8 @@ helm upgrade -i --namespace=kube-system -f talos-ccm.yaml \ | Key | Type | Default | Description | |-----|------|---------|-------------| | affinity | object | `{}` | Affinity for data pods assignment. ref: https://kubernetes.io/docs/concepts/configuration/assign-pod-node/#affinity-and-anti-affinity | -| enabledControllers | list | `["cloud-node"]` | List of controllers should be enabled. Use '*' to enable all controllers. Support only `cloud-node` controller. | +| enabledControllers | list | `["cloud-node","node-csr-approval"]` | List of controllers should be enabled. Use '*' to enable all controllers. Support only `cloud-node, cloud-node-lifecycle, node-csr-approval, node-ipam-controller` controllers. | | extraArgs | list | `[]` | Any extra arguments for talos-cloud-controller-manager | -| features.approveNodeCSR | bool | `true` | List of CCM features. `approveNodeCSR` - check and approve node CSR. | | fullnameOverride | string | `""` | String to fully override deployment name. | | image.pullPolicy | string | `"IfNotPresent"` | Pull policy: IfNotPresent or Always. | | image.repository | string | `"ghcr.io/siderolabs/talos-cloud-controller-manager"` | CCM image repository. | diff --git a/charts/talos-cloud-controller-manager/README.md.gotmpl b/charts/talos-cloud-controller-manager/README.md.gotmpl index f0b239c..1482b1e 100644 --- a/charts/talos-cloud-controller-manager/README.md.gotmpl +++ b/charts/talos-cloud-controller-manager/README.md.gotmpl @@ -21,12 +21,9 @@ replicaCount: 2 -features: - # `approveNodeCSR` - check and approve node CSR. - approveNodeCSR: true - enabledControllers: - cloud-node + - node-csr-approval # Deploy CCM only on control-plane nodes nodeSelector: diff --git a/charts/talos-cloud-controller-manager/templates/configmap.yaml b/charts/talos-cloud-controller-manager/templates/configmap.yaml index d9e6dfa..e9a31e0 100644 --- a/charts/talos-cloud-controller-manager/templates/configmap.yaml +++ b/charts/talos-cloud-controller-manager/templates/configmap.yaml @@ -8,9 +8,6 @@ metadata: data: ccm-config.yaml: | global: - {{- if .Values.features.approveNodeCSR }} - approveNodeCSR: true - {{- end }} {{- with .Values.transformations }} transformations: {{- toYaml . | nindent 6 }} diff --git a/charts/talos-cloud-controller-manager/templates/deployment.yaml b/charts/talos-cloud-controller-manager/templates/deployment.yaml index 2c1dfa8..13033cb 100644 --- a/charts/talos-cloud-controller-manager/templates/deployment.yaml +++ b/charts/talos-cloud-controller-manager/templates/deployment.yaml @@ -75,13 +75,13 @@ spec: value: "6443" {{- end }} ports: - - containerPort: {{ .Values.service.containerPort }} - name: https + - name: metrics + containerPort: {{ .Values.service.containerPort }} protocol: TCP livenessProbe: httpGet: path: /healthz - port: https + port: metrics scheme: HTTPS initialDelaySeconds: 20 periodSeconds: 30 diff --git a/charts/talos-cloud-controller-manager/templates/service.yaml b/charts/talos-cloud-controller-manager/templates/service.yaml index 7a5829a..eaf61d8 100644 --- a/charts/talos-cloud-controller-manager/templates/service.yaml +++ b/charts/talos-cloud-controller-manager/templates/service.yaml @@ -13,7 +13,7 @@ spec: clusterIP: None type: ClusterIP ports: - - name: https + - name: metrics port: {{ .Values.service.port }} targetPort: {{ .Values.service.containerPort }} protocol: TCP diff --git a/charts/talos-cloud-controller-manager/values-example.yaml b/charts/talos-cloud-controller-manager/values-example.yaml index 470f67b..aee65f4 100644 --- a/charts/talos-cloud-controller-manager/values-example.yaml +++ b/charts/talos-cloud-controller-manager/values-example.yaml @@ -1,7 +1,6 @@ image: - repository: ghcr.io/sergelogvinov/talos-cloud-controller-manager pullPolicy: Always - tag: latest + tag: edge logVerbosityLevel: 4 diff --git a/charts/talos-cloud-controller-manager/values.yaml b/charts/talos-cloud-controller-manager/values.yaml index be98a7e..78f3dbd 100644 --- a/charts/talos-cloud-controller-manager/values.yaml +++ b/charts/talos-cloud-controller-manager/values.yaml @@ -28,17 +28,14 @@ extraArgs: [] # -- List of controllers should be enabled. # Use '*' to enable all controllers. -# Support only `cloud-node` controller. +# Support only `cloud-node, cloud-node-lifecycle, node-csr-approval, node-ipam-controller` controllers. enabledControllers: - cloud-node # - cloud-node-lifecycle # - route # - service - -features: - # -- List of CCM features. - # `approveNodeCSR` - check and approve node CSR. - approveNodeCSR: true + - node-csr-approval + # - node-ipam-controller # -- List of node transformations. # Available matchExpressions key values: https://github.com/siderolabs/talos/blob/main/pkg/machinery/resources/runtime/platform_metadata.go#L28 @@ -73,7 +70,7 @@ serviceAccount: name: "" # -- CCM pods' priorityClassName. -priorityClassName: "" +priorityClassName: system-cluster-critical # -- Annotations for data pods. # ref: https://kubernetes.io/docs/concepts/overview/working-with-objects/annotations/ diff --git a/cmd/talos-cloud-controller-manager/main.go b/cmd/talos-cloud-controller-manager/main.go index f56dfc6..7009730 100644 --- a/cmd/talos-cloud-controller-manager/main.go +++ b/cmd/talos-cloud-controller-manager/main.go @@ -62,11 +62,21 @@ func main() { Constructor: nodeIpamController.startNodeIpamControllerWrapper, } + nodeCSRApproval := nodeCSRApprovalController{} + controllerInitializers[kcmnames.CertificateSigningRequestApprovingController] = app.ControllerInitFuncConstructor{ + InitContext: app.ControllerInitContext{ + ClientName: talos.ServiceAccountName, + }, + Constructor: nodeCSRApproval.startNodeCSRApprovalControllerWrapper, + } + app.ControllersDisabledByDefault.Insert(kcmnames.NodeLifecycleController) app.ControllersDisabledByDefault.Insert(kcmnames.NodeIpamController) + app.ControllersDisabledByDefault.Insert(kcmnames.CertificateSigningRequestApprovingController) controllerAliases["nodeipam"] = kcmnames.NodeIpamController - command := app.NewCloudControllerManagerCommand(ccmOptions, cloudInitializer, controllerInitializers, controllerAliases, fss, wait.NeverStop) + controllerAliases["node-csr-approval"] = kcmnames.CertificateSigningRequestApprovingController + command := app.NewCloudControllerManagerCommand(ccmOptions, cloudInitializer, controllerInitializers, controllerAliases, fss, wait.NeverStop) command.Flags().VisitAll(func(flag *pflag.Flag) { if flag.Name == "cloud-provider" { if err := flag.Value.Set(talos.ProviderName); err != nil { diff --git a/cmd/talos-cloud-controller-manager/nodecsrapprovalcontroller.go b/cmd/talos-cloud-controller-manager/nodecsrapprovalcontroller.go new file mode 100644 index 0000000..125aeeb --- /dev/null +++ b/cmd/talos-cloud-controller-manager/nodecsrapprovalcontroller.go @@ -0,0 +1,61 @@ +/* +Copyright 2018 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package main + +import ( + "context" + + "github.com/siderolabs/talos-cloud-controller-manager/pkg/certificatesigningrequest" + "github.com/siderolabs/talos-cloud-controller-manager/pkg/talos" + + cloudprovider "k8s.io/cloud-provider" + app "k8s.io/cloud-provider/app" + cloudcontrollerconfig "k8s.io/cloud-provider/app/config" + genericcontrollermanager "k8s.io/controller-manager/app" + "k8s.io/controller-manager/controller" + "k8s.io/klog/v2" +) + +type nodeCSRApprovalController struct{} + +func (approvalController *nodeCSRApprovalController) startNodeCSRApprovalControllerWrapper( + initContext app.ControllerInitContext, + _ *cloudcontrollerconfig.CompletedConfig, + cloud cloudprovider.Interface, +) app.InitFunc { + klog.V(4).InfoS("nodeCSRApprovalController.startNodeCSRApprovalControllerWrapper() called") + + return func(ctx context.Context, controllerContext genericcontrollermanager.ControllerContext) (controller.Interface, bool, error) { + return startNodeCSRApprovalController(ctx, initContext, controllerContext, cloud) + } +} + +func startNodeCSRApprovalController( + ctx context.Context, + initContext app.ControllerInitContext, + controllerContext genericcontrollermanager.ControllerContext, + _ cloudprovider.Interface, +) (controller.Interface, bool, error) { + csrController := certificatesigningrequest.NewCsrController( + controllerContext.ClientBuilder.ClientOrDie(initContext.ClientName), + talos.CSRNodeChecks, + ) + + go csrController.Run(ctx) + + return nil, true, nil +} diff --git a/docs/deploy/cloud-controller-manager-edge.yml b/docs/deploy/cloud-controller-manager-edge.yml index f832efb..82ba304 100644 --- a/docs/deploy/cloud-controller-manager-edge.yml +++ b/docs/deploy/cloud-controller-manager-edge.yml @@ -5,7 +5,7 @@ kind: ServiceAccount metadata: name: talos-cloud-controller-manager labels: - helm.sh/chart: talos-cloud-controller-manager-0.3.0 + helm.sh/chart: talos-cloud-controller-manager-0.4.0 app.kubernetes.io/name: talos-cloud-controller-manager app.kubernetes.io/instance: talos-cloud-controller-manager app.kubernetes.io/version: "v1.6.0" @@ -18,7 +18,7 @@ kind: ServiceAccount metadata: name: talos-cloud-controller-manager-talos-secrets labels: - helm.sh/chart: talos-cloud-controller-manager-0.3.0 + helm.sh/chart: talos-cloud-controller-manager-0.4.0 app.kubernetes.io/name: talos-cloud-controller-manager app.kubernetes.io/instance: talos-cloud-controller-manager app.kubernetes.io/version: "v1.6.0" @@ -34,7 +34,7 @@ kind: ConfigMap metadata: name: talos-cloud-controller-manager labels: - helm.sh/chart: talos-cloud-controller-manager-0.3.0 + helm.sh/chart: talos-cloud-controller-manager-0.4.0 app.kubernetes.io/name: talos-cloud-controller-manager app.kubernetes.io/instance: talos-cloud-controller-manager app.kubernetes.io/version: "v1.6.0" @@ -43,7 +43,6 @@ metadata: data: ccm-config.yaml: | global: - approveNodeCSR: true --- # Source: talos-cloud-controller-manager/templates/role.yaml apiVersion: rbac.authorization.k8s.io/v1 @@ -51,7 +50,7 @@ kind: ClusterRole metadata: name: system:talos-cloud-controller-manager labels: - helm.sh/chart: talos-cloud-controller-manager-0.3.0 + helm.sh/chart: talos-cloud-controller-manager-0.4.0 app.kubernetes.io/name: talos-cloud-controller-manager app.kubernetes.io/instance: talos-cloud-controller-manager app.kubernetes.io/version: "v1.6.0" @@ -159,7 +158,7 @@ kind: Service metadata: name: talos-cloud-controller-manager labels: - helm.sh/chart: talos-cloud-controller-manager-0.3.0 + helm.sh/chart: talos-cloud-controller-manager-0.4.0 app.kubernetes.io/name: talos-cloud-controller-manager app.kubernetes.io/instance: talos-cloud-controller-manager app.kubernetes.io/version: "v1.6.0" @@ -169,7 +168,7 @@ spec: clusterIP: None type: ClusterIP ports: - - name: https + - name: metrics port: 50258 targetPort: 50258 protocol: TCP @@ -183,7 +182,7 @@ kind: Deployment metadata: name: talos-cloud-controller-manager labels: - helm.sh/chart: talos-cloud-controller-manager-0.3.0 + helm.sh/chart: talos-cloud-controller-manager-0.4.0 app.kubernetes.io/name: talos-cloud-controller-manager app.kubernetes.io/instance: talos-cloud-controller-manager app.kubernetes.io/version: "v1.6.0" @@ -210,6 +209,7 @@ spec: runAsGroup: 10258 runAsNonRoot: true runAsUser: 10258 + priorityClassName: system-cluster-critical containers: - name: talos-cloud-controller-manager securityContext: @@ -226,18 +226,19 @@ spec: - --v=2 - --cloud-provider=talos - --cloud-config=/etc/talos/ccm-config.yaml - - --controllers=cloud-node + - --controllers=cloud-node,node-csr-approval - --leader-elect-resource-name=cloud-controller-manager-talos - --use-service-account-credentials - --secure-port=50258 + - --authorization-always-allow-paths=/healthz,/livez,/readyz,/metrics ports: - - containerPort: 50258 - name: https + - name: metrics + containerPort: 50258 protocol: TCP livenessProbe: httpGet: path: /healthz - port: https + port: metrics scheme: HTTPS initialDelaySeconds: 20 periodSeconds: 30 diff --git a/hack/ccm-config.yaml b/hack/ccm-config.yaml index aa5f9c1..f7a6339 100644 --- a/hack/ccm-config.yaml +++ b/hack/ccm-config.yaml @@ -1,5 +1,4 @@ global: - approveNodeCSR: true # endpoints: # - 1.2.3.4 # - 4.3.2.1 diff --git a/pkg/certificatesigningrequest/controller.go b/pkg/certificatesigningrequest/controller.go index 01dfc22..6e1c1af 100644 --- a/pkg/certificatesigningrequest/controller.go +++ b/pkg/certificatesigningrequest/controller.go @@ -86,6 +86,13 @@ func (r *Reconciler) Run(ctx context.Context) { continue } + if csr.Spec.SignerName != certificatesv1.KubeletServingSignerName { + klog.V(5).InfoS("CertificateSigningRequestReconciler: ignoring, not a Kubelet serving certificate", + "signer", csr.Spec.SignerName) + + continue + } + valid, err := r.Reconcile(ctx, csr) if err != nil { klog.ErrorS(err, "CertificateSigningRequestReconciler: failed to reconcile CSR", "name", csr.Name) diff --git a/pkg/talos/cloud.go b/pkg/talos/cloud.go index eefee81..59068d0 100644 --- a/pkg/talos/cloud.go +++ b/pkg/talos/cloud.go @@ -6,7 +6,6 @@ import ( "fmt" "io" - "github.com/siderolabs/talos-cloud-controller-manager/pkg/certificatesigningrequest" "github.com/siderolabs/talos-cloud-controller-manager/pkg/talosclient" clientkubernetes "k8s.io/client-go/kubernetes" @@ -32,8 +31,7 @@ const ( type Cloud struct { client *client - instancesV2 cloudprovider.InstancesV2 - csrController *certificatesigningrequest.Reconciler + instancesV2 cloudprovider.InstancesV2 ctx context.Context //nolint:containedctx stop func() @@ -108,13 +106,6 @@ func (c *Cloud) Initialize(clientBuilder cloudprovider.ControllerClientBuilder, provider.stop() }(c) - if c.client.config.Global.ApproveNodeCSR { - klog.InfoS("Started CSR Node controller") - - c.csrController = certificatesigningrequest.NewCsrController(c.client.kclient, csrNodeChecks) - go c.csrController.Run(c.ctx) - } - klog.InfoS("talos initialized") } diff --git a/pkg/talos/helper.go b/pkg/talos/helper.go index 8e49d3d..95e0e43 100644 --- a/pkg/talos/helper.go +++ b/pkg/talos/helper.go @@ -189,8 +189,9 @@ func syncNodeLabels(c *client, node *v1.Node, nodeLabels map[string]string) erro return nil } +// CSRNodeChecks checks if the IP addresses in the CSR match the IP addresses of the node. // TODO: add more checks, like domain name, worker nodes don't have controlplane IPs, etc... -func csrNodeChecks(ctx context.Context, kclient clientkubernetes.Interface, x509cr *x509.CertificateRequest) (bool, error) { +func CSRNodeChecks(ctx context.Context, kclient clientkubernetes.Interface, x509cr *x509.CertificateRequest) (bool, error) { node, err := kclient.CoreV1().Nodes().Get(ctx, x509cr.DNSNames[0], metav1.GetOptions{}) if err != nil { return false, fmt.Errorf("failed to get node %s: %w", x509cr.DNSNames[0], err) diff --git a/pkg/talos/helper_test.go b/pkg/talos/helper_test.go index 5ee5a71..c911ec7 100644 --- a/pkg/talos/helper_test.go +++ b/pkg/talos/helper_test.go @@ -377,7 +377,7 @@ func TestSyncNodeLabels(t *testing.T) { } } -func TestCsrNodeChecks(t *testing.T) { +func TestCSRNodeChecks(t *testing.T) { ctx := context.Background() nodes := &v1.NodeList{ Items: []v1.Node{ @@ -528,7 +528,7 @@ func TestCsrNodeChecks(t *testing.T) { } { t.Run(tt.name, func(t *testing.T) { kclient := fake.NewSimpleClientset(nodes) - approve, err := csrNodeChecks(ctx, kclient, tt.cert) + approve, err := CSRNodeChecks(ctx, kclient, tt.cert) if tt.expectedError != nil { assert.Equal(t, tt.expectedError.Error(), err.Error())