Skip to content

Commit

Permalink
refactor: csr approval controller
Browse files Browse the repository at this point in the history
Move CSR Approval to separate controller.

Signed-off-by: Serge Logvinov <[email protected]>
  • Loading branch information
sergelogvinov committed Aug 28, 2024
1 parent 31c9b5b commit 69b8976
Show file tree
Hide file tree
Showing 17 changed files with 113 additions and 57 deletions.
4 changes: 2 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -72,11 +72,11 @@ build: ## Build

.PHONY: run
run: build
./talos-cloud-controller-manager-$(ARCH) --v=5 --kubeconfig=kubeconfig --cloud-config=hack/ccm-config.yaml --controllers=cloud-node,node-ipam-controller \
./talos-cloud-controller-manager-$(ARCH) --v=5 --kubeconfig=kubeconfig --cloud-config=hack/ccm-config.yaml --controllers=cloud-node,node-csr-approval,node-ipam-controller \
--allocate-node-cidrs \
--node-cidr-mask-size-ipv4=24 --node-cidr-mask-size-ipv6=80 \
--cidr-allocator-type=CloudAllocator \
--use-service-account-credentials --leader-elect=false --bind-address=127.0.0.1 --secure-port=0 --authorization-always-allow-paths=/healthz,/livez,/readyz,/metrics
--use-service-account-credentials --leader-elect=false --bind-address=127.0.0.1 --secure-port=8443 --authorization-always-allow-paths=/healthz,/livez,/readyz,/metrics

.PHONY: lint
lint: ## Lint Code
Expand Down
2 changes: 1 addition & 1 deletion charts/talos-cloud-controller-manager/Chart.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -11,5 +11,5 @@ keywords:
maintainers:
- name: sergelogvinov
url: https://github.com/sergelogvinov
version: 0.3.1
version: 0.4.0
appVersion: "v1.6.0"
10 changes: 3 additions & 7 deletions charts/talos-cloud-controller-manager/README.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# talos-cloud-controller-manager

![Version: 0.3.0](https://img.shields.io/badge/Version-0.3.0-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) ![AppVersion: v1.6.0](https://img.shields.io/badge/AppVersion-v1.6.0-informational?style=flat-square)
![Version: 0.4.0](https://img.shields.io/badge/Version-0.4.0-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) ![AppVersion: v1.6.0](https://img.shields.io/badge/AppVersion-v1.6.0-informational?style=flat-square)

Talos Cloud Controller Manager Helm Chart

Expand All @@ -27,12 +27,9 @@ Kubernetes: `>= 1.24.0`

replicaCount: 2

features:
# `approveNodeCSR` - check and approve node CSR.
approveNodeCSR: true

enabledControllers:
- cloud-node
- node-csr-approval

# Deploy CCM only on control-plane nodes
nodeSelector:
Expand All @@ -54,9 +51,8 @@ helm upgrade -i --namespace=kube-system -f talos-ccm.yaml \
| Key | Type | Default | Description |
|-----|------|---------|-------------|
| affinity | object | `{}` | Affinity for data pods assignment. ref: https://kubernetes.io/docs/concepts/configuration/assign-pod-node/#affinity-and-anti-affinity |
| enabledControllers | list | `["cloud-node"]` | List of controllers should be enabled. Use '*' to enable all controllers. Support only `cloud-node` controller. |
| enabledControllers | list | `["cloud-node","node-csr-approval"]` | List of controllers should be enabled. Use '*' to enable all controllers. Support only `cloud-node, cloud-node-lifecycle, node-csr-approval, node-ipam-controller` controllers. |
| extraArgs | list | `[]` | Any extra arguments for talos-cloud-controller-manager |
| features.approveNodeCSR | bool | `true` | List of CCM features. `approveNodeCSR` - check and approve node CSR. |
| fullnameOverride | string | `""` | String to fully override deployment name. |
| image.pullPolicy | string | `"IfNotPresent"` | Pull policy: IfNotPresent or Always. |
| image.repository | string | `"ghcr.io/siderolabs/talos-cloud-controller-manager"` | CCM image repository. |
Expand Down
5 changes: 1 addition & 4 deletions charts/talos-cloud-controller-manager/README.md.gotmpl
Original file line number Diff line number Diff line change
Expand Up @@ -21,12 +21,9 @@

replicaCount: 2

features:
# `approveNodeCSR` - check and approve node CSR.
approveNodeCSR: true

enabledControllers:
- cloud-node
- node-csr-approval

# Deploy CCM only on control-plane nodes
nodeSelector:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,6 @@ metadata:
data:
ccm-config.yaml: |
global:
{{- if .Values.features.approveNodeCSR }}
approveNodeCSR: true
{{- end }}
{{- with .Values.transformations }}
transformations:
{{- toYaml . | nindent 6 }}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -75,13 +75,13 @@ spec:
value: "6443"
{{- end }}
ports:
- containerPort: {{ .Values.service.containerPort }}
name: https
- name: metrics
containerPort: {{ .Values.service.containerPort }}
protocol: TCP
livenessProbe:
httpGet:
path: /healthz
port: https
port: metrics
scheme: HTTPS
initialDelaySeconds: 20
periodSeconds: 30
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ spec:
clusterIP: None
type: ClusterIP
ports:
- name: https
- name: metrics
port: {{ .Values.service.port }}
targetPort: {{ .Values.service.containerPort }}
protocol: TCP
Expand Down
3 changes: 1 addition & 2 deletions charts/talos-cloud-controller-manager/values-example.yaml
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@

image:
repository: ghcr.io/sergelogvinov/talos-cloud-controller-manager
pullPolicy: Always
tag: latest
tag: edge

logVerbosityLevel: 4
11 changes: 4 additions & 7 deletions charts/talos-cloud-controller-manager/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -28,17 +28,14 @@ extraArgs: []

# -- List of controllers should be enabled.
# Use '*' to enable all controllers.
# Support only `cloud-node` controller.
# Support only `cloud-node, cloud-node-lifecycle, node-csr-approval, node-ipam-controller` controllers.
enabledControllers:
- cloud-node
# - cloud-node-lifecycle
# - route
# - service

features:
# -- List of CCM features.
# `approveNodeCSR` - check and approve node CSR.
approveNodeCSR: true
- node-csr-approval
# - node-ipam-controller

# -- List of node transformations.
# Available matchExpressions key values: https://github.com/siderolabs/talos/blob/main/pkg/machinery/resources/runtime/platform_metadata.go#L28
Expand Down Expand Up @@ -73,7 +70,7 @@ serviceAccount:
name: ""

# -- CCM pods' priorityClassName.
priorityClassName: ""
priorityClassName: system-cluster-critical

# -- Annotations for data pods.
# ref: https://kubernetes.io/docs/concepts/overview/working-with-objects/annotations/
Expand Down
12 changes: 11 additions & 1 deletion cmd/talos-cloud-controller-manager/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -62,11 +62,21 @@ func main() {
Constructor: nodeIpamController.startNodeIpamControllerWrapper,
}

nodeCSRApproval := nodeCSRApprovalController{}
controllerInitializers[kcmnames.CertificateSigningRequestApprovingController] = app.ControllerInitFuncConstructor{
InitContext: app.ControllerInitContext{
ClientName: talos.ServiceAccountName,
},
Constructor: nodeCSRApproval.startNodeCSRApprovalControllerWrapper,
}

app.ControllersDisabledByDefault.Insert(kcmnames.NodeLifecycleController)
app.ControllersDisabledByDefault.Insert(kcmnames.NodeIpamController)
app.ControllersDisabledByDefault.Insert(kcmnames.CertificateSigningRequestApprovingController)
controllerAliases["nodeipam"] = kcmnames.NodeIpamController
command := app.NewCloudControllerManagerCommand(ccmOptions, cloudInitializer, controllerInitializers, controllerAliases, fss, wait.NeverStop)
controllerAliases["node-csr-approval"] = kcmnames.CertificateSigningRequestApprovingController

command := app.NewCloudControllerManagerCommand(ccmOptions, cloudInitializer, controllerInitializers, controllerAliases, fss, wait.NeverStop)
command.Flags().VisitAll(func(flag *pflag.Flag) {
if flag.Name == "cloud-provider" {
if err := flag.Value.Set(talos.ProviderName); err != nil {
Expand Down
61 changes: 61 additions & 0 deletions cmd/talos-cloud-controller-manager/nodecsrapprovalcontroller.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
/*
Copyright 2018 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package main

import (
"context"

"github.com/siderolabs/talos-cloud-controller-manager/pkg/certificatesigningrequest"
"github.com/siderolabs/talos-cloud-controller-manager/pkg/talos"

cloudprovider "k8s.io/cloud-provider"
app "k8s.io/cloud-provider/app"
cloudcontrollerconfig "k8s.io/cloud-provider/app/config"
genericcontrollermanager "k8s.io/controller-manager/app"
"k8s.io/controller-manager/controller"
"k8s.io/klog/v2"
)

type nodeCSRApprovalController struct{}

func (approvalController *nodeCSRApprovalController) startNodeCSRApprovalControllerWrapper(
initContext app.ControllerInitContext,
_ *cloudcontrollerconfig.CompletedConfig,
cloud cloudprovider.Interface,
) app.InitFunc {
klog.V(4).InfoS("nodeCSRApprovalController.startNodeCSRApprovalControllerWrapper() called")

return func(ctx context.Context, controllerContext genericcontrollermanager.ControllerContext) (controller.Interface, bool, error) {
return startNodeCSRApprovalController(ctx, initContext, controllerContext, cloud)
}
}

func startNodeCSRApprovalController(
ctx context.Context,
initContext app.ControllerInitContext,
controllerContext genericcontrollermanager.ControllerContext,
_ cloudprovider.Interface,
) (controller.Interface, bool, error) {
csrController := certificatesigningrequest.NewCsrController(
controllerContext.ClientBuilder.ClientOrDie(initContext.ClientName),
talos.CSRNodeChecks,
)

go csrController.Run(ctx)

return nil, true, nil
}
25 changes: 13 additions & 12 deletions docs/deploy/cloud-controller-manager-edge.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ kind: ServiceAccount
metadata:
name: talos-cloud-controller-manager
labels:
helm.sh/chart: talos-cloud-controller-manager-0.3.0
helm.sh/chart: talos-cloud-controller-manager-0.4.0
app.kubernetes.io/name: talos-cloud-controller-manager
app.kubernetes.io/instance: talos-cloud-controller-manager
app.kubernetes.io/version: "v1.6.0"
Expand All @@ -18,7 +18,7 @@ kind: ServiceAccount
metadata:
name: talos-cloud-controller-manager-talos-secrets
labels:
helm.sh/chart: talos-cloud-controller-manager-0.3.0
helm.sh/chart: talos-cloud-controller-manager-0.4.0
app.kubernetes.io/name: talos-cloud-controller-manager
app.kubernetes.io/instance: talos-cloud-controller-manager
app.kubernetes.io/version: "v1.6.0"
Expand All @@ -34,7 +34,7 @@ kind: ConfigMap
metadata:
name: talos-cloud-controller-manager
labels:
helm.sh/chart: talos-cloud-controller-manager-0.3.0
helm.sh/chart: talos-cloud-controller-manager-0.4.0
app.kubernetes.io/name: talos-cloud-controller-manager
app.kubernetes.io/instance: talos-cloud-controller-manager
app.kubernetes.io/version: "v1.6.0"
Expand All @@ -43,15 +43,14 @@ metadata:
data:
ccm-config.yaml: |
global:
approveNodeCSR: true
---
# Source: talos-cloud-controller-manager/templates/role.yaml
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
name: system:talos-cloud-controller-manager
labels:
helm.sh/chart: talos-cloud-controller-manager-0.3.0
helm.sh/chart: talos-cloud-controller-manager-0.4.0
app.kubernetes.io/name: talos-cloud-controller-manager
app.kubernetes.io/instance: talos-cloud-controller-manager
app.kubernetes.io/version: "v1.6.0"
Expand Down Expand Up @@ -159,7 +158,7 @@ kind: Service
metadata:
name: talos-cloud-controller-manager
labels:
helm.sh/chart: talos-cloud-controller-manager-0.3.0
helm.sh/chart: talos-cloud-controller-manager-0.4.0
app.kubernetes.io/name: talos-cloud-controller-manager
app.kubernetes.io/instance: talos-cloud-controller-manager
app.kubernetes.io/version: "v1.6.0"
Expand All @@ -169,7 +168,7 @@ spec:
clusterIP: None
type: ClusterIP
ports:
- name: https
- name: metrics
port: 50258
targetPort: 50258
protocol: TCP
Expand All @@ -183,7 +182,7 @@ kind: Deployment
metadata:
name: talos-cloud-controller-manager
labels:
helm.sh/chart: talos-cloud-controller-manager-0.3.0
helm.sh/chart: talos-cloud-controller-manager-0.4.0
app.kubernetes.io/name: talos-cloud-controller-manager
app.kubernetes.io/instance: talos-cloud-controller-manager
app.kubernetes.io/version: "v1.6.0"
Expand All @@ -210,6 +209,7 @@ spec:
runAsGroup: 10258
runAsNonRoot: true
runAsUser: 10258
priorityClassName: system-cluster-critical
containers:
- name: talos-cloud-controller-manager
securityContext:
Expand All @@ -226,18 +226,19 @@ spec:
- --v=2
- --cloud-provider=talos
- --cloud-config=/etc/talos/ccm-config.yaml
- --controllers=cloud-node
- --controllers=cloud-node,node-csr-approval
- --leader-elect-resource-name=cloud-controller-manager-talos
- --use-service-account-credentials
- --secure-port=50258
- --authorization-always-allow-paths=/healthz,/livez,/readyz,/metrics
ports:
- containerPort: 50258
name: https
- name: metrics
containerPort: 50258
protocol: TCP
livenessProbe:
httpGet:
path: /healthz
port: https
port: metrics
scheme: HTTPS
initialDelaySeconds: 20
periodSeconds: 30
Expand Down
1 change: 0 additions & 1 deletion hack/ccm-config.yaml
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
global:
approveNodeCSR: true
# endpoints:
# - 1.2.3.4
# - 4.3.2.1
Expand Down
7 changes: 7 additions & 0 deletions pkg/certificatesigningrequest/controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,13 @@ func (r *Reconciler) Run(ctx context.Context) {
continue
}

if csr.Spec.SignerName != certificatesv1.KubeletServingSignerName {
klog.V(5).InfoS("CertificateSigningRequestReconciler: ignoring, not a Kubelet serving certificate",
"signer", csr.Spec.SignerName)

continue
}

valid, err := r.Reconcile(ctx, csr)
if err != nil {
klog.ErrorS(err, "CertificateSigningRequestReconciler: failed to reconcile CSR", "name", csr.Name)
Expand Down
11 changes: 1 addition & 10 deletions pkg/talos/cloud.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@ import (
"fmt"
"io"

"github.com/siderolabs/talos-cloud-controller-manager/pkg/certificatesigningrequest"
"github.com/siderolabs/talos-cloud-controller-manager/pkg/talosclient"

clientkubernetes "k8s.io/client-go/kubernetes"
Expand All @@ -32,8 +31,7 @@ const (
type Cloud struct {
client *client

instancesV2 cloudprovider.InstancesV2
csrController *certificatesigningrequest.Reconciler
instancesV2 cloudprovider.InstancesV2

ctx context.Context //nolint:containedctx
stop func()
Expand Down Expand Up @@ -108,13 +106,6 @@ func (c *Cloud) Initialize(clientBuilder cloudprovider.ControllerClientBuilder,
provider.stop()
}(c)

if c.client.config.Global.ApproveNodeCSR {
klog.InfoS("Started CSR Node controller")

c.csrController = certificatesigningrequest.NewCsrController(c.client.kclient, csrNodeChecks)
go c.csrController.Run(c.ctx)
}

klog.InfoS("talos initialized")
}

Expand Down
3 changes: 2 additions & 1 deletion pkg/talos/helper.go
Original file line number Diff line number Diff line change
Expand Up @@ -189,8 +189,9 @@ func syncNodeLabels(c *client, node *v1.Node, nodeLabels map[string]string) erro
return nil
}

// CSRNodeChecks checks if the IP addresses in the CSR match the IP addresses of the node.
// TODO: add more checks, like domain name, worker nodes don't have controlplane IPs, etc...
func csrNodeChecks(ctx context.Context, kclient clientkubernetes.Interface, x509cr *x509.CertificateRequest) (bool, error) {
func CSRNodeChecks(ctx context.Context, kclient clientkubernetes.Interface, x509cr *x509.CertificateRequest) (bool, error) {
node, err := kclient.CoreV1().Nodes().Get(ctx, x509cr.DNSNames[0], metav1.GetOptions{})
if err != nil {
return false, fmt.Errorf("failed to get node %s: %w", x509cr.DNSNames[0], err)
Expand Down
Loading

0 comments on commit 69b8976

Please sign in to comment.