Skip to content

Commit

Permalink
feat(vd): set tolerations for provisioners
Browse files Browse the repository at this point in the history
Signed-off-by: Isteb4k <[email protected]>
  • Loading branch information
Isteb4k committed Dec 11, 2024
1 parent afe682a commit 4380a3a
Show file tree
Hide file tree
Showing 34 changed files with 1,117 additions and 244 deletions.
220 changes: 220 additions & 0 deletions images/cdi-artifact/patches/019-manage-provisioner-tolerations.patch
Original file line number Diff line number Diff line change
@@ -0,0 +1,220 @@
diff --git a/pkg/controller/clone-controller.go b/pkg/controller/clone-controller.go
index 59ee5fd3f..046d1f916 100644
--- a/pkg/controller/clone-controller.go
+++ b/pkg/controller/clone-controller.go
@@ -500,6 +500,11 @@ func (r *CloneReconciler) CreateCloneSourcePod(image, pullPolicy string, pvc *co
return nil, err
}

+ workloadNodePlacement, err = cc.AdjustWorkloadNodePlacement(context.TODO(), r.client, workloadNodePlacement, pvc)
+ if err != nil {
+ return nil, fmt.Errorf("failed to adjust workload node placement: %w", err)
+ }
+
sourcePvc, err := r.getCloneRequestSourcePVC(pvc)
if err != nil {
return nil, err
diff --git a/pkg/controller/clone/prep-claim.go b/pkg/controller/clone/prep-claim.go
index 9317b7429..68a249b77 100644
--- a/pkg/controller/clone/prep-claim.go
+++ b/pkg/controller/clone/prep-claim.go
@@ -139,6 +139,11 @@ func (p *PrepClaimPhase) createPod(ctx context.Context, name string, pvc *corev1
return err
}

+ workloadNodePlacement, err = cc.AdjustWorkloadNodePlacement(context.TODO(), p.Client, workloadNodePlacement, pvc)
+ if err != nil {
+ return fmt.Errorf("failed to adjust workload node placement: %w", err)
+ }
+
pod := &corev1.Pod{
ObjectMeta: metav1.ObjectMeta{
Name: name,
diff --git a/pkg/controller/common/util.go b/pkg/controller/common/util.go
index 48c73628d..f2a751805 100644
--- a/pkg/controller/common/util.go
+++ b/pkg/controller/common/util.go
@@ -21,6 +21,7 @@ import (
"crypto/rand"
"crypto/rsa"
"crypto/tls"
+ "encoding/json"
"fmt"
"io"
"math"
@@ -95,6 +96,11 @@ const (
// AnnExternalPopulation annotation marks a PVC as "externally populated", allowing the import-controller to skip it
AnnExternalPopulation = AnnAPIGroup + "/externalPopulation"

+ // AnnProvisionerTolerations annotation specifies tolerations to use for provisioners.
+ AnnProvisionerTolerations = "virt.deckhouse.io/provisioner-tolerations"
+ // AnnProvisionerName provides a name of data volume provisioner.
+ AnnProvisionerName = "virt.deckhouse.io/provisioner-name"
+
// AnnDeleteAfterCompletion is PVC annotation for deleting DV after completion
AnnDeleteAfterCompletion = AnnAPIGroup + "/storage.deleteAfterCompletion"
// AnnPodRetainAfterCompletion is PVC annotation for retaining transfer pods after completion
@@ -780,6 +786,50 @@ func GetWorkloadNodePlacement(ctx context.Context, c client.Client) (*sdkapi.Nod
return &cr.Spec.Workloads, nil
}

+// AdjustWorkloadNodePlacement adds tolerations specified in prime pvc annotation.
+func AdjustWorkloadNodePlacement(ctx context.Context, c client.Client, nodePlacement *sdkapi.NodePlacement, primePVC *corev1.PersistentVolumeClaim) (*sdkapi.NodePlacement, error) {
+ targetPVCKey := types.NamespacedName{
+ Namespace: primePVC.Namespace,
+ }
+
+ for _, ref := range primePVC.OwnerReferences {
+ if ref.Kind == "PersistentVolumeClaim" {
+ targetPVCKey.Name = ref.Name
+ }
+ }
+
+ var targetPVC corev1.PersistentVolumeClaim
+ err := c.Get(ctx, targetPVCKey, &targetPVC)
+ if err != nil {
+ return nil, fmt.Errorf("failed to get target pvc %s: %w", targetPVCKey, err)
+ }
+
+ provisionerTolerations, err := ExtractProvisionerTolerations(&targetPVC)
+ if err != nil {
+ return nil, fmt.Errorf("failed to extract provisioner tolerations: %w", err)
+ }
+
+ nodePlacement.Tolerations = append(nodePlacement.Tolerations, provisionerTolerations...)
+
+ return nodePlacement, nil
+}
+
+func ExtractProvisionerTolerations(obj client.Object) ([]corev1.Toleration, error) {
+ rawTolerations := obj.GetAnnotations()[AnnProvisionerTolerations]
+
+ if rawTolerations == "" {
+ return nil, nil
+ }
+
+ var tolerations []corev1.Toleration
+ err := json.Unmarshal([]byte(rawTolerations), &tolerations)
+ if err != nil {
+ return nil, fmt.Errorf("failed to unmarshal provisioner tolerations %s: %w", rawTolerations, err)
+ }
+
+ return tolerations, nil
+}
+
// GetActiveCDI returns the active CDI CR
func GetActiveCDI(ctx context.Context, c client.Client) (*cdiv1.CDI, error) {
crList := &cdiv1.CDIList{}
diff --git a/pkg/controller/datavolume/controller-base.go b/pkg/controller/datavolume/controller-base.go
index b8c9f893e..99f8501be 100644
--- a/pkg/controller/datavolume/controller-base.go
+++ b/pkg/controller/datavolume/controller-base.go
@@ -1145,6 +1145,11 @@ func (r *ReconcilerBase) newPersistentVolumeClaim(dataVolume *cdiv1.DataVolume,
annotations[k] = v
}
annotations[cc.AnnPodRestarts] = "0"
+
+ if dataVolume.Annotations[cc.AnnProvisionerTolerations] != "" {
+ annotations[cc.AnnProvisionerTolerations] = dataVolume.Annotations[cc.AnnProvisionerTolerations]
+ }
+
annotations[cc.AnnContentType] = string(cc.GetContentType(dataVolume.Spec.ContentType))
if dataVolume.Spec.PriorityClassName != "" {
annotations[cc.AnnPriorityClassName] = dataVolume.Spec.PriorityClassName
diff --git a/pkg/controller/datavolume/pvc-clone-controller.go b/pkg/controller/datavolume/pvc-clone-controller.go
index e9d18ef30..f879408fe 100644
--- a/pkg/controller/datavolume/pvc-clone-controller.go
+++ b/pkg/controller/datavolume/pvc-clone-controller.go
@@ -42,6 +42,7 @@ import (
"sigs.k8s.io/controller-runtime/pkg/source"

cdiv1 "kubevirt.io/containerized-data-importer-api/pkg/apis/core/v1beta1"
+
"kubevirt.io/containerized-data-importer/pkg/common"
cc "kubevirt.io/containerized-data-importer/pkg/controller/common"
featuregates "kubevirt.io/containerized-data-importer/pkg/feature-gates"
@@ -683,6 +684,12 @@ func (r *PvcCloneReconciler) makeSizeDetectionPodSpec(
if err != nil {
return nil
}
+
+ workloadNodePlacement.Tolerations, err = cc.ExtractProvisionerTolerations(dv)
+ if err != nil {
+ return nil
+ }
+
// Generate individual specs
objectMeta := makeSizeDetectionObjectMeta(sourcePvc)
volume := makeSizeDetectionVolumeSpec(sourcePvc.Name)
diff --git a/pkg/controller/import-controller.go b/pkg/controller/import-controller.go
index 49f1ff898..ba9fcb531 100644
--- a/pkg/controller/import-controller.go
+++ b/pkg/controller/import-controller.go
@@ -859,6 +859,11 @@ func createImporterPod(ctx context.Context, log logr.Logger, client client.Clien
return nil, err
}

+ args.workloadNodePlacement, err = cc.AdjustWorkloadNodePlacement(context.TODO(), client, args.workloadNodePlacement, args.pvc)
+ if err != nil {
+ return nil, fmt.Errorf("failed to adjust workload node placement: %w", err)
+ }
+
if isRegistryNodeImport(args) {
args.importImage, err = getRegistryImportImage(args.pvc)
if err != nil {
diff --git a/pkg/controller/populators/populator-base.go b/pkg/controller/populators/populator-base.go
index 6c6fd8f8a..a69ce4f2a 100644
--- a/pkg/controller/populators/populator-base.go
+++ b/pkg/controller/populators/populator-base.go
@@ -223,7 +223,7 @@ type updatePVCAnnotationsFunc func(pvc, pvcPrime *corev1.PersistentVolumeClaim)

var desiredAnnotations = []string{cc.AnnPodPhase, cc.AnnPodReady, cc.AnnPodRestarts,
cc.AnnPreallocationRequested, cc.AnnPreallocationApplied, cc.AnnCurrentCheckpoint, cc.AnnMultiStageImportDone,
- cc.AnnRunningCondition, cc.AnnRunningConditionMessage, cc.AnnRunningConditionReason}
+ cc.AnnRunningCondition, cc.AnnRunningConditionMessage, cc.AnnRunningConditionReason, cc.AnnProvisionerName}

func (r *ReconcilerBase) updatePVCWithPVCPrimeAnnotations(pvc, pvcPrime *corev1.PersistentVolumeClaim, updateFunc updatePVCAnnotationsFunc) (*corev1.PersistentVolumeClaim, error) {
pvcCopy := pvc.DeepCopy()
diff --git a/pkg/controller/upload-controller.go b/pkg/controller/upload-controller.go
index f251cae5d..ed4420fb9 100644
--- a/pkg/controller/upload-controller.go
+++ b/pkg/controller/upload-controller.go
@@ -623,6 +623,11 @@ func (r *UploadReconciler) createUploadPod(args UploadPodArgs) (*corev1.Pod, err
return nil, err
}

+ workloadNodePlacement, err = cc.AdjustWorkloadNodePlacement(context.TODO(), r.client, workloadNodePlacement, args.PVC)
+ if err != nil {
+ return nil, fmt.Errorf("failed to adjust workload node placement: %w", err)
+ }
+
pod := r.makeUploadPodSpec(args, podResourceRequirements, imagePullSecrets, workloadNodePlacement)
util.SetRecommendedLabels(pod, r.installerLabels, "cdi-controller")

diff --git a/pkg/controller/util.go b/pkg/controller/util.go
index 81e050464..367b5453b 100644
--- a/pkg/controller/util.go
+++ b/pkg/controller/util.go
@@ -298,7 +298,21 @@ func podSucceededFromPVC(pvc *corev1.PersistentVolumeClaim) bool {
}

func setAnnotationsFromPodWithPrefix(anno map[string]string, pod *corev1.Pod, termMsg *common.TerminationMessage, prefix string) {
- if pod == nil || pod.Status.ContainerStatuses == nil {
+ if pod == nil {
+ return
+ }
+
+ for _, cond := range pod.Status.Conditions {
+ if cond.Type != corev1.PodScheduled {
+ continue
+ }
+
+ anno[cc.AnnProvisionerName] = pod.Name
+
+ break
+ }
+
+ if pod.Status.ContainerStatuses == nil {
return
}
annPodRestarts, _ := strconv.Atoi(anno[cc.AnnPodRestarts])
4 changes: 4 additions & 0 deletions images/cdi-artifact/patches/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -76,3 +76,7 @@ This is necessary for ensuring that the metrics can be accessed only by Promethe
Currently covered metrics:
- cdi-controller
- cdi-deployment

#### `019-manage-provisioner-tolerations.patch`

Add annotation to manage provisioner tolerations to avoid unschedulable error.
Original file line number Diff line number Diff line change
Expand Up @@ -30,17 +30,26 @@ const (
VIShortName = "vi"

// AnnAPIGroup is the APIGroup for virtualization-controller.
AnnAPIGroup = "virt.deckhouse.io"
AnnAPIGroup = "virt.deckhouse.io"
AnnAPIGroupCDI = "cdi.kubevirt.io"

// AnnCreatedBy is a pod annotation indicating if the pod was created by the PVC
// AnnCreatedBy is a pod annotation indicating if the pod was created by the PVC.
AnnCreatedBy = AnnAPIGroup + "/storage.createdByController"

// AnnPodRetainAfterCompletion is PVC annotation for retaining transfer pods after completion
AnnPodRetainAfterCompletion = AnnAPIGroup + "/storage.pod.retainAfterCompletion"

// AnnUploadURL provides a const for CVMI/VMI/VMD uploadURL annotation
// AnnUploadURL provides a const for CVMI/VMI/VMD uploadURL annotation.
AnnUploadURL = AnnAPIGroup + "/upload.url"

// AnnTolerationsHash provides a const for annotation with hash of applied tolerations.
AnnTolerationsHash = AnnAPIGroup + "/tolerations-hash"

// AnnProvisionerTolerations provides a const for tolerations to use for provisioners.
AnnProvisionerTolerations = AnnAPIGroup + "/provisioner-tolerations"
// AnnProvisionerName provides a name of data volume provisioner.
AnnProvisionerName = AnnAPIGroup + "/provisioner-name"

// AnnDefaultStorageClass is the annotation indicating that a storage class is the default one.
AnnDefaultStorageClass = "storageclass.kubernetes.io/is-default-class"

Expand Down Expand Up @@ -70,6 +79,10 @@ const (
AppKubernetesManagedByLabel = "app.kubernetes.io/managed-by"
// AppKubernetesComponentLabel is the Kubernetes recommended component label
AppKubernetesComponentLabel = "app.kubernetes.io/component"

AnnPodScheduledCondition = AnnAPIGroupCDI + "/pod.condition.scheduled"
AnnPodScheduledConditionMessage = AnnAPIGroupCDI + "/pod.condition.scheduled.message"
AnnPodScheduledConditionReason = AnnAPIGroupCDI + "/pod.condition.scheduled.reason"
)

// AddAnnotation adds an annotation to an object
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
/*
Copyright 2024 Flant JSC
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package provisioner

import (
"encoding/base64"
"encoding/json"

corev1 "k8s.io/api/core/v1"
"sigs.k8s.io/controller-runtime/pkg/client"

"github.com/deckhouse/virtualization-controller/pkg/common/annotations"
)

type NodePlacement struct {
// tolerations is a list of tolerations applied to the relevant kind of pods
// See https://kubernetes.io/docs/concepts/configuration/taint-and-toleration/ for more info.
// These are additional tolerations other than default ones.
Tolerations []corev1.Toleration `json:"tolerations,omitempty"`
}

func IsNodePlacementChanged(nodePlacement *NodePlacement, obj client.Object) (bool, error) {
oldHash, exists := obj.GetAnnotations()[annotations.AnnTolerationsHash]

if nodePlacement == nil && exists {
return true, nil
}

if (nodePlacement == nil || len(nodePlacement.Tolerations) == 0) && !exists {
return false, nil
}

JSON, err := json.Marshal(nodePlacement.Tolerations)
if err != nil {
return false, err
}

newHash := base64.StdEncoding.EncodeToString(JSON)

return oldHash != newHash, nil
}

func KeepNodePlacementTolerations(nodePlacement *NodePlacement, obj client.Object) error {
anno := obj.GetAnnotations()

if nodePlacement == nil || len(nodePlacement.Tolerations) == 0 {
_, ok := anno[annotations.AnnTolerationsHash]
if !ok {
return nil
}

delete(anno, annotations.AnnTolerationsHash)

obj.SetAnnotations(anno)

return nil
}

JSON, err := json.Marshal(nodePlacement.Tolerations)
if err != nil {
return err
}

if anno == nil {
anno = make(map[string]string)
}

anno[annotations.AnnTolerationsHash] = base64.StdEncoding.EncodeToString(JSON)

obj.SetAnnotations(anno)

return nil
}
Loading

0 comments on commit 4380a3a

Please sign in to comment.