Skip to content

Commit

Permalink
Merge pull request #534 from red-hat-storage/sync_ds--master
Browse files Browse the repository at this point in the history
Syncing latest changes from master for rook
  • Loading branch information
travisn authored Nov 1, 2023
2 parents 742172b + d0fb79e commit 78ee910
Show file tree
Hide file tree
Showing 17 changed files with 426 additions and 30 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/helm-lint.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ jobs:
python-version: 3.9

- name: Set up chart-testing
uses: helm/chart-testing-action@v2.4.0
uses: helm/chart-testing-action@v2.6.0

- name: Run chart-testing (lint)
run: ct lint --charts=./deploy/charts/rook-ceph --validate-yaml=false --validate-maintainers=false
9 changes: 9 additions & 0 deletions Documentation/Contributing/development-environment.md
Original file line number Diff line number Diff line change
Expand Up @@ -87,3 +87,12 @@ docker tag "local/ceph-$(go env GOARCH)" 'rook/ceph:master'

4) Create a Rook cluster in minikube, or if the Rook cluster is already configured, apply the new
operator image by restarting the operator.


## Creating a dev cluster

To accelerate the development process, users have the option to employ the script located
at `tests/scripts/create-dev-cluster.sh`. This script is designed to rapidly set
up a new minikube environment, apply the CRDs and the common file, and then utilize the
`cluster-test.yaml` manifest to create the Rook cluster. Once set up, users can use the different `*-test.yaml`
files from the `deploy/examples/` directory to configure their clusters.
2 changes: 1 addition & 1 deletion Documentation/Troubleshooting/disaster-recovery.md
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ the CRs to their prior state without even necessarily suffering cluster downtime
the validating webhook in order to make changes.

```console
kubectl delete ValidatingWebhookConfiguration rook-ceph-webhook
kubectl -n rook-ceph delete ValidatingWebhookConfiguration rook-ceph-webhook
```

4. Remove the owner references from all critical Rook resources that were referencing the `CephCluster` CR.
Expand Down
158 changes: 158 additions & 0 deletions deploy/examples/cluster-on-pvc-minikube.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,158 @@
#################################################################################################################
# Define the settings for a rook-ceph cluster running on a single-node minikube cluster

# This example expects a single node minikube cluster with three extra disks: vdb, vdc and vdd. Please modify
# it according to your environment. See the documentation for more details on storage settings available.

# For example, to create the cluster:
# kubectl create -f crds.yaml -f common.yaml -f operator.yaml
# kubectl create -f cluster-on-pvc-minikube.yaml
#################################################################################################################
kind: StorageClass
apiVersion: storage.k8s.io/v1
metadata:
name: local-storage
provisioner: kubernetes.io/no-provisioner
volumeBindingMode: WaitForFirstConsumer
---
kind: PersistentVolume
apiVersion: v1
metadata:
name: local0-0
spec:
storageClassName: local-storage
capacity:
storage: 10Gi
accessModes:
- ReadWriteOnce
persistentVolumeReclaimPolicy: Retain
# PV for mon must be a filesystem volume.
volumeMode: Filesystem
local:
# To use dm devices like logical volume, please replace `/dev/vdb` with their device names like `/dev/vg-name/lv-name`.
path: /dev/vdb
nodeAffinity:
required:
nodeSelectorTerms:
- matchExpressions:
- key: kubernetes.io/hostname
operator: In
values:
- minikube
---
kind: PersistentVolume
apiVersion: v1
metadata:
name: local0-1
spec:
storageClassName: local-storage
capacity:
storage: 20Gi
accessModes:
- ReadWriteOnce
persistentVolumeReclaimPolicy: Retain
# PV for OSD must be a block volume.
volumeMode: Block
local:
# To use dm devices like logical volume, please replace `/dev/vdc` with their device names like `/dev/vg-name/lv-name`.
path: /dev/vdc
nodeAffinity:
required:
nodeSelectorTerms:
- matchExpressions:
- key: kubernetes.io/hostname
operator: In
values:
- minikube
---
kind: PersistentVolume
apiVersion: v1
metadata:
name: local0-2
spec:
storageClassName: local-storage
capacity:
storage: 20Gi
accessModes:
- ReadWriteOnce
persistentVolumeReclaimPolicy: Retain
# PV for OSD must be a block volume.
volumeMode: Block
local:
# To use dm devices like logical volume, please replace `/dev/vdd` with their device names like `/dev/vg-name/lv-name`.
path: /dev/vdd
nodeAffinity:
required:
nodeSelectorTerms:
- matchExpressions:
- key: kubernetes.io/hostname
operator: In
values:
- minikube
---
apiVersion: ceph.rook.io/v1
kind: CephCluster
metadata:
name: my-cluster
namespace: rook-ceph # namespace:cluster
spec:
dataDirHostPath: /var/lib/rook
mon:
count: 1
allowMultiplePerNode: true
volumeClaimTemplate:
spec:
storageClassName: local-storage
resources:
requests:
storage: 10Gi
mgr:
count: 1
modules:
- name: pg_autoscaler
enabled: true
dashboard:
enabled: true
ssl: false
crashCollector:
disable: false
cephVersion:
image: quay.io/ceph/ceph:v18
allowUnsupported: false
skipUpgradeChecks: false
continueUpgradeAfterChecksEvenIfNotHealthy: false
storage:
storageClassDeviceSets:
- name: set1
count: 2
portable: false
tuneDeviceClass: true
tuneFastDeviceClass: false
encrypted: false
placement:
preparePlacement:
volumeClaimTemplates:
- metadata:
name: data
# if you are looking at giving your OSD a different CRUSH device class than the one detected by Ceph
# annotations:
# crushDeviceClass: hybrid
spec:
resources:
requests:
storage: 20Gi
# IMPORTANT: Change the storage class depending on your environment
storageClassName: local-storage
volumeMode: Block
accessModes:
- ReadWriteOnce
# when onlyApplyOSDPlacement is false, will merge both placement.All() and storageClassDeviceSets.Placement
onlyApplyOSDPlacement: false
priorityClassNames:
mon: system-node-critical
osd: system-node-critical
mgr: system-cluster-critical
disruptionManagement:
managePodBudgets: true
osdMaintenanceTimeout: 30
pgHealthCheckTimeout: 0
6 changes: 3 additions & 3 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -4,15 +4,15 @@ go 1.20

require (
github.com/IBM/keyprotect-go-client v0.12.2
github.com/aws/aws-sdk-go v1.46.1
github.com/aws/aws-sdk-go v1.46.6
github.com/banzaicloud/k8s-objectmatcher v1.8.0
github.com/ceph/go-ceph v0.24.0
github.com/coreos/pkg v0.0.0-20230601102743-20bbbf26f4d8
github.com/csi-addons/kubernetes-csi-addons v0.7.0
github.com/gemalto/kmip-go v0.0.10
github.com/go-ini/ini v1.67.0
github.com/google/go-cmp v0.6.0
github.com/google/uuid v1.3.1
github.com/google/uuid v1.4.0
github.com/hashicorp/vault/api v1.10.0
github.com/jetstack/cert-manager v1.7.3
github.com/k8snetworkplumbingwg/network-attachment-definition-client v1.4.0
Expand All @@ -39,7 +39,7 @@ require (
k8s.io/utils v0.0.0-20230726121419-3b25d923346b
sigs.k8s.io/controller-runtime v0.16.3
sigs.k8s.io/mcs-api v0.1.0
sigs.k8s.io/yaml v1.3.0
sigs.k8s.io/yaml v1.4.0
)

require (
Expand Down
11 changes: 6 additions & 5 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -450,8 +450,8 @@ github.com/armon/go-socks5 v0.0.0-20160902184237-e75332964ef5/go.mod h1:wHh0iHkY
github.com/asaskevich/govalidator v0.0.0-20180720115003-f9ffefc3facf/go.mod h1:lB+ZfQJz7igIIfQNfa7Ml4HSf2uFQQRzpGGRXenZAgY=
github.com/asaskevich/govalidator v0.0.0-20190424111038-f61b66f89f4a/go.mod h1:lB+ZfQJz7igIIfQNfa7Ml4HSf2uFQQRzpGGRXenZAgY=
github.com/aws/aws-sdk-go v1.44.164/go.mod h1:aVsgQcEevwlmQ7qHE9I3h+dtQgpqhFB+i8Phjh7fkwI=
github.com/aws/aws-sdk-go v1.46.1 h1:U26quvBWFZMQuultLw5tloW4GnmWaChEwMZNq8uYatw=
github.com/aws/aws-sdk-go v1.46.1/go.mod h1:aVsgQcEevwlmQ7qHE9I3h+dtQgpqhFB+i8Phjh7fkwI=
github.com/aws/aws-sdk-go v1.46.6 h1:6wFnNC9hETIZLMf6SOTN7IcclrOGwp/n9SLp8Pjt6E8=
github.com/aws/aws-sdk-go v1.46.6/go.mod h1:aVsgQcEevwlmQ7qHE9I3h+dtQgpqhFB+i8Phjh7fkwI=
github.com/banzaicloud/k8s-objectmatcher v1.8.0 h1:Nugn25elKtPMTA2br+JgHNeSQ04sc05MDPmpJnd1N2A=
github.com/banzaicloud/k8s-objectmatcher v1.8.0/go.mod h1:p2LSNAjlECf07fbhDyebTkPUIYnU05G+WfGgkTmgeMg=
github.com/benbjohnson/clock v1.1.0/go.mod h1:J11/hYXuz8f4ySSvYwY0FKfm+ezbsZBKZxNJlLklBHA=
Expand Down Expand Up @@ -763,8 +763,8 @@ github.com/google/uuid v1.0.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+
github.com/google/uuid v1.1.1/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
github.com/google/uuid v1.1.2/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
github.com/google/uuid v1.3.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
github.com/google/uuid v1.3.1 h1:KjJaJ9iWZ3jOFZIf1Lqf4laDRCasjl0BCmnEGxkdLb4=
github.com/google/uuid v1.3.1/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
github.com/google/uuid v1.4.0 h1:MtMxsa51/r9yyhkyLsVeVt0B+BGQZzpQiTQ4eHZ8bc4=
github.com/google/uuid v1.4.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
github.com/googleapis/enterprise-certificate-proxy v0.0.0-20220520183353-fd19c99a87aa/go.mod h1:17drOmN3MwGY7t0e+Ei9b45FFGA3fBs3x36SsCg1hq8=
github.com/googleapis/enterprise-certificate-proxy v0.1.0/go.mod h1:17drOmN3MwGY7t0e+Ei9b45FFGA3fBs3x36SsCg1hq8=
github.com/googleapis/enterprise-certificate-proxy v0.2.0/go.mod h1:8C0jb7/mgJe/9KK8Lm7X9ctZC2t60YyIpYEI16jx0Qg=
Expand Down Expand Up @@ -1968,5 +1968,6 @@ sigs.k8s.io/structured-merge-diff/v4 v4.3.0 h1:UZbZAZfX0wV2zr7YZorDz6GXROfDFj6Lv
sigs.k8s.io/structured-merge-diff/v4 v4.3.0/go.mod h1:N8hJocpFajUSSeSJ9bOZ77VzejKZaXsTtZo4/u7Io08=
sigs.k8s.io/yaml v1.1.0/go.mod h1:UJmg0vDUVViEyp3mgSv9WPwZCDxu4rQW1olrI1uml+o=
sigs.k8s.io/yaml v1.2.0/go.mod h1:yfXDCHCao9+ENCvLSE62v9VSji2MKu5jeNfTrofGhJc=
sigs.k8s.io/yaml v1.3.0 h1:a2VclLzOGrwOHDiV8EfBGhvjHvP46CtW5j6POvhYGGo=
sigs.k8s.io/yaml v1.3.0/go.mod h1:GeOyir5tyXNByN85N/dRIT9es5UQNerPYEKK56eTBm8=
sigs.k8s.io/yaml v1.4.0 h1:Mk1wCc2gy/F0THH0TAp1QYyJNzRm2KCLy3o5ASXVI5E=
sigs.k8s.io/yaml v1.4.0/go.mod h1:Ejl7/uTz7PSA4eKMyQCUTnhZYNmLIl+5c2lQPGR2BPY=
4 changes: 2 additions & 2 deletions images/ceph/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,9 @@ include ../image.mk
# Image Build Options

ifeq ($(GOARCH),amd64)
CEPH_VERSION ?= v17.2.6-20230410
CEPH_VERSION ?= v18.2.0-20231018
else
CEPH_VERSION ?= v17.2.6-20230410
CEPH_VERSION ?= v18.2.0-20231018
endif
REGISTRY_NAME = quay.io
BASEIMAGE = $(REGISTRY_NAME)/ceph/ceph-$(GOARCH):$(CEPH_VERSION)
Expand Down
23 changes: 23 additions & 0 deletions pkg/operator/ceph/cluster/mon/health.go
Original file line number Diff line number Diff line change
Expand Up @@ -366,9 +366,32 @@ func (c *Cluster) checkHealth(ctx context.Context) error {
}
}

// failover mons running on host path to use persistent volumes if VolumeClaimTemplate is set and vice versa
for _, mon := range c.ClusterInfo.Monitors {
if c.HasMonPathChanged(mon.Name) {
logger.Infof("fail over mon %q due to change in mon path", mon.Name)
c.failMon(len(c.ClusterInfo.Monitors), desiredMonCount, mon.Name)
return nil
}
}

return nil
}

// HasMonPathChanged reports whether the storage backing the named mon no
// longer matches the cluster spec, i.e. it has to move from a host path to a
// persistent volume or the other way around.
//
// The decision is based on two facts: whether the mon has a non-nil entry in
// the schedule mapping, and whether the spec sets a mon VolumeClaimTemplate.
// A change is reported when both are present or when both are absent; the
// detected direction is logged at info level.
func (c *Cluster) HasMonPathChanged(mon string) bool {
	hasSchedule := c.mapping.Schedule[mon] != nil
	wantsPVC := c.spec.Mon.VolumeClaimTemplate != nil

	switch {
	case hasSchedule && wantsPVC:
		logger.Infof("mon %q path has changed from host path to persistent volumes", mon)
		return true
	case !hasSchedule && !wantsPVC:
		logger.Infof("mon %q path has changed from persistent volumes to host path", mon)
		return true
	default:
		// The existing mon storage already matches what the spec asks for.
		return false
	}
}

func (c *Cluster) trackMonInOrOutOfQuorum(monName string, inQuorum bool) (bool, error) {
updateNeeded := false
var monsOutOfQuorum []string
Expand Down
34 changes: 34 additions & 0 deletions pkg/operator/ceph/cluster/mon/health_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -645,3 +645,37 @@ func TestUpdateMonInterval(t *testing.T) {
assert.Equal(t, time.Minute, h.interval)
})
}

// TestHasMonPathChanged exercises HasMonPathChanged for every combination of
// "mon has a schedule entry" and "spec sets a mon VolumeClaimTemplate".
// Each case builds its own cluster, so no state needs to be reset afterwards.
func TestHasMonPathChanged(t *testing.T) {
	testCases := []struct {
		name      string
		mon       string
		scheduled bool // a non-nil schedule entry exists for the mon
		withPVC   bool // spec.Mon.VolumeClaimTemplate is set
		expected  bool
	}{
		{name: "mon path changed from pv to hostpath", mon: "a", scheduled: false, withPVC: false, expected: true},
		{name: "mon path has not changed from pv to hostpath", mon: "b", scheduled: false, withPVC: true, expected: false},
		{name: "mon path changed from hostpath to pv", mon: "c", scheduled: true, withPVC: true, expected: true},
		{name: "mon path has not changed from host path to pv", mon: "d", scheduled: true, withPVC: false, expected: false},
	}

	for _, tc := range testCases {
		tc := tc // per-iteration copy; the module targets a Go version with shared loop variables
		t.Run(tc.name, func(t *testing.T) {
			c := New(context.TODO(), &clusterd.Context{}, "ns", cephv1.ClusterSpec{}, nil)

			if tc.withPVC {
				c.spec.Mon.VolumeClaimTemplate = &v1.PersistentVolumeClaim{Spec: v1.PersistentVolumeClaimSpec{}}
			}

			// Assign the entry explicitly in both cases so the map holds the
			// same key/value shape as the hand-written setup did.
			if tc.scheduled {
				c.mapping.Schedule[tc.mon] = &opcontroller.MonScheduleInfo{}
			} else {
				c.mapping.Schedule[tc.mon] = nil
			}

			assert.Equal(t, tc.expected, c.HasMonPathChanged(tc.mon))
		})
	}
}
6 changes: 6 additions & 0 deletions pkg/operator/ceph/cluster/mon/mon.go
Original file line number Diff line number Diff line change
Expand Up @@ -1229,6 +1229,12 @@ func (c *Cluster) commitMaxMonIDRequireIncrementing(desiredMaxMonID int, require
var updateDeploymentAndWait = UpdateCephDeploymentAndWait

func (c *Cluster) updateMon(m *monConfig, d *apps.Deployment) error {

if c.HasMonPathChanged(m.DaemonName) {
logger.Infof("path has changed for mon %q. Skip updating mon deployment %q in order to failover the mon", m.DaemonName, d.Name)
return nil
}

// Expand mon PVC if storage request for mon has increased in cephcluster crd
if c.monVolumeClaimTemplate(m) != nil {
desiredPvc, err := c.makeDeploymentPVC(m, false)
Expand Down
8 changes: 4 additions & 4 deletions pkg/operator/ceph/object/admin_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -406,7 +406,7 @@ const firstPeriodUpdate = `{
"id": "1580fd1d-a065-4484-82ff-329e9a779999",
"name": "my-store",
"api_name": "my-store",
"is_master": "true",
"is_master": true,
"endpoints": [
"http://10.105.59.166:80"
],
Expand Down Expand Up @@ -489,7 +489,7 @@ const secondPeriodGet = `{
"id": "1580fd1d-a065-4484-82ff-329e9a779999",
"name": "my-store",
"api_name": "my-store",
"is_master": "true",
"is_master": true,
"endpoints": [
"http://10.105.59.166:80"
],
Expand Down Expand Up @@ -575,7 +575,7 @@ const secondPeriodUpdateWithoutChanges = `{
"id": "1580fd1d-a065-4484-82ff-329e9a779999",
"name": "my-store",
"api_name": "my-store",
"is_master": "true",
"is_master": true,
"endpoints": [
"http://10.105.59.166:80"
],
Expand Down Expand Up @@ -659,7 +659,7 @@ const secondPeriodUpdateWithChanges = `{
"id": "1580fd1d-a065-4484-82ff-329e9a779999",
"name": "my-store",
"api_name": "my-store",
"is_master": "true",
"is_master": true,
"endpoints": [
"http://10.105.59.166:80",
"https://10.105.59.166:443"
Expand Down
Loading

0 comments on commit 78ee910

Please sign in to comment.