Skip to content

Commit

Permalink
[ci skip] 2024.09.05-31244
Browse files Browse the repository at this point in the history
  • Loading branch information
cybozu-neco committed Sep 5, 2024
2 parents 438f056 + 6edbcce commit 68404de
Show file tree
Hide file tree
Showing 13 changed files with 189 additions and 21 deletions.
8 changes: 3 additions & 5 deletions .circleci/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -340,13 +340,12 @@ jobs:
printf "%s\n" "$diffs"
# Skip the following steps if there is no difference.
- restore-tools
- run-dctest-bootstrap:
tag: release
- run-dctest-bootstrap
- run:
name: Run dctest functions release
command: |
if [ -f .skip ]; then exit 0; fi
./bin/run-dctest-suite.sh functions release
./bin/run-dctest-suite.sh functions
no_output_timeout: 20m
- run:
name: Set the instance lifetime
Expand Down Expand Up @@ -381,11 +380,10 @@ jobs:
- run: git merge --no-commit origin/main
- restore-tools
- run-dctest-bootstrap:
tag: release
datacenter: staging
- run:
name: Run dctest upgrade release
command: ./bin/run-dctest-suite.sh upgrade release
command: ./bin/run-dctest-suite.sh upgrade
no_output_timeout: 20m
- run:
name: Set the instance lifetime
Expand Down
2 changes: 1 addition & 1 deletion Makefile.common
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ STERN_VERSION = 1.30.0


## These should be updated regularly
ARGOCD_VERSION = 2.10.2
ARGOCD_VERSION = 2.11.7
# Follow Argo CD installed kustomize version
# https://github.com/cybozu/neco-containers/blob/main/argocd/Dockerfile#L10
KUSTOMIZE_VERSION = 5.2.1
Expand Down
4 changes: 2 additions & 2 deletions artifacts.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

10 changes: 5 additions & 5 deletions artifacts_ignore.yaml
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
images:
- repository: ghcr.io/cybozu/cilium
versions: ["1.14.13.1", "1.14.13.2"]
versions: ["1.14.13.1", "1.14.13.2", "1.14.14.1"]
- repository: ghcr.io/cybozu/cilium-operator-generic
versions: ["1.14.13.1"]
versions: ["1.14.13.1", "1.14.14.1"]
- repository: ghcr.io/cybozu/hubble-relay
versions: ["1.14.13.1", "1.14.13.2"]
versions: ["1.14.13.1", "1.14.13.2", "1.14.14.1"]
- repository: ghcr.io/cybozu/cilium-certgen
versions: ["0.1.14.1"]
- repository: ghcr.io/cybozu-go/coil
versions: ["2.7.2"]
- repository: ghcr.io/cybozu/etcd
versions: ["3.5.15.1"]
osImage:
- channel: stable
versions: ["3975.2.0"]
5 changes: 5 additions & 0 deletions dctest/bmc-user.json
Original file line number Diff line number Diff line change
Expand Up @@ -9,5 +9,10 @@
"hash": "hashed_secret",
"salt": "salt for hash"
}
},
"support": {
"password": {
"raw": "raw password for support user"
}
}
}
6 changes: 3 additions & 3 deletions etc/coil-pre.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4571,7 +4571,7 @@ spec:
valueFrom:
fieldRef:
fieldPath: metadata.name
image: ghcr.io/cybozu-go/coil:2.7.1
image: ghcr.io/cybozu-go/coil:2.7.2
livenessProbe:
httpGet:
host: localhost
Expand Down Expand Up @@ -4655,7 +4655,7 @@ spec:
valueFrom:
fieldRef:
fieldPath: spec.nodeName
image: ghcr.io/cybozu-go/coil:2.7.1
image: ghcr.io/cybozu-go/coil:2.7.2
livenessProbe:
httpGet:
host: localhost
Expand Down Expand Up @@ -4701,7 +4701,7 @@ spec:
configMapKeyRef:
key: cni_netconf
name: coil-config-tc2gh94mfh
image: ghcr.io/cybozu-go/coil:2.7.1
image: ghcr.io/cybozu-go/coil:2.7.2
name: coil-installer
securityContext:
privileged: true
Expand Down
6 changes: 3 additions & 3 deletions etc/coil.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4571,7 +4571,7 @@ spec:
valueFrom:
fieldRef:
fieldPath: metadata.name
image: ghcr.io/cybozu-go/coil:2.7.1
image: ghcr.io/cybozu-go/coil:2.7.2
livenessProbe:
httpGet:
host: localhost
Expand Down Expand Up @@ -4655,7 +4655,7 @@ spec:
valueFrom:
fieldRef:
fieldPath: spec.nodeName
image: ghcr.io/cybozu-go/coil:2.7.1
image: ghcr.io/cybozu-go/coil:2.7.2
livenessProbe:
httpGet:
host: localhost
Expand Down Expand Up @@ -4698,7 +4698,7 @@ spec:
configMapKeyRef:
key: cni_netconf
name: coil-config-tc2gh94mfh
image: ghcr.io/cybozu-go/coil:2.7.1
image: ghcr.io/cybozu-go/coil:2.7.2
name: coil-installer
securityContext:
privileged: true
Expand Down
4 changes: 2 additions & 2 deletions installer/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -40,9 +40,9 @@ BIRD2_SHA512 = 21b17a1e52dba90e0f35bb6b0cd8048c355de4c8364951d495e50d6e387ca807c
LIBSSH_URL = http://archive.ubuntu.com/ubuntu/pool/main/libs/libssh/libssh-gcrypt-4_0.9.6-2build1_amd64.deb
LIBSSH_DEB = build/$(notdir $(LIBSSH_URL))
LIBSSH_SHA512 = c0a52a502da59cc644e178b351dc1edfdc44e04ea61184fc858e6d9dbdf161b27f2a30371e721a3ffb9c513ac23706fecc0df1b21259ec1183788049bf64547d
LIBSSL3_URL = http://security.ubuntu.com/ubuntu/pool/main/o/openssl/libssl3_3.0.2-0ubuntu1.15_amd64.deb
LIBSSL3_URL = http://security.ubuntu.com/ubuntu/pool/main/o/openssl/libssl3_3.0.2-0ubuntu1.18_amd64.deb
LIBSSL3_DEB = build/$(notdir $(LIBSSL3_URL))
LIBSSL3_SHA512 = b606ddae8d142aaaa141fbe96c01462c54dc6f7c0e16ab412d156ac2fc7d83221c1ee27ff84ec5d6a0baefe2b7e4032beb226bdda4fb28a90e9631e8fe84ceb5
LIBSSL3_SHA512 = c00eb14fd00ed3b23bed723f1bf7e00bfa1919b2c1f24ad81a1cc693f19a424a93d7123898e4ce91b4ac32c169fab3dd5d0274668cb8bc3c447a01f486165517
endif
DEBS = $(CHRONY_DEB) $(BIRD2_DEB) $(LIBSSH_DEB) $(LIBSSL3_DEB)

Expand Down
36 changes: 36 additions & 0 deletions pkg/neco-rebooter/controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ type EntriesCollection struct {
NewEntry []*neco.RebootListEntry
CancelledEntry []EntrySet
QueuedEntry []EntrySet
OrphanedEntry []EntrySet
}

var (
Expand Down Expand Up @@ -112,6 +113,20 @@ func (c *Controller) dequeueAndCancelEntry(ctx context.Context, entries []EntryS
return nil
}

// RemoveOrphanedEntry cancels the reboot queue entry of each given EntrySet.
//
// For every set that carries a reboot queue entry, the entry's status is set
// to cke.RebootStatusCancelled and written back through ckeStorage; the entry
// is not deleted here. Sets without a reboot queue entry are skipped.
// Processing stops at the first storage error, which is returned as is.
func (c *Controller) RemoveOrphanedEntry(ctx context.Context, entries []EntrySet) error {
	for _, set := range entries {
		rq := set.rebootQueueEntry
		if rq == nil {
			// Nothing queued for this set, so there is nothing to cancel.
			continue
		}

		rq.Status = cke.RebootStatusCancelled
		if err := c.ckeStorage.UpdateRebootsEntry(ctx, rq); err != nil {
			return err
		}

		slog.With(slog.String("operation", "RemoveOrphanedEntry")).Info(
			"rebootQueueEntry cancelled",
			slog.String("node", rq.Node),
		)
	}
	return nil
}

func (c *Controller) addRebootListEntry(ctx context.Context, entries []*neco.RebootListEntry) error {
for _, entry := range entries {
entry.Status = neco.RebootListEntryStatusQueued
Expand Down Expand Up @@ -177,12 +192,17 @@ func (c *Controller) collectEntries(rebootListEntries []*neco.RebootListEntry, r
newEntry := []*neco.RebootListEntry{}
cancelledEntry := []EntrySet{}
queuedEntry := []EntrySet{}
orphanedEntry := []EntrySet{}
for _, entry := range rebootListEntries {
rqEntry := findRebootQueueEntryFromRebootListEntry(rebootQueueEntries, *entry)
switch entry.Status {
case neco.RebootListEntryStatusCancelled:
cancelledEntry = append(cancelledEntry, EntrySet{entry, rqEntry})
case neco.RebootListEntryStatusPending:
// skip: a reboot queue entry already exists for this node, so do not add a duplicate
if rqEntry != nil {
break
}
if entry.Group == processingGroup && c.isRebootable(entry) {
newEntry = append(newEntry, entry)
}
Expand All @@ -197,12 +217,21 @@ func (c *Controller) collectEntries(rebootListEntries []*neco.RebootListEntry, r
}
}
}
for _, rqEntry := range rebootQueueEntries {
rlEntry := findRebootListEntryFromRebootQueueEntry(rebootListEntries, *rqEntry)
if rlEntry == nil {
orphanedEntry = append(orphanedEntry, EntrySet{nil, rqEntry})
} else if rlEntry.Status == neco.RebootListEntryStatusPending {
orphanedEntry = append(orphanedEntry, EntrySet{rlEntry, rqEntry})
}
}
return EntriesCollection{
CompletedEntry: completedEntry,
TimedOutEntry: timedOutEntry,
NewEntry: newEntry,
CancelledEntry: cancelledEntry,
QueuedEntry: queuedEntry,
OrphanedEntry: orphanedEntry,
}
}

Expand Down Expand Up @@ -279,6 +308,13 @@ func (c *Controller) runOnce(ctx context.Context) error {
}
return nil
}
if len(collection.OrphanedEntry) > 0 {
err = c.RemoveOrphanedEntry(ctx, collection.OrphanedEntry)
if err != nil {
return err
}
return nil
}
err = c.removeCompletedEntry(ctx, collection.CompletedEntry)
if err != nil {
return err
Expand Down
94 changes: 94 additions & 0 deletions pkg/neco-rebooter/controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -232,6 +232,77 @@ func TestDequeueAndCancelEntry(t *testing.T) {
}
}

// TestRemoveOrphanedEntry checks that RemoveOrphanedEntry cancels every reboot
// queue entry it is handed, covering both an entry paired with a pending
// reboot list entry (node1) and an entry with no reboot list entry (node2),
// and that the number of reboot list entries is left unchanged.
func TestRemoveOrphanedEntry(t *testing.T) {
	c, err := newTestController()
	if err != nil {
		t.Fatal(err)
	}
	defer func() {
		err := cleanupEtcd()
		if err != nil {
			t.Fatal(err)
		}
	}()

	ctx := context.Background()

	rebootListEntry := neco.RebootListEntry{
		Node:       "node1",
		Group:      "group1",
		RebootTime: "test1",
		Status:     neco.RebootListEntryStatusPending,
	}
	rebootQueueEntry := []*cke.RebootQueueEntry{
		{
			Node:   "node1",
			Status: cke.RebootStatusQueued,
		},
		{
			Node:   "node2",
			Status: cke.RebootStatusQueued,
		},
	}
	entrySet := []EntrySet{
		{
			rebootListEntry:  &rebootListEntry,
			rebootQueueEntry: rebootQueueEntry[0],
		},
		{
			// No reboot list entry at all for this queue entry.
			rebootQueueEntry: rebootQueueEntry[1],
		},
	}

	err = c.necoStorage.RegisterRebootListEntry(ctx, &rebootListEntry)
	if err != nil {
		t.Fatal(err)
	}
	for _, entry := range rebootQueueEntry {
		err = c.ckeStorage.RegisterRebootsEntry(ctx, entry)
		if err != nil {
			t.Fatal(err)
		}
	}

	err = c.RemoveOrphanedEntry(ctx, entrySet)
	if err != nil {
		t.Fatal(err)
	}
	rlEntries, err := c.necoStorage.GetRebootListEntries(ctx)
	if err != nil {
		t.Fatal(err)
	}
	rqEntries, err := c.ckeStorage.GetRebootsEntries(ctx)
	if err != nil {
		t.Fatal(err)
	}
	if len(rlEntries) != 1 {
		t.Errorf("number of reboot list entries is not expected value: want 1, actual %d", len(rlEntries))
	}
	// Without this check the status loop below would pass vacuously if the
	// reboot queue came back empty.
	if len(rqEntries) != len(rebootQueueEntry) {
		t.Errorf("number of reboot queue entries is not expected value: want %d, actual %d", len(rebootQueueEntry), len(rqEntries))
	}
	for _, entry := range rqEntries {
		if entry.Status != cke.RebootStatusCancelled {
			t.Errorf("reboot queue entry for %s is not cancelled: status %v", entry.Node, entry.Status)
		}
	}
}

func TestAddRebootListEntry(t *testing.T) {
c, err := newTestController()
if err != nil {
Expand Down Expand Up @@ -455,6 +526,12 @@ groupLabelKey: topology.kubernetes.io/zone
RebootTime: "test1",
Status: neco.RebootListEntryStatusQueued,
},
{ // orphaned node
Node: "node6",
Group: "group1",
RebootTime: "test1",
Status: neco.RebootListEntryStatusPending,
},
}
rebootQueueEntry := []*cke.RebootQueueEntry{
{
Expand All @@ -469,6 +546,14 @@ groupLabelKey: topology.kubernetes.io/zone
Node: "node5",
Status: cke.RebootStatusQueued,
},
{ // orphaned node
Node: "node6",
Status: cke.RebootStatusQueued,
},
{ // orphaned node
Node: "node7",
Status: cke.RebootStatusQueued,
},
}
collection := c.collectEntries(rebootListEntries, rebootQueueEntry, "group1")
if len(collection.CancelledEntry) != 1 {
Expand Down Expand Up @@ -534,6 +619,15 @@ groupLabelKey: topology.kubernetes.io/zone
t.Error("QueuedEntry is not expected value")
}
}

if len(collection.OrphanedEntry) != 2 {
t.Error("number of OrphanedEntry is not expected value, actual ", len(collection.OrphanedEntry))
}
for _, es := range collection.OrphanedEntry {
if es.rebootQueueEntry.Node != "node6" && es.rebootQueueEntry.Node != "node7" {
t.Error("OrphanedEntry is not expected value, actual ", es.rebootQueueEntry.Node)
}
}
}
func TestIsRebootable(t *testing.T) {
c, err := newTestController()
Expand Down
8 changes: 8 additions & 0 deletions pkg/neco-rebooter/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,14 @@ func (c *collector) updateProcessingGroup(ch chan<- prometheus.Metric, ctx conte
slog.Error("failed to get processing group", "err", err)
return
}
entries, err := c.storage.GetRebootListEntries(ctx)
if err != nil {
slog.Error("failed to get reboot list", "err", err)
return
}
if len(entries) == 0 {
return
}
ch <- prometheus.MustNewConstMetric(c.processingGroup, prometheus.GaugeValue, 1, group)
}

Expand Down
19 changes: 19 additions & 0 deletions pkg/neco-rebooter/metrics_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -108,4 +108,23 @@ func TestMetrics(t *testing.T) {
t.Errorf("number of %s is expected to 1 %s, but got %d", metrics, tc.expect, strings.Count(metrics, tc.expect))
}
}

// Remove all entries
entries, err := c.necoStorage.GetRebootListEntries(context.Background())
if err != nil {
t.Fatal(err)
}
for _, entry := range entries {
err = c.necoStorage.RemoveRebootListEntry(context.Background(), c.leaderKey, entry)
if err != nil {
t.Fatal(err)
}
}
req = httptest.NewRequest("GET", "/metrics", nil)
rec = httptest.NewRecorder()
ts.Config.Handler.ServeHTTP(rec, req)
metrics = rec.Body.String()
if strings.Contains(metrics, "neco_rebooter_processing_group") {
t.Errorf("unexpected metrics: %s", metrics)
}
}
Loading

0 comments on commit 68404de

Please sign in to comment.