Skip to content

Commit

Permalink
[ci skip] 2024.09.10-31489
Browse files Browse the repository at this point in the history
  • Loading branch information
cybozu-neco committed Sep 10, 2024
2 parents a8991f0 + c9516ad commit 072eb21
Show file tree
Hide file tree
Showing 3 changed files with 19 additions and 5 deletions.
2 changes: 1 addition & 1 deletion dctest/join_remove_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -168,7 +168,7 @@ func testJoinRemove() {
execSafeAt(bootServers[0], "sudo", "env", "VAULT_TOKEN="+token, "neco", "leave", "3")

By("Waiting for the request to complete")
waitRequestComplete("members: [0 1 2]", true)
waitRequestCompleteWithRecover("members: [0 1 2]", 3)

By("Waiting boot-3 gets removed from etcd")
Eventually(func() error {
Expand Down
20 changes: 17 additions & 3 deletions dctest/run_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -193,20 +193,28 @@ func execRetryAt(host string, handler retryHandler, args ...string) []byte {
}

// waitRequestComplete waits for the current request to be completed.
// If the request is aborted, it tries to recover up to the specified number of times.
// If check is not "", its contents are also checked against the output from "neco status".
func waitRequestComplete(check string, recover ...bool) {
func waitRequestCompleteWithRecover(check string, recoverMax int) {
// wait a moment for neco-updater to put a new request.
time.Sleep(time.Second * 2)

EventuallyWithOffset(1, func() error {
recoverCount := 0
Eventually(func() error {
stdout, stderr, err := execAt(bootServers[0], "neco", "status")
if err != nil {
return fmt.Errorf("stdout: %s, stderr: %s, err: %v", stdout, stderr, err)
}
out := string(stdout)

// Sometimes, neco-worker aborts the update process. Detect it and recover if it is necessary.
if len(recover) != 0 && recover[0] && strings.Contains(out, "status: aborted") {
if strings.Contains(out, "status: aborted") {
if recoverCount >= recoverMax {
return StopTrying("update process is aborted: " + out)
}
recoverCount++
fmt.Println(out)
fmt.Println("update request is aborted, try to recover...")
execAt(bootServers[0], "neco", "recover")
return errors.New("update process is aborted: " + out)
}
Expand All @@ -222,6 +230,12 @@ func waitRequestComplete(check string, recover ...bool) {
}).Should(Succeed())
}

// waitRequestComplete waits for the current request to be completed.
// If check is not "", its contents are also checked against the output from "neco status".
// It delegates to waitRequestCompleteWithRecover with a recovery limit of 0.
func waitRequestComplete(check string) {
// A limit of 0 means an aborted request stops the wait immediately
// instead of running "neco recover".
waitRequestCompleteWithRecover(check, 0)
}

func getVaultToken() string {
var token string
Eventually(func() error {
Expand Down
2 changes: 1 addition & 1 deletion dctest/setup_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,7 @@ WantedBy=multi-user.target`

It("should complete updates", func() {
By("Waiting for request to complete")
waitRequestComplete("")
waitRequestCompleteWithRecover("", 3)

By("Installing sshd_config and sudoers")
for _, h := range bootServers {
Expand Down

0 comments on commit 072eb21

Please sign in to comment.