Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Backport of VAULT-30819: verify DR secondary leader before unsealing followers into release/1.17.x #28486

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion enos/enos-dev-scenario-pr-replication.hcl
Original file line number Diff line number Diff line change
Expand Up @@ -815,7 +815,7 @@ scenario "dev_pr_replication" {
Depending on how we're configured we'll pass the unseal keys according to this guide:
https://developer.hashicorp.com/vault/docs/enterprise/replication#seals
EOF
module = module.vault_unseal_nodes
module = module.vault_unseal_replication_followers
depends_on = [
step.create_primary_cluster,
step.create_secondary_cluster,
Expand Down
4 changes: 2 additions & 2 deletions enos/enos-modules.hcl
Original file line number Diff line number Diff line change
Expand Up @@ -256,8 +256,8 @@ module "vault_test_ui" {
ui_run_tests = var.ui_run_tests
}

module "vault_unseal_nodes" {
source = "./modules/vault_unseal_nodes"
module "vault_unseal_replication_followers" {
source = "./modules/vault_unseal_replication_followers"

vault_install_dir = var.vault_install_dir
}
Expand Down
14 changes: 11 additions & 3 deletions enos/enos-scenario-dr-replication.hcl
Original file line number Diff line number Diff line change
Expand Up @@ -814,7 +814,11 @@ scenario "dr_replication" {
enos = local.enos_provider[matrix.distro]
}

verifies = quality.vault_api_sys_replication_dr_secondary_enable_write
verifies = [
quality.vault_api_sys_leader_read,
quality.vault_api_sys_replication_dr_secondary_enable_write,
quality.vault_api_sys_replication_dr_status_read,
]

variables {
ip_version = matrix.ip_version
Expand All @@ -834,7 +838,7 @@ scenario "dr_replication" {
type combinations. See the guide for more information:
https://developer.hashicorp.com/vault/docs/enterprise/replication#seals
EOF
module = module.vault_unseal_nodes
module = module.vault_unseal_replication_followers
depends_on = [
step.configure_dr_replication_secondary
]
Expand Down Expand Up @@ -883,7 +887,11 @@ scenario "dr_replication" {
and ensuring that all secondary nodes are unsealed.
EOF
module = module.vault_verify_dr_replication
depends_on = [step.configure_dr_replication_secondary]
depends_on = [
step.configure_dr_replication_secondary,
step.unseal_secondary_followers,
step.verify_secondary_cluster_is_unsealed_after_enabling_replication,
]

providers = {
enos = local.enos_provider[matrix.distro]
Expand Down
2 changes: 1 addition & 1 deletion enos/enos-scenario-pr-replication.hcl
Original file line number Diff line number Diff line change
Expand Up @@ -820,7 +820,7 @@ scenario "pr_replication" {
type combinations. See the guide for more information:
https://developer.hashicorp.com/vault/docs/enterprise/replication#seals
EOF
module = module.vault_unseal_nodes
module = module.vault_unseal_replication_followers
depends_on = [
step.create_primary_cluster,
step.create_secondary_cluster,
Expand Down
6 changes: 4 additions & 2 deletions enos/modules/vault_setup_dr_primary/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -42,14 +42,16 @@ variable "vault_root_token" {
type = string
description = "The vault root token"
}
resource "enos_remote_exec" "configure_dr_primary" {

// Enable DR replication on the primary. This will immediately clear all data in the secondary.
resource "enos_remote_exec" "enable_dr_replication" {
environment = {
VAULT_ADDR = var.vault_addr
VAULT_TOKEN = var.vault_root_token
VAULT_INSTALL_DIR = var.vault_install_dir
}

scripts = [abspath("${path.module}/scripts/configure-vault-dr-primary.sh")]
scripts = [abspath("${path.module}/scripts/enable.sh")]

transport = {
ssh = {
Expand Down
42 changes: 41 additions & 1 deletion enos/modules/vault_setup_replication_secondary/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ variable "wrapping_token" {
description = "The wrapping token created on primary cluster"
}

resource "enos_remote_exec" "configure_pr_secondary" {
resource "enos_remote_exec" "enable_replication" {
environment = {
VAULT_ADDR = var.vault_addr
VAULT_TOKEN = var.vault_root_token
Expand All @@ -72,3 +72,43 @@ resource "enos_remote_exec" "configure_pr_secondary" {
}
}
}

// Wait for the secondary cluster's designated leader host (var.secondary_leader_host, the only
// entry passed in "hosts") to be the "leader", which means it's running and all "setup" tasks
// have been completed. We'll have to unseal our follower nodes after this has occurred.
// This only starts after replication has been enabled (see depends_on).
// NOTE(review): the exact readiness checks live in ../vault_wait_for_leader and are not
// visible here; confirm there before relying on the description above.
module "wait_for_leader" {
source = "../vault_wait_for_leader"

depends_on = [
enos_remote_exec.enable_replication
]

hosts = { "0" : var.secondary_leader_host }
ip_version = var.ip_version
vault_addr = var.vault_addr
vault_install_dir = var.vault_install_dir
vault_root_token = var.vault_root_token
}

// Ensure that our leader is ready for us to unseal follower nodes.
// Runs scripts/wait-for-leader-ready.sh over SSH on the secondary leader host's public IP,
// and only after module.wait_for_leader has completed. The replication type, retry interval,
// timeout, and Vault connection details are handed to the script through the environment.
resource "enos_remote_exec" "wait_for_leader_ready" {
depends_on = [
module.wait_for_leader,
]

environment = {
REPLICATION_TYPE = var.replication_type
RETRY_INTERVAL = 3 // seconds
TIMEOUT_SECONDS = 60 // seconds
VAULT_ADDR = var.vault_addr
VAULT_TOKEN = var.vault_root_token
VAULT_INSTALL_DIR = var.vault_install_dir
}

scripts = [abspath("${path.module}/scripts/wait-for-leader-ready.sh")]

transport = {
ssh = {
host = var.secondary_leader_host.public_ip
}
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
#!/usr/bin/env bash
# Copyright (c) HashiCorp, Inc.
# SPDX-License-Identifier: BUSL-1.1

# Exit as soon as an unguarded command fails. fail() only *returns* non-zero, so this
# option is what actually terminates the script when fail() is invoked at the top level.
set -e

# Report an error and signal failure to the caller.
# Arguments: $1 - message to print
# Outputs:   writes the message to stderr
# Returns:   always 1
fail() {
  local message=$1
  echo "$message" >&2
  return 1
}

# Every variable listed here must be supplied (non-empty) by the caller; they are
# checked in this order and the first missing one is reported through fail().
for required_var in \
  REPLICATION_TYPE \
  RETRY_INTERVAL \
  TIMEOUT_SECONDS \
  VAULT_ADDR \
  VAULT_INSTALL_DIR \
  VAULT_TOKEN; do
  if [[ -z "${!required_var}" ]]; then
    fail "${required_var} env variable has not been set"
  fi
done
unset required_var

# The vault binary is expected directly inside the install directory.
binpath="${VAULT_INSTALL_DIR}/vault"
if [[ ! -x "$binpath" ]]; then
  fail "unable to locate vault binary at $binpath"
fi

# Have the vault CLI emit JSON so its output can be parsed with jq.
VAULT_FORMAT=json
export VAULT_FORMAT

# Read the replication status for the configured replication type.
# Globals:   binpath (read), REPLICATION_TYPE (read)
# Outputs:   writes the ".data" object of the status response to stdout
# Returns:   non-zero if the vault read or the jq extraction fails
replicationStatus() {
  local response
  # Capture the read separately from the jq step: in a plain pipeline (no pipefail) a
  # failed read would be hidden because jq exits 0 on empty input.
  response=$("$binpath" read "sys/replication/${REPLICATION_TYPE}/status") || return
  jq .data <<< "$response"
}

# Check whether replication on this node has progressed far enough for the follower
# nodes to be unsealed.
# Globals:   REPLICATION_TYPE (read, used in messages)
# Outputs:   writes a single progress/diagnostic line to stderr
# Returns:   0 when the status reports state "stream-wals", mode "secondary" and a
#            non-corrupted merkle tree; 1 otherwise (including when the status cannot
#            be read)
isReady() {
  # Fetch the current replication status once and run every check against it.
  local status
  if ! status=$(replicationStatus); then
    return 1
  fi

  if ! jq -eMc '.state == "stream-wals"' &> /dev/null <<< "$status"; then
    echo "${REPLICATION_TYPE} replication is not yet running, got: $(jq '.state' <<< "$status")" 1>&2
    return 1
  fi

  if ! jq -eMc '.mode == "secondary"' &> /dev/null <<< "$status"; then
    echo "${REPLICATION_TYPE} replication mode is not yet secondary, got: $(jq '.mode' <<< "$status")" 1>&2
    return 1
  fi

  if ! jq -eMc '.corrupted_merkle_tree == false' &> /dev/null <<< "$status"; then
    echo "${REPLICATION_TYPE} replication merkle tree is corrupted" 1>&2
    return 1
  fi

  echo "${REPLICATION_TYPE} primary is ready for followers to be unsealed!" 1>&2
  return 0
}

# Poll isReady every RETRY_INTERVAL seconds until it succeeds or TIMEOUT_SECONDS have
# elapsed since the loop started. Success exits 0 immediately; otherwise the last known
# replication status is included in the timeout error.
started_at=$(date +%s)
deadline=$((started_at + TIMEOUT_SECONDS))
while [[ "$(date +%s)" -lt "$deadline" ]]; do
  isReady && exit 0

  sleep "$RETRY_INTERVAL"
done

fail "Timed out waiting for ${REPLICATION_TYPE} primary to ready: $(replicationStatus)"
Loading