Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feature: configure federated learning service accounts #81

Open
wants to merge 1 commit into
base: int-federated-learning
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -13,15 +13,15 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
SHARED_CONFIG_PATHS=${@}
SHARED_CONFIG_PATHS=("${@}")

for SHARED_CONFIG_PATH in ${SHARED_CONFIG_PATHS}; do
echo "Loading shared configuration(${SHARED_CONFIG_PATH})"
echo "-------------------------------------------------------------------------"
cd ${SHARED_CONFIG_PATH} || exit 1
terraform init >/dev/null
terraform apply -auto-approve -input=false >/dev/null
terraform output
echo -e "-------------------------------------------------------------------------\n"
eval $(terraform output | sed -r 's/(\".*\")|\s*/\1/g')
for SHARED_CONFIG_PATH in "${SHARED_CONFIG_PATHS[@]}"; do
echo "Loading shared configuration(${SHARED_CONFIG_PATH})"
echo "-------------------------------------------------------------------------"
cd "${SHARED_CONFIG_PATH}" || exit 1
terraform init >/dev/null
terraform apply -auto-approve -input=false >/dev/null
terraform output
echo -e "-------------------------------------------------------------------------\n"
eval "$(terraform output | sed -r 's/(\".*\")|\s*/\1/g')"
done
79 changes: 44 additions & 35 deletions platforms/gke/base/core/deploy.sh
Original file line number Diff line number Diff line change
Expand Up @@ -14,57 +14,66 @@
# See the License for the specific language governing permissions and
# limitations under the License.
set -o errexit
set -o nounset
set -o pipefail

# shellcheck disable=SC1091
source "${ACP_PLATFORM_CORE_DIR}/functions.sh"

start_timestamp=$(date +%s)

declare -a terraservices
if [[ -v CORE_TERRASERVICES_APPLY ]]; then
terraservices=("${CORE_TERRASERVICES_APPLY[@]}")
if [[ -v CORE_TERRASERVICES_APPLY ]] &&
[[ -n "${CORE_TERRASERVICES_APPLY:-""}" ]]; then
echo "Found customized core platform terraservices set to apply: ${CORE_TERRASERVICES_APPLY}"
ParseSpaceSeparatedBashArray "${CORE_TERRASERVICES_APPLY}" "terraservices"
else
terraservices=(
"networking"
"container_cluster"
"container_node_pool"
"gke_enterprise/fleet_membership"
# Disable gke_enterprise/servicemesh due to b/376312292
# "gke_enterprise/servicemesh"
"workloads/kueue"
)
terraservices=(
"networking"
"container_cluster"
"container_node_pool"
"gke_enterprise/fleet_membership"
# Disable gke_enterprise/servicemesh due to b/376312292
# "gke_enterprise/servicemesh"
"workloads/kueue"
)
fi
echo "Core platform terraservices to provision: ${terraservices[*]}"

source ${ACP_PLATFORM_BASE_DIR}/_shared_config/scripts/set_environment_variables.sh ${ACP_PLATFORM_BASE_DIR}/_shared_config
# shellcheck disable=SC1091
source "${ACP_PLATFORM_BASE_DIR}/_shared_config/scripts/set_environment_variables.sh" "${ACP_PLATFORM_BASE_DIR}/_shared_config"

cd ${ACP_PLATFORM_CORE_DIR}/initialize &&
echo "Current directory: $(pwd)" &&
sed -i "s/^\([[:blank:]]*bucket[[:blank:]]*=\).*$/\1 \"${terraform_bucket_name}\"/" ${ACP_PLATFORM_CORE_DIR}/initialize/backend.tf.bucket &&
export STATE_MIGRATED="false" &&
if gcloud storage ls gs://${terraform_bucket_name}/terraform/initialize/default.tfstate &>/dev/null; then
if [ ! -f ${ACP_PLATFORM_CORE_DIR}/initialize/backend.tf ]; then
cp backend.tf.bucket backend.tf
fi
export STATE_MIGRATED="true"
# shellcheck disable=SC2154 # Variable is defined as a terraform output and sourced in other scripts
cd "${ACP_PLATFORM_CORE_DIR}/initialize" &&
echo "Current directory: $(pwd)" &&
sed -i "s/^\([[:blank:]]*bucket[[:blank:]]*=\).*$/\1 \"${terraform_bucket_name}\"/" "${ACP_PLATFORM_CORE_DIR}/initialize/backend.tf.bucket" &&
export STATE_MIGRATED="false" &&
if gcloud storage ls "gs://${terraform_bucket_name}/terraform/initialize/default.tfstate" &>/dev/null; then
if [ ! -f "${ACP_PLATFORM_CORE_DIR}/initialize/backend.tf" ]; then
cp backend.tf.bucket backend.tf
fi
export STATE_MIGRATED="true"
fi

cd ${ACP_PLATFORM_CORE_DIR}/initialize &&
terraform init &&
terraform plan -input=false -out=tfplan &&
terraform apply -input=false tfplan || exit 1
cd "${ACP_PLATFORM_CORE_DIR}/initialize" &&
terraform init &&
terraform plan -input=false -out=tfplan &&
terraform apply -input=false tfplan || exit 1
rm tfplan

if [ ${STATE_MIGRATED} == "false" ]; then
echo "Migrating the state backend"
terraform init -force-copy -migrate-state || exit 1
rm -rf terraform.tfstate*
if [ "${STATE_MIGRATED}" == "false" ]; then
echo "Migrating the state backend"
terraform init -force-copy -migrate-state || exit 1
rm -rf terraform.tfstate*
fi

for terraservice in "${terraservices[@]}"; do
cd "${ACP_PLATFORM_CORE_DIR}/${terraservice}" &&
echo "Current directory: $(pwd)" &&
terraform init &&
terraform plan -input=false -out=tfplan &&
terraform apply -input=false tfplan || exit 1
rm tfplan
cd "${ACP_PLATFORM_CORE_DIR}/${terraservice}" &&
echo "Current directory: $(pwd)" &&
terraform init &&
terraform plan -input=false -out=tfplan &&
terraform apply -input=false tfplan || exit 1
rm tfplan
done

end_timestamp=$(date +%s)
Expand Down
36 changes: 36 additions & 0 deletions platforms/gke/base/core/functions.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
#!/bin/bash
#
# Copyright 2024 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

#######################################
# Split a delimited string and append the resulting fields to a caller-owned
# Bash array.
# Arguments:
#   $1 - string to split (read as a single line)
#   $2 - name of the destination array variable; parsed fields are appended
#        to whatever it already contains
#   $3 - separator character(s), used as IFS while splitting
# Outputs:
#   Progress messages on stdout.
#######################################
ParseSeparatedBashArray() {
  # The destination is bound through a nameref, and a nameref is resolved by
  # name at the time it is used. A local in this function that shares the
  # caller's array name would shadow it, so the result would be written to the
  # local and lost on return (or trigger a circular name reference). The
  # "__psba_" prefix keeps the function's own names out of the way of any
  # reasonable caller variable name.
  local __psba_input="${1}"
  local -n __psba_dest="${2}"
  local __psba_separator="${3}"

  echo "Parsing ${__psba_input} as a Bash array"

  # IFS is overridden for this read only; the shell-wide IFS is untouched.
  local -a __psba_parsed
  IFS="${__psba_separator}" read -r -a __psba_parsed <<<"${__psba_input}"
  # ${!ref} on a nameref expands to the name of the variable it points at.
  echo "Elements to add to ${!__psba_dest}: ${__psba_parsed[*]}"

  __psba_dest+=("${__psba_parsed[@]}")
  echo "${!__psba_dest} after adding options: ${__psba_dest[*]}"
}

#######################################
# Convenience wrapper: split a space-separated string into a Bash array.
# Arguments:
#   $1 - string to split
#   $2 - name of the destination array variable
# Delegates to ParseSeparatedBashArray with a single space as the separator.
#######################################
ParseSpaceSeparatedBashArray() {
  ParseSeparatedBashArray "$1" "$2" ' '
}
105 changes: 58 additions & 47 deletions platforms/gke/base/core/teardown.sh
Original file line number Diff line number Diff line change
Expand Up @@ -14,71 +14,82 @@
# See the License for the specific language governing permissions and
# limitations under the License.
set -o errexit
set -o nounset
set -o pipefail

# shellcheck disable=SC1091
source "${ACP_PLATFORM_CORE_DIR}/functions.sh"

start_timestamp=$(date +%s)

declare -a terraservices
if [[ -v CORE_TERRASERVICES_DESTROY ]]; then
terraservices=("${CORE_TERRASERVICES_DESTROY[@]}")
if [[ -v CORE_TERRASERVICES_DESTROY ]] &&
[[ -n "${CORE_TERRASERVICES_DESTROY:-""}" ]]; then
echo "Found customized core platform terraservices set to destroy: ${CORE_TERRASERVICES_DESTROY}"
ParseSpaceSeparatedBashArray "${CORE_TERRASERVICES_DESTROY}" "terraservices"
else
terraservices=(
"workloads/kueue"
# Disable gke_enterprise/servicemesh due to b/376312292
# "gke_enterprise/servicemesh"
"gke_enterprise/fleet_membership"
"container_node_pool"
"container_cluster"
"networking"
)
terraservices=(
"workloads/kueue"
# Disable gke_enterprise/servicemesh due to b/376312292
# "gke_enterprise/servicemesh"
"gke_enterprise/fleet_membership"
"container_node_pool"
"container_cluster"
"networking"
)
fi
echo "Core platform terraservices to destroy: ${terraservices[*]}"

source ${ACP_PLATFORM_BASE_DIR}/_shared_config/scripts/set_environment_variables.sh ${ACP_PLATFORM_BASE_DIR}/_shared_config
# shellcheck disable=SC1091
source "${ACP_PLATFORM_BASE_DIR}/_shared_config/scripts/set_environment_variables.sh" "${ACP_PLATFORM_BASE_DIR}/_shared_config"

cd ${ACP_PLATFORM_CORE_DIR}/initialize &&
echo "Current directory: $(pwd)" &&
sed -i "s/^\([[:blank:]]*bucket[[:blank:]]*=\).*$/\1 \"${terraform_bucket_name}\"/" ${ACP_PLATFORM_CORE_DIR}/initialize/backend.tf.bucket &&
cp backend.tf.bucket backend.tf &&
terraform init &&
terraform plan -input=false -out=tfplan &&
terraform apply -input=false tfplan || exit 1
# shellcheck disable=SC2154 # Variable is defined as a terraform output and sourced in other scripts
cd "${ACP_PLATFORM_CORE_DIR}/initialize" &&
echo "Current directory: $(pwd)" &&
sed -i "s/^\([[:blank:]]*bucket[[:blank:]]*=\).*$/\1 \"${terraform_bucket_name}\"/" "${ACP_PLATFORM_CORE_DIR}/initialize/backend.tf.bucket" &&
cp backend.tf.bucket backend.tf &&
terraform init &&
terraform plan -input=false -out=tfplan &&
terraform apply -input=false tfplan || exit 1
rm tfplan

for terraservice in "${terraservices[@]}"; do
cd "${ACP_PLATFORM_CORE_DIR}/${terraservice}" &&
echo "Current directory: $(pwd)" &&
terraform init &&
terraform destroy -auto-approve || exit 1
rm -rf .terraform/
done

cd ${ACP_PLATFORM_CORE_DIR}/initialize &&
cd "${ACP_PLATFORM_CORE_DIR}/${terraservice}" &&
echo "Current directory: $(pwd)" &&
rm -rf backend.tf &&
terraform init -force-copy -lock=false -migrate-state || exit 1
gcloud storage rm -r gs://${terraform_bucket_name}/* &&
terraform init &&
terraform destroy -auto-approve || exit 1
rm -rf .terraform/
done

cd "${ACP_PLATFORM_CORE_DIR}/initialize" &&
echo "Current directory: $(pwd)" &&
rm -rf backend.tf &&
terraform init -force-copy -lock=false -migrate-state || exit 1
# Quote the globbing expression because we don't want to expand it with the
# shell
gcloud storage rm -r "gs://${terraform_bucket_name}/*" &&
terraform destroy -auto-approve || exit 1

rm -rf \
${ACP_PLATFORM_BASE_DIR}/_shared_config/.terraform/ \
${ACP_PLATFORM_BASE_DIR}/_shared_config/terraform.tfstate* \
${ACP_PLATFORM_CORE_DIR}/initialize/.terraform/ \
${ACP_PLATFORM_CORE_DIR}/initialize/terraform.tfstate* \
${ACP_PLATFORM_CORE_DIR}/networking/.terraform/ \
${ACP_PLATFORM_CORE_DIR}/container_cluster/.terraform/ \
${ACP_PLATFORM_CORE_DIR}/container_node_pool/.terraform/ \
${ACP_PLATFORM_CORE_DIR}/container_node_pool/container_node_pool_*.tf \
${ACP_PLATFORM_CORE_DIR}/gke_enterprise/configmanagement/git/.terraform/ \
${ACP_PLATFORM_CORE_DIR}/gke_enterprise/configmanagement/oci/.terraform/ \
${ACP_PLATFORM_CORE_DIR}/gke_enterprise/fleet_membership/.terraform/ \
${ACP_PLATFORM_CORE_DIR}/gke_enterprise/servicemesh/.terraform/ \
${ACP_PLATFORM_CORE_DIR}/workloads/kueue.terraform/ \
${ACP_PLATFORM_CORE_DIR}/workloads/kubeconfig \
${ACP_PLATFORM_CORE_DIR}/workloads/manifests
"${ACP_PLATFORM_BASE_DIR}/_shared_config/.terraform/" \
"${ACP_PLATFORM_BASE_DIR}/_shared_config"/terraform.tfstate* \
"${ACP_PLATFORM_CORE_DIR}/initialize/.terraform/" \
"${ACP_PLATFORM_CORE_DIR}/initialize"/terraform.tfstate* \
"${ACP_PLATFORM_CORE_DIR}/networking/.terraform/" \
"${ACP_PLATFORM_CORE_DIR}/container_cluster/.terraform/" \
"${ACP_PLATFORM_CORE_DIR}/container_node_pool/.terraform/" \
"${ACP_PLATFORM_CORE_DIR}/container_node_pool"/container_node_pool_*.tf \
"${ACP_PLATFORM_CORE_DIR}/gke_enterprise/configmanagement/git/.terraform/" \
"${ACP_PLATFORM_CORE_DIR}/gke_enterprise/configmanagement/oci/.terraform/" \
"${ACP_PLATFORM_CORE_DIR}/gke_enterprise/fleet_membership/.terraform/" \
"${ACP_PLATFORM_CORE_DIR}/gke_enterprise/servicemesh/.terraform/" \
"${ACP_PLATFORM_CORE_DIR}/workloads/kueue.terraform/" \
"${ACP_PLATFORM_CORE_DIR}/workloads/kubeconfig" \
"${ACP_PLATFORM_CORE_DIR}/workloads/manifests"

git restore \
${ACP_PLATFORM_CORE_DIR}/initialize/backend.tf.bucket \
${ACP_PLATFORM_CORE_DIR}/container_node_pool/container_node_pool_*.tf
"${ACP_PLATFORM_CORE_DIR}/initialize/backend.tf.bucket" \
"${ACP_PLATFORM_CORE_DIR}/container_node_pool"/container_node_pool_*.tf

end_timestamp=$(date +%s)
total_runtime_value=$((end_timestamp - start_timestamp))
Expand Down
1 change: 1 addition & 0 deletions platforms/gke/base/use-cases/federated-learning/common.sh
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ FEDERATED_LEARNING_SHARED_CONFIG_DIR="${FEDERATED_LEARNING_USE_CASE_TERRAFORM_DI
# Terraservices that are necessary for the core platform
federated_learning_core_platform_terraservices=(
"key_management_service"
"service_account"
)

# shellcheck disable=SC2034 # Variable is used in other scripts
Expand Down
12 changes: 5 additions & 7 deletions platforms/gke/base/use-cases/federated-learning/deploy.sh
Original file line number Diff line number Diff line change
Expand Up @@ -26,10 +26,9 @@ start_timestamp_federated_learning=$(date +%s)
echo "Initializing the core platform"
# Don't provision any core platform terraservice because we just need
# to initialize the terraform environment and remote backend
declare -a CORE_TERRASERVICES_APPLY
CORE_TERRASERVICES_APPLY=("initialize")
# shellcheck disable=SC1091
source "${ACP_PLATFORM_CORE_DIR}/deploy.sh"
CORE_TERRASERVICES_APPLY="initialize" \
"${ACP_PLATFORM_CORE_DIR}/deploy.sh"

echo "Preparing core platform configuration files"
for configuration_variable in "${TERRAFORM_CLUSTER_CONFIGURATION[@]}"; do
Expand All @@ -51,10 +50,9 @@ fi
edit_terraform_configuration_variable_value_in_file "cluster_database_encryption_key_name_placeholder" "${cluster_database_encryption_key_id}" "${ACP_PLATFORM_SHARED_CONFIG_CLUSTER_AUTO_VARS_FILE}"

echo "Provisioning the core platform"
# shellcheck disable=SC2034 # Variable is used in other scripts
CORE_TERRASERVICES_APPLY=("networking" "container_cluster" "gke_enterprise/fleet_membership")
# shellcheck disable=SC1091
source "${ACP_PLATFORM_CORE_DIR}/deploy.sh"
# shellcheck disable=SC1091,SC2034 # Variable is used in other scripts
CORE_TERRASERVICES_APPLY="networking container_cluster gke_enterprise/fleet_membership" \
"${ACP_PLATFORM_CORE_DIR}/deploy.sh"

echo "Provisioning the use case resources"
# shellcheck disable=SC2154 # variable defined in common.sh
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,4 +15,29 @@
locals {
# Service account address in the container-engine-robot domain, built from the
# project number, followed by the same address in IAM member form
# ("serviceAccount:<email>").
gke_robot_service_account = "service-${data.google_project.default.number}@container-engine-robot.iam.gserviceaccount.com"
gke_robot_service_account_iam_email = "serviceAccount:${local.gke_robot_service_account}"

# Map keyed by tenant name, with one entry per element of
# var.federated_learning_tenant_names. Derived names follow the pattern
# "<cluster_name>-<tenant>-<suffix>", where the suffix is:
#   p - node pool name
#   n - node pool service account name
#   a - apps service account name
# NOTE(review): local.cluster_name is not defined in this block; presumably it
# is declared elsewhere in the module - confirm.
tenants = {
for name in var.federated_learning_tenant_names : name => {
tenant_name = name
tenant_nodepool_name = format("%s-%s-p", local.cluster_name, name)
tenant_nodepool_sa_name = format("%s-%s-n", local.cluster_name, name)
tenant_apps_sa_name = format("%s-%s-a", local.cluster_name, name)
# Same value for every tenant (see tenant_apps_kubernetes_service_account_name below).
tenant_apps_kubernetes_service_account_name = local.tenant_apps_kubernetes_service_account_name
}
}

# Put all service account names in a list so we can create them with a single
# google_service_account resource
# The list holds every node pool service account name first, then every apps
# service account name.
service_account_names = concat(
[for tenant in local.tenants : tenant.tenant_nodepool_sa_name],
[for tenant in local.tenants : tenant.tenant_apps_sa_name],
)

# Kubernetes service account name copied into each tenant entry above.
tenant_apps_kubernetes_service_account_name = "fl-ksa"
}

# Tenant names for the federated learning use case. local.tenants builds one
# entry per name in this list; the default defines a single tenant, "fl-1".
variable "federated_learning_tenant_names" {
default = ["fl-1"]
description = "List of named tenants to be created in the cluster. Each tenant gets a dedicated node pool and Kubernetes namespace, isolated from other tenants."
type = list(string)
}
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,11 @@
# limitations under the License.

resource "google_artifact_registry_repository" "container_image_repository" {
location = var.cluster_region
repository_id = "${local.unique_identifier_prefix}-fl-repository"
description = "Federated Learning container image repository"
format = "DOCKER"
location = var.cluster_region
project = google_project_service.artifactregistry_googleapis_com.project
repository_id = "${local.unique_identifier_prefix}-fl-repository"

cleanup_policies {
action = "DELETE"
Expand Down
Loading
Loading