From 9beb59b65bf5d0c118f832053c52a28cc304616c Mon Sep 17 00:00:00 2001 From: Matt McFarland Date: Mon, 24 Jun 2024 09:50:39 -0400 Subject: [PATCH 1/2] Remove storage account keys from deployment (#224) * Remove storage account keys from table access * pcfuncs don't use keys * Deploy and tests * Contrib * Remove temp * Remove verbosity * Move settings validation to startup check --- deployment/README.md | 16 ++-- deployment/bin/deploy | 39 ++++++++- deployment/bin/kv_add_ip | 3 +- deployment/bin/kv_rmv_ip | 3 +- deployment/helm/deploy-values.template.yaml | 6 +- .../templates/_helpers.tpl | 1 + .../templates/deployment.yaml | 8 +- .../templates/serviceaccount.yaml | 2 +- .../planetary-computer-stac/values.yaml | 4 + .../templates/deployment.yaml | 6 -- deployment/terraform/resources/aks.tf | 34 +++++++- deployment/terraform/resources/functions.tf | 2 +- deployment/terraform/resources/output.tf | 8 +- deployment/terraform/resources/providers.tf | 6 +- .../terraform/resources/storage_account.tf | 13 +++ pc-stac.dev.env | 3 - pc-tiler.dev.env | 3 - pccommon/pccommon/blob.py | 27 +++---- pccommon/pccommon/cli.py | 40 +++------ pccommon/pccommon/config/core.py | 22 +++-- pccommon/pccommon/constants.py | 8 ++ pccommon/pccommon/tables.py | 81 +++++++++---------- pccommon/tests/config/test_table_settings.py | 22 +++++ pcfuncs/funclib/settings.py | 5 -- pcfuncs/image/settings.py | 2 - 25 files changed, 221 insertions(+), 143 deletions(-) create mode 100644 pccommon/tests/config/test_table_settings.py diff --git a/deployment/README.md b/deployment/README.md index 1550e39e..6a536597 100644 --- a/deployment/README.md +++ b/deployment/README.md @@ -40,25 +40,27 @@ Container Registry repo where you published your local images: - `ACR_TILER_REPO` - `IMAGE_TAG` -__Note:__ Remember to bring down your resources after testing with `terraform destroy`! +**Note:** Remember to bring down your resources after testing with `terraform destroy`! ## Loading configuration data Configuration data is stored in Azure Storage Tables. Use the `pcapis` command line interface that is installed with the `pccommon` package to load data. For example: +```console +> az login # Use an account that has "Storage Table Data Contributor" on the account +> pcapis load -t collection --account pctapissatyasa --table collectionconfig --file pccommon/tests/data-files/collection_config.json ``` -> pcapis load -t collection --sas "${SAS_TOKEN}" --account pctapissatyasa --table collectionconfig --file pccommon/tests/data-files/collection_config.json -``` + To dump a single collection config, use: -``` -> pcapis dump -t collection --sas "${SAS_TOKEN}" --account pctapissatyasa --table collectionconfig --id naip +```console +> pcapis dump -t collection --account pctapissatyasa --table collectionconfig --id naip ``` For container configs, you must also specify the container account name used as the Partition Key: -``` -> pcapis dump -t collection --sas "${SAS_TOKEN}" --account pctapissatyasa --table containerconfig --id naip --container-account naipeuwest +```console +> pcapis dump -t collection --account pctapissatyasa --table containerconfig --id naip --container-account naipeuwest ``` Using the `load` command on a single dump file for either config will update the single row. diff --git a/deployment/bin/deploy b/deployment/bin/deploy index af776d1f..b84c04c6 100755 --- a/deployment/bin/deploy +++ b/deployment/bin/deploy @@ -49,9 +49,30 @@ while [[ "$#" -gt 0 ]]; do case $1 in ;; esac done +disable_shared_access_keys() { + echo "Disabling shared access key on storage account..." + az storage account update \ + --name ${SAK_STORAGE_ACCOUNT} \ + --resource-group ${SAK_RESOURCE_GROUP} \ + --allow-shared-key-access false \ + --output none + + if [ $? -ne 0 ]; then + echo "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!" + echo "WARNING: Failed to turn off shared key access on the storage account." + echo "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!" + exit 2 + fi +} + +# Always disable shared access keys on script exit +trap disable_shared_access_keys EXIT + ################################### # Check and configure environment # ################################### +SAK_STORAGE_ACCOUNT=pctapisstagingsa +SAK_RESOURCE_GROUP=pct-apis-westeurope-staging_rg if [[ -z ${TERRAFORM_DIR} ]]; then echo "Must pass in TERRAFORM_DIR with -t" @@ -91,6 +112,18 @@ if [ "${BASH_SOURCE[0]}" = "${0}" ]; then if [[ "${SKIP_TF}" != 1 ]]; then echo "Deploying infrastructure with Terraform..." + + echo "Enabling shared key access for storage account..." + # Terraform isn't able to read all resources from a storage account if shared key access is disabled + # so while we're deploying, we need to enable it. Since we haven't run TF yet, we don't have the name of the account + # so they are hardcoded here. This is a temporary workaround until this is resolved + # https://github.com/hashicorp/terraform-provider-azurerm/issues/25218 + az storage account update \ + --name ${SAK_STORAGE_ACCOUNT} \ + --resource-group ${SAK_RESOURCE_GROUP} \ + --allow-shared-key-access true \ + --output none + terraform init --upgrade if [ "${PLAN_ONLY}" ]; then @@ -142,7 +175,8 @@ if [ "${BASH_SOURCE[0]}" = "${0}" ]; then --kube-context "${KUBE_CONTEXT}" \ --wait \ --timeout 2m0s \ - -f ${DEPLOY_VALUES_FILE} + -f ${DEPLOY_VALUES_FILE} \ + --debug echo "================" echo "==== Tiler =====" @@ -154,7 +188,8 @@ if [ "${BASH_SOURCE[0]}" = "${0}" ]; then --kube-context "${KUBE_CONTEXT}" \ --wait \ --timeout 2m0s \ - -f ${DEPLOY_VALUES_FILE} + -f ${DEPLOY_VALUES_FILE} \ + --debug echo "==================" echo "==== Ingress =====" diff --git a/deployment/bin/kv_add_ip b/deployment/bin/kv_add_ip index b507b52b..137dee99 100755 --- a/deployment/bin/kv_add_ip +++ b/deployment/bin/kv_add_ip @@ -32,6 +32,7 @@ if [ "${BASH_SOURCE[0]}" = "${0}" ]; then -g ${KEY_VAULT_RESOURCE_GROUP_NAME} \ -n ${KEY_VAULT_NAME} \ --ip-address $cidr \ - --subscription ${ARM_SUBSCRIPTION_ID} + --subscription ${ARM_SUBSCRIPTION_ID} \ + --output none fi diff --git a/deployment/bin/kv_rmv_ip b/deployment/bin/kv_rmv_ip index dddc2401..228d9535 100755 --- a/deployment/bin/kv_rmv_ip +++ b/deployment/bin/kv_rmv_ip @@ -32,6 +32,7 @@ if [ "${BASH_SOURCE[0]}" = "${0}" ]; then -g ${KEY_VAULT_RESOURCE_GROUP_NAME} \ -n ${KEY_VAULT_NAME} \ --ip-address $cidr \ - --subscription ${ARM_SUBSCRIPTION_ID} + --subscription ${ARM_SUBSCRIPTION_ID} \ + --output none fi diff --git a/deployment/helm/deploy-values.template.yaml b/deployment/helm/deploy-values.template.yaml index f8b33b26..443cd857 100644 --- a/deployment/helm/deploy-values.template.yaml +++ b/deployment/helm/deploy-values.template.yaml @@ -42,6 +42,11 @@ stac: replicaCount: "{{ tf.stac_replica_count }}" podAnnotations: "pc/gitsha": "{{ env.GIT_COMMIT }}" + useWorkloadIdentity: true + serviceAccount: + annotations: + "azure.workload.identity/client-id": {{ tf.cluster_stac_identity_client_id }} + "azure.workload.identity/tenant-id": {{ tf.tenant_id }} appRootPath: "/stac" port: "80" @@ -86,7 +91,6 @@ tiler: storage: account_name: "{{ tf.storage_account_name }}" - account_key: "{{ tf.storage_account_key }}" collection_config_table_name: "{{ tf.collection_config_table_name }}" container_config_table_name: "{{ tf.container_config_table_name }}" ip_exception_config_table_name: "{{ tf.ip_exception_config_table_name }}" diff --git a/deployment/helm/published/planetary-computer-stac/templates/_helpers.tpl b/deployment/helm/published/planetary-computer-stac/templates/_helpers.tpl index 47b5fa14..433a2779 100644 --- a/deployment/helm/published/planetary-computer-stac/templates/_helpers.tpl +++ b/deployment/helm/published/planetary-computer-stac/templates/_helpers.tpl @@ -42,6 +42,7 @@ app.kubernetes.io/instance: {{ .Release.Name }} Common labels */}} {{- define "pcstac.labels" -}} +azure.workload.identity/use: {{ .Values.stac.deploy.useWorkloadIdentity | quote}} helm.sh/chart: {{ include "pcstac.chart" . }} {{ include "pcstac.selectorLabels" . }} {{- if .Chart.AppVersion }} diff --git a/deployment/helm/published/planetary-computer-stac/templates/deployment.yaml b/deployment/helm/published/planetary-computer-stac/templates/deployment.yaml index 625f9eb0..dacda7ca 100644 --- a/deployment/helm/published/planetary-computer-stac/templates/deployment.yaml +++ b/deployment/helm/published/planetary-computer-stac/templates/deployment.yaml @@ -19,7 +19,7 @@ spec: {{- toYaml . | nindent 8 }} {{- end }} labels: - {{- include "pcstac.selectorLabels" . | nindent 8 }} + {{- include "pcstac.labels" . | nindent 8 }} spec: {{- with .Values.stac.deploy.imagePullSecrets }} imagePullSecrets: @@ -89,20 +89,14 @@ spec: value: "{{ .Values.stac.debug }}" - name: "PCAPIS_COLLECTION_CONFIG__ACCOUNT_NAME" value: "{{ .Values.storage.account_name }}" - - name: "PCAPIS_COLLECTION_CONFIG__ACCOUNT_KEY" - value: "{{ .Values.storage.account_key }}" - name: "PCAPIS_COLLECTION_CONFIG__TABLE_NAME" value: "{{ .Values.storage.collection_config_table_name }}" - name: "PCAPIS_CONTAINER_CONFIG__ACCOUNT_NAME" value: "{{ .Values.storage.account_name }}" - - name: "PCAPIS_CONTAINER_CONFIG__ACCOUNT_KEY" - value: "{{ .Values.storage.account_key }}" - name: "PCAPIS_CONTAINER_CONFIG__TABLE_NAME" value: "{{ .Values.storage.container_config_table_name }}" - name: "PCAPIS_IP_EXCEPTION_CONFIG__ACCOUNT_NAME" value: "{{ .Values.storage.account_name }}" - - name: "PCAPIS_IP_EXCEPTION_CONFIG__ACCOUNT_KEY" - value: "{{ .Values.storage.account_key }}" - name: "PCAPIS_IP_EXCEPTION_CONFIG__TABLE_NAME" value: "{{ .Values.storage.ip_exception_config_table_name }}" - name: "PCAPIS_REDIS_HOSTNAME" diff --git a/deployment/helm/published/planetary-computer-stac/templates/serviceaccount.yaml b/deployment/helm/published/planetary-computer-stac/templates/serviceaccount.yaml index 3177d7fa..511dc933 100644 --- a/deployment/helm/published/planetary-computer-stac/templates/serviceaccount.yaml +++ b/deployment/helm/published/planetary-computer-stac/templates/serviceaccount.yaml @@ -6,7 +6,7 @@ metadata: name: {{ include "pcstac.serviceAccountName" . }} labels: {{- include "pcstac.labels" . | nindent 4 }} - {{- with .Values.serviceAccount.annotations }} + {{- with .Values.stac.deploy.serviceAccount.annotations }} annotations: {{- toYaml . | nindent 4 }} {{- end }} diff --git a/deployment/helm/published/planetary-computer-stac/values.yaml b/deployment/helm/published/planetary-computer-stac/values.yaml index 46dd7565..e4f67b10 100644 --- a/deployment/helm/published/planetary-computer-stac/values.yaml +++ b/deployment/helm/published/planetary-computer-stac/values.yaml @@ -56,6 +56,10 @@ stac: affinity: {} autoscaling: enabled: false + useWorkloadIdentity: false + serviceAccount: + annotations: {} + cert: privateKeySecretRef: "letsencrypt-staging" diff --git a/deployment/helm/published/planetary-computer-tiler/templates/deployment.yaml b/deployment/helm/published/planetary-computer-tiler/templates/deployment.yaml index 9f3206dd..f7573f26 100644 --- a/deployment/helm/published/planetary-computer-tiler/templates/deployment.yaml +++ b/deployment/helm/published/planetary-computer-tiler/templates/deployment.yaml @@ -85,20 +85,14 @@ spec: value: "{{ .Values.tiler.default_max_items_per_tile}}" - name: "PCAPIS_COLLECTION_CONFIG__ACCOUNT_NAME" value: "{{ .Values.storage.account_name }}" - - name: "PCAPIS_COLLECTION_CONFIG__ACCOUNT_KEY" - value: "{{ .Values.storage.account_key }}" - name: "PCAPIS_COLLECTION_CONFIG__TABLE_NAME" value: "{{ .Values.storage.collection_config_table_name }}" - name: "PCAPIS_CONTAINER_CONFIG__ACCOUNT_NAME" value: "{{ .Values.storage.account_name }}" - - name: "PCAPIS_CONTAINER_CONFIG__ACCOUNT_KEY" - value: "{{ .Values.storage.account_key }}" - name: "PCAPIS_CONTAINER_CONFIG__TABLE_NAME" value: "{{ .Values.storage.container_config_table_name }}" - name: "PCAPIS_IP_EXCEPTION_CONFIG__ACCOUNT_NAME" value: "{{ .Values.storage.account_name }}" - - name: "PCAPIS_IP_EXCEPTION_CONFIG__ACCOUNT_KEY" - value: "{{ .Values.storage.account_key }}" - name: "PCAPIS_IP_EXCEPTION_CONFIG__TABLE_NAME" value: "{{ .Values.storage.ip_exception_config_table_name }}" - name: "PCAPIS_TABLE_VALUE_TTL" diff --git a/deployment/terraform/resources/aks.tf b/deployment/terraform/resources/aks.tf index 109e69f2..a9ac71a9 100644 --- a/deployment/terraform/resources/aks.tf +++ b/deployment/terraform/resources/aks.tf @@ -26,6 +26,10 @@ resource "azurerm_kubernetes_cluster" "pc" { vm_size = "Standard_DS2_v2" node_count = var.aks_node_count vnet_subnet_id = azurerm_subnet.node_subnet.id + + upgrade_settings { + max_surge = "10%" + } } identity { @@ -74,9 +78,31 @@ resource "azurerm_kubernetes_cluster" "pc" { } } +resource "azurerm_user_assigned_identity" "stac" { + name = "id-${local.prefix}-stac" + location = var.region + resource_group_name = azurerm_resource_group.pc.name +} + +resource "azurerm_federated_identity_credential" "stac" { + name = "federated-id-${local.prefix}" + resource_group_name = azurerm_resource_group.pc.name + audience = ["api://AzureADTokenExchange"] + issuer = azurerm_kubernetes_cluster.pc.oidc_issuer_url + subject = "system:serviceaccount:pc:planetary-computer-stac" + parent_id = azurerm_user_assigned_identity.stac.id + timeouts {} +} + +resource "azurerm_role_assignment" "cluster-stac-identity-storage-access" { + scope = azurerm_storage_account.pc.id + role_definition_name = "Storage Table Data Reader" + principal_id = azurerm_user_assigned_identity.stac.principal_id +} + # Workload Identity for tiler access to the Azure Maps account resource "azurerm_user_assigned_identity" "tiler" { - name = "id-${local.prefix}" + name = "id-${local.prefix}-tiler" location = var.region resource_group_name = azurerm_resource_group.pc.name } @@ -98,6 +124,12 @@ resource "azurerm_role_assignment" "cluster-identity-maps-render-token" { } +resource "azurerm_role_assignment" "cluster-tiler-identity-storage-access" { + scope = azurerm_storage_account.pc.id + role_definition_name = "Storage Table Data Reader" + principal_id = azurerm_user_assigned_identity.tiler.principal_id +} + # add the role to the identity the kubernetes cluster was assigned resource "azurerm_role_assignment" "network" { scope = azurerm_resource_group.pc.id diff --git a/deployment/terraform/resources/functions.tf b/deployment/terraform/resources/functions.tf index cdcf6ebf..e77b3082 100644 --- a/deployment/terraform/resources/functions.tf +++ b/deployment/terraform/resources/functions.tf @@ -46,7 +46,7 @@ resource "azurerm_function_app" "pcfuncs" { os_type = "linux" version = "~4" site_config { - linux_fx_version = "PYTHON|3.8" + linux_fx_version = "PYTHON|3.9" use_32_bit_worker_process = false ftps_state = "Disabled" diff --git a/deployment/terraform/resources/output.tf b/deployment/terraform/resources/output.tf index 733ae520..45bc4fa8 100644 --- a/deployment/terraform/resources/output.tf +++ b/deployment/terraform/resources/output.tf @@ -55,6 +55,10 @@ output "cluster_tiler_identity_client_id" { value = azurerm_user_assigned_identity.tiler.client_id } +output "cluster_stac_identity_client_id" { + value = azurerm_user_assigned_identity.stac.client_id +} + ## Ingress output "ingress_ip" { @@ -104,10 +108,6 @@ output "storage_account_name" { value = azurerm_storage_account.pc.name } -output "storage_account_key" { - value = azurerm_storage_account.pc.primary_access_key -} - output "collection_config_table_name" { value = azurerm_storage_table.collectionconfig.name } diff --git a/deployment/terraform/resources/providers.tf b/deployment/terraform/resources/providers.tf index 5671a49f..3dc2d08c 100644 --- a/deployment/terraform/resources/providers.tf +++ b/deployment/terraform/resources/providers.tf @@ -1,4 +1,4 @@ -provider azurerm { +provider "azurerm" { features {} use_oidc = true } @@ -9,9 +9,9 @@ terraform { required_providers { azurerm = { source = "hashicorp/azurerm" - version = "3.97.1" + version = "3.108.0" } } } -data "azurerm_client_config" "current" {} \ No newline at end of file +data "azurerm_client_config" "current" {} diff --git a/deployment/terraform/resources/storage_account.tf b/deployment/terraform/resources/storage_account.tf index e55aa6d1..b14c4e6e 100644 --- a/deployment/terraform/resources/storage_account.tf +++ b/deployment/terraform/resources/storage_account.tf @@ -6,8 +6,21 @@ resource "azurerm_storage_account" "pc" { account_replication_type = "LRS" min_tls_version = "TLS1_2" allow_nested_items_to_be_public = false + + # Disabling shared access keys breaks terraform's ability to do subsequent + # resource fetching during terraform plan. As a result, this property is + # ignored and managed outside of this apply session, via the deploy script. + # https://github.com/hashicorp/terraform-provider-azurerm/issues/25218 + + # shared_access_key_enabled = false + lifecycle { + ignore_changes = [ + shared_access_key_enabled, + ] + } } + # Tables resource "azurerm_storage_table" "collectionconfig" { diff --git a/pc-stac.dev.env b/pc-stac.dev.env index 1933729d..9a7ea0d7 100644 --- a/pc-stac.dev.env +++ b/pc-stac.dev.env @@ -15,17 +15,14 @@ USE_API_HYDRATE=TRUE # Azure Storage PCAPIS_COLLECTION_CONFIG__ACCOUNT_URL=http://azurite:10002/devstoreaccount1 PCAPIS_COLLECTION_CONFIG__ACCOUNT_NAME=devstoreaccount1 -PCAPIS_COLLECTION_CONFIG__ACCOUNT_KEY=Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw== PCAPIS_COLLECTION_CONFIG__TABLE_NAME=collectionconfig PCAPIS_CONTAINER_CONFIG__ACCOUNT_URL=http://azurite:10002/devstoreaccount1 PCAPIS_CONTAINER_CONFIG__ACCOUNT_NAME=devstoreaccount1 -PCAPIS_CONTAINER_CONFIG__ACCOUNT_KEY=Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw== PCAPIS_CONTAINER_CONFIG__TABLE_NAME=containerconfig PCAPIS_IP_EXCEPTION_CONFIG__ACCOUNT_URL=http://azurite:10002/devstoreaccount1 PCAPIS_IP_EXCEPTION_CONFIG__ACCOUNT_NAME=devstoreaccount1 -PCAPIS_IP_EXCEPTION_CONFIG__ACCOUNT_KEY=Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw== PCAPIS_IP_EXCEPTION_CONFIG__TABLE_NAME=ipexceptionlist # Disable config and stac caching in development by setting TTL to 1 second diff --git a/pc-tiler.dev.env b/pc-tiler.dev.env index fa5e9153..830ba6ee 100644 --- a/pc-tiler.dev.env +++ b/pc-tiler.dev.env @@ -25,17 +25,14 @@ VECTORTILE_SA_BASE_URL=https://pcvectortiles.blob.core.windows.net # Azure Storage PCAPIS_COLLECTION_CONFIG__ACCOUNT_URL=http://azurite:10002/devstoreaccount1 PCAPIS_COLLECTION_CONFIG__ACCOUNT_NAME=devstoreaccount1 -PCAPIS_COLLECTION_CONFIG__ACCOUNT_KEY=Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw== PCAPIS_COLLECTION_CONFIG__TABLE_NAME=collectionconfig PCAPIS_CONTAINER_CONFIG__ACCOUNT_URL=http://azurite:10002/devstoreaccount1 PCAPIS_CONTAINER_CONFIG__ACCOUNT_NAME=devstoreaccount1 -PCAPIS_CONTAINER_CONFIG__ACCOUNT_KEY=Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw== PCAPIS_CONTAINER_CONFIG__TABLE_NAME=containerconfig PCAPIS_IP_EXCEPTION_CONFIG__ACCOUNT_URL=http://azurite:10002/devstoreaccount1 PCAPIS_IP_EXCEPTION_CONFIG__ACCOUNT_NAME=devstoreaccount1 -PCAPIS_IP_EXCEPTION_CONFIG__ACCOUNT_KEY=Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw== PCAPIS_IP_EXCEPTION_CONFIG__TABLE_NAME=ipexceptionlist # Disable config and stac caching in development by setting TTL to 1 second diff --git a/pccommon/pccommon/blob.py b/pccommon/pccommon/blob.py index c4390355..b5fe0207 100644 --- a/pccommon/pccommon/blob.py +++ b/pccommon/pccommon/blob.py @@ -1,27 +1,22 @@ from typing import Dict, Optional, Union -from azure.identity import DefaultAzureCredential +from azure.identity import ManagedIdentityCredential from azure.storage.blob import ContainerClient +from pccommon.constants import AZURITE_ACCOUNT_KEY + def get_container_client( container_url: str, - sas_token: Optional[str] = None, - account_key: Optional[str] = None, ) -> ContainerClient: - credential: Optional[Union[str, Dict[str, str], DefaultAzureCredential]] = None - if account_key: - # Handle Azurite - if "devstoreaccount1" in container_url: - credential = { - "account_name": "devstoreaccount1", - "account_key": account_key, - } - else: - credential = account_key - elif sas_token: - credential = sas_token + credential: Optional[Union[Dict[str, str], ManagedIdentityCredential]] = None + # Handle Azurite + if container_url.startswith("http://azurite:"): + credential = { + "account_name": "devstoreaccount1", + "account_key": AZURITE_ACCOUNT_KEY, + } else: - credential = DefaultAzureCredential() + credential = ManagedIdentityCredential() return ContainerClient.from_container_url(container_url, credential=credential) diff --git a/pccommon/pccommon/cli.py b/pccommon/pccommon/cli.py index 8ee834c4..b853601b 100644 --- a/pccommon/pccommon/cli.py +++ b/pccommon/pccommon/cli.py @@ -15,20 +15,13 @@ from pccommon.version import __version__ -def get_account_url(account: str, account_url: Optional[str]) -> str: - return account_url or f"https://{account}.table.core.windows.net" - - -def load( - sas: str, account: str, table: str, type: str, file: str, **kwargs: Any -) -> int: - account_url = get_account_url(account, kwargs.get("account_url")) +def load(account: str, table: str, type: str, file: str, **kwargs: Any) -> int: with open(file) as f: rows = json.load(f) if type == "collection": - col_config_table = CollectionConfigTable.from_sas_token( - account_url=account_url, sas_token=sas, table_name=table + col_config_table = CollectionConfigTable.from_environment( + account_name=account, table_name=table ) for coll_id, config in rows.items(): print("Loading config for collection", coll_id) @@ -41,8 +34,8 @@ def load( print("========================================") elif type == "container": - cont_config_table = ContainerConfigTable.from_sas_token( - account_url=account_url, sas_token=sas, table_name=table + cont_config_table = ContainerConfigTable.from_environment( + account_name=account, table_name=table ) for path, config in rows.items(): storage_account, container = path.split("/") @@ -56,14 +49,13 @@ def load( return 0 -def dump(sas: str, account: str, table: str, type: str, **kwargs: Any) -> int: +def dump(account: str, table: str, type: str, **kwargs: Any) -> int: output = kwargs.get("output") - account_url = get_account_url(account, kwargs.get("account_url")) id = kwargs.get("id") result: Dict[str, Dict[str, Any]] = {} if type == "collection": - col_config_table = CollectionConfigTable.from_sas_token( - account_url=account_url, sas_token=sas, table_name=table + col_config_table = CollectionConfigTable.from_environment( + account_name=account, table_name=table ) if id: @@ -77,8 +69,8 @@ def dump(sas: str, account: str, table: str, type: str, **kwargs: Any) -> int: result[collection_id] = col_config.dict() elif type == "container": - con_config_table = ContainerConfigTable.from_sas_token( - account_url=account_url, sas_token=sas, table_name=table + con_config_table = ContainerConfigTable.from_environment( + account_name=account, table_name=table ) if id: con_account = kwargs.get("container_account") @@ -103,12 +95,11 @@ def dump(sas: str, account: str, table: str, type: str, **kwargs: Any) -> int: return 0 -def add_ip_exception(sas: str, account: str, table: str, **kwargs: Any) -> int: +def add_ip_exception(account: str, table: str, **kwargs: Any) -> int: ip_file = kwargs.get("file") ip = kwargs.get("ip") - account_url = get_account_url(account, kwargs.get("account_url")) - ip_table = IPExceptionListTable.from_sas_token( - account_url=account_url, sas_token=sas, table_name=table + ip_table = IPExceptionListTable.from_environment( + account_name=account, table_name=table ) if ip: print(f"Adding exception for IP {ip}...") @@ -139,11 +130,6 @@ def parse_args(args: List[str]) -> Optional[Dict[str, Any]]: subparsers = parser0.add_subparsers(dest="command") def add_common_opts(p: argparse.ArgumentParser, default_table: str) -> None: - p.add_argument( - "--sas", - help="SAS Token for the storage account.", - required=True, - ) p.add_argument("--account", help="Storage account name.", required=True) p.add_argument("--table", help="Table name.", default=default_table) p.add_argument( diff --git a/pccommon/pccommon/config/core.py b/pccommon/pccommon/config/core.py index 75162b2c..7e9f8d6f 100644 --- a/pccommon/pccommon/config/core.py +++ b/pccommon/pccommon/config/core.py @@ -4,7 +4,7 @@ from cachetools import Cache, LRUCache, cachedmethod from cachetools.func import lru_cache from cachetools.keys import hashkey -from pydantic import BaseModel, BaseSettings, Field, PrivateAttr +from pydantic import BaseModel, BaseSettings, Field, PrivateAttr, validator from pccommon.config.collections import CollectionConfigTable from pccommon.config.containers import ContainerConfigTable @@ -20,10 +20,19 @@ class TableConfig(BaseModel): account_name: str - account_key: str table_name: str account_url: Optional[str] = None + @validator("account_url") + def validate_url(cls, value: str) -> str: + if value and not value.startswith("http://azurite:"): + raise ValueError( + "Non-azurite account url provided. " + "Account keys can only be used with Azurite emulator." + ) + + return value + class PCAPIsConfig(BaseSettings): _cache: Cache = PrivateAttr(default_factory=lambda: LRUCache(maxsize=10)) @@ -48,30 +57,27 @@ class PCAPIsConfig(BaseSettings): @cachedmethod(cache=lambda self: self._cache, key=lambda _: hashkey("collection")) def get_collection_config_table(self) -> CollectionConfigTable: - return CollectionConfigTable.from_account_key( + return CollectionConfigTable.from_environment( account_url=self.collection_config.account_url, account_name=self.collection_config.account_name, - account_key=self.collection_config.account_key, table_name=self.collection_config.table_name, ttl=self.table_value_ttl, ) @cachedmethod(cache=lambda self: self._cache, key=lambda _: hashkey("container")) def get_container_config_table(self) -> ContainerConfigTable: - return ContainerConfigTable.from_account_key( + return ContainerConfigTable.from_environment( account_url=self.container_config.account_url, account_name=self.container_config.account_name, - account_key=self.container_config.account_key, table_name=self.container_config.table_name, ttl=self.table_value_ttl, ) @cachedmethod(cache=lambda self: self._cache, key=lambda _: hashkey("ip_whitelist")) def get_ip_exception_list_table(self) -> IPExceptionListTable: - return IPExceptionListTable.from_account_key( + return IPExceptionListTable.from_environment( account_url=self.ip_exception_config.account_url, account_name=self.ip_exception_config.account_name, - account_key=self.ip_exception_config.account_key, table_name=self.ip_exception_config.table_name, ttl=self.table_value_ttl, ) diff --git a/pccommon/pccommon/constants.py b/pccommon/pccommon/constants.py index 8929cbf9..2a8a8d36 100644 --- a/pccommon/pccommon/constants.py +++ b/pccommon/pccommon/constants.py @@ -30,3 +30,11 @@ HTTP_URL = COMMON_ATTRIBUTES["HTTP_URL"] HTTP_STATUS_CODE = COMMON_ATTRIBUTES["HTTP_STATUS_CODE"] HTTP_METHOD = COMMON_ATTRIBUTES["HTTP_METHOD"] + +# This is the Azurite storage account key. +# This is not a key for a real Storage Account and is publicly accessible +# on Azurite's GitHub repo. This is used only in development. +AZURITE_ACCOUNT_KEY = ( + "Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUz" + "FT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==" +) diff --git a/pccommon/pccommon/tables.py b/pccommon/pccommon/tables.py index af60b7b1..2a523745 100644 --- a/pccommon/pccommon/tables.py +++ b/pccommon/pccommon/tables.py @@ -1,3 +1,4 @@ +import os from threading import Lock from typing import ( Any, @@ -10,17 +11,20 @@ Tuple, Type, TypeVar, + Union, ) import orjson -from azure.core.credentials import AzureNamedKeyCredential, AzureSasCredential +from azure.core.credentials import AzureNamedKeyCredential from azure.core.exceptions import ResourceNotFoundError from azure.data.tables import TableClient, TableEntity, TableServiceClient +from azure.identity import AzureCliCredential, ManagedIdentityCredential from cachetools import Cache, TTLCache, cachedmethod from cachetools.keys import hashkey from pydantic import BaseModel from pccommon.constants import ( + AZURITE_ACCOUNT_KEY, DEFAULT_IP_EXCEPTIONS_TTL, DEFAULT_TTL, IP_EXCEPTION_PARTITION_KEY, @@ -81,60 +85,49 @@ def __exit__(self, *args: Any) -> None: self._service_client = None @classmethod - def from_sas_token( - cls: Type[T], account_url: str, sas_token: str, table_name: str - ) -> T: - def _get_clients( - _url: str = account_url, _token: str = sas_token, _table: str = table_name - ) -> Tuple[Optional[TableServiceClient], TableClient]: - table_service_client = TableServiceClient( - endpoint=_url, - credential=AzureSasCredential(_token), - ) - return ( - table_service_client, - table_service_client.get_table_client(table_name=_table), - ) - - return cls(_get_clients) - - @classmethod - def from_connection_string( - cls: Type[T], connection_string: str, table_name: str - ) -> T: - def _get_clients( - _conn_str: str = connection_string, _table: str = table_name - ) -> Tuple[Optional[TableServiceClient], TableClient]: - table_service_client = TableServiceClient.from_connection_string( - conn_str=_conn_str - ) - return ( - table_service_client, - table_service_client.get_table_client(table_name=_table), - ) - - return cls(_get_clients) - - @classmethod - def from_account_key( + def from_environment( cls: Type[T], account_name: str, - account_key: str, table_name: str, account_url: Optional[str] = None, ttl: Optional[int] = None, ) -> T: def _get_clients( - _name: str = account_name, - _key: str = account_key, - _url: Optional[str] = account_url, + _account: str = account_name, _table: str = table_name, + _url: Optional[str] = account_url, ) -> Tuple[Optional[TableServiceClient], TableClient]: - _url = _url or f"https://{_name}.table.core.windows.net" - credential = AzureNamedKeyCredential(name=_name, key=_key) + credential: Union[ + AzureNamedKeyCredential, ManagedIdentityCredential, AzureCliCredential + ] + + # Check if the environment is configured to use Azurite and use that key. + # Otherwise, we must use a workload identity. + if _url: + if not _url.startswith("http://azurite:"): + raise ValueError( + "Non-azurite account url provided. " + "Account keys can only be used with Azurite emulator." + ) + + url = _url + credential = AzureNamedKeyCredential( + name=_account, key=AZURITE_ACCOUNT_KEY + ) + else: + client_id = os.environ.get("AZURE_CLIENT_ID") + credential = ( + ManagedIdentityCredential(client_id=client_id) + if client_id + else AzureCliCredential() + ) + + url = f"https://{_account}.table.core.windows.net" + table_service_client = TableServiceClient( - endpoint=_url, credential=credential + endpoint=url, credential=credential ) + return ( table_service_client, table_service_client.get_table_client(table_name=_table), diff --git a/pccommon/tests/config/test_table_settings.py b/pccommon/tests/config/test_table_settings.py new file mode 100644 index 00000000..bdb765c9 --- /dev/null +++ b/pccommon/tests/config/test_table_settings.py @@ -0,0 +1,22 @@ +import pytest + +from pccommon.config.core import TableConfig + + +def test_raises_on_non_azurite_account_url() -> None: + + invalid_url = "https://example.com" + with pytest.raises(ValueError) as exc_info: + TableConfig(account_url=invalid_url, table_name="test", account_name="test") + + assert ( + "Non-azurite account url provided. " + "Account keys can only be used with Azurite emulator." + ) in str(exc_info.value) + + +def test_settings_accepts_azurite_url() -> None: + valid_url = "http://azurite:12345" + + config = TableConfig(account_url=valid_url, table_name="test", account_name="test") + assert config.account_url == valid_url diff --git a/pcfuncs/funclib/settings.py b/pcfuncs/funclib/settings.py index eb049f0a..b4a44e23 100644 --- a/pcfuncs/funclib/settings.py +++ b/pcfuncs/funclib/settings.py @@ -9,16 +9,11 @@ class BaseExporterSettings(BaseSettings): api_root_url: str = "https://planetarycomputer.microsoft.com/api/data/v1" - output_storage_url: str - output_sas: Optional[str] = None - output_account_key: Optional[str] = None def get_container_client(self) -> ContainerClient: return get_container_client( self.output_storage_url, - sas_token=self.output_sas, - account_key=self.output_account_key, ) def get_register_url(self, data_api_url_override: Optional[str] = None) -> str: diff --git a/pcfuncs/image/settings.py b/pcfuncs/image/settings.py index 0a2c6526..becebdd5 100644 --- a/pcfuncs/image/settings.py +++ b/pcfuncs/image/settings.py @@ -26,8 +26,6 @@ class ImageSettings(BaseExporterSettings): def get_container_client(self) -> ContainerClient: return get_container_client( self.output_storage_url, - sas_token=self.output_sas, - account_key=self.output_account_key, ) def get_register_url(self, data_api_url_override: Optional[str] = None) -> str: From 69098c7014e638918675e041a14ba1128c07885b Mon Sep 17 00:00:00 2001 From: Matt McFarland Date: Mon, 24 Jun 2024 14:42:18 -0400 Subject: [PATCH 2/2] Storage access fixes (#225) * Use AzureAD auth for terraform backend Move away from using shared key credentials for the backend auth in both CI and local dev. * Test deploy * CLI + OIDC * Debug * Remove debug --- .github/workflows/cicd.yml | 6 +-- deployment/README.md | 2 + deployment/bin/deploy | 45 +++++++------------ deployment/bin/lib | 40 +++++++++++++++++ deployment/docker-compose.yml | 4 +- deployment/terraform/resources/providers.tf | 5 +++ .../terraform/resources/storage_account.tf | 5 +++ deployment/terraform/staging/main.tf | 1 + 8 files changed, 73 insertions(+), 35 deletions(-) diff --git a/.github/workflows/cicd.yml b/.github/workflows/cicd.yml index c33e87be..ea8068ec 100644 --- a/.github/workflows/cicd.yml +++ b/.github/workflows/cicd.yml @@ -32,7 +32,7 @@ jobs: - name: Get image tag id: get_image_tag - run: + run: case "${GITHUB_REF}" in *tags*) echo "tag=${GITHUB_REF/refs\/tags\//}" >> $GITHUB_OUTPUT ; @@ -57,7 +57,7 @@ jobs: - build_and_publish steps: - uses: actions/checkout@v3 - + - name: Log in with Azure uses: azure/login@v1 with: @@ -86,4 +86,4 @@ jobs: ARM_CLIENT_ID: ${{ fromJSON(secrets.SECURE_AZURE_CREDENTIALS).clientId }} ARM_SUBSCRIPTION_ID: ${{ fromJSON(secrets.SECURE_AZURE_CREDENTIALS).subscriptionId }} ARM_TENANT_ID: ${{ fromJSON(secrets.SECURE_AZURE_CREDENTIALS).tenantId }} - ARM_USE_OIDC: true \ No newline at end of file + ARM_USE_OIDC: true diff --git a/deployment/README.md b/deployment/README.md index 6a536597..cd33bb2b 100644 --- a/deployment/README.md +++ b/deployment/README.md @@ -10,6 +10,8 @@ The logic for the deployment workflow is encapsulated in the [bin/deploy](bin/de scripts/console --deploy ``` +To have access to the remote backend terraform state, the identity (App Registration in CI, or local corp credential if local) will need to have the `Storage Blob Data Owner` role on the `pctesttfstate` storage account. + ## Manual resources ### Deployment secrets Key Vault diff --git a/deployment/bin/deploy b/deployment/bin/deploy index b84c04c6..1d689580 100755 --- a/deployment/bin/deploy +++ b/deployment/bin/deploy @@ -49,30 +49,20 @@ while [[ "$#" -gt 0 ]]; do case $1 in ;; esac done -disable_shared_access_keys() { - echo "Disabling shared access key on storage account..." - az storage account update \ - --name ${SAK_STORAGE_ACCOUNT} \ - --resource-group ${SAK_RESOURCE_GROUP} \ - --allow-shared-key-access false \ - --output none - - if [ $? -ne 0 ]; then - echo "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!" - echo "WARNING: Failed to turn off shared key access on the storage account." - echo "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!" - exit 2 - fi -} - # Always disable shared access keys on script exit trap disable_shared_access_keys EXIT ################################### # Check and configure environment # ################################### -SAK_STORAGE_ACCOUNT=pctapisstagingsa -SAK_RESOURCE_GROUP=pct-apis-westeurope-staging_rg + +# Enable shared access keys on storage accounts that must have properties read +# [storage_account]=resource_group +declare -A SAK_STORAGE_ACCOUNTS +SAK_STORAGE_ACCOUNTS=( + ["pctapisstagingsa"]="pct-apis-westeurope-staging_rg" + ["pcfilestest"]="pc-test-manual-resources" +) if [[ -z ${TERRAFORM_DIR} ]]; then echo "Must pass in TERRAFORM_DIR with -t" @@ -94,6 +84,12 @@ setup_env echo "===== Running Deploy =====" echo "IMAGE_TAG: ${IMAGE_TAG}" +if [ -z "$ARM_CLIENT_ID" ]; then + export ARM_CLIENT_ID=$(az account show --query user.name -o tsv) + echo "Using Azure CLI auth with username: ${ARM_CLIENT_ID}" +fi + + # --------------------------------------------------- if [ "${BASH_SOURCE[0]}" = "${0}" ]; then @@ -113,16 +109,7 @@ if [ "${BASH_SOURCE[0]}" = "${0}" ]; then if [[ "${SKIP_TF}" != 1 ]]; then echo "Deploying infrastructure with Terraform..." - echo "Enabling shared key access for storage account..." - # Terraform isn't able to read all resources from a storage account if shared key access is disabled - # so while we're deploying, we need to enable it. Since we haven't run TF yet, we don't have the name of the account - # so they are hardcoded here. This is a temporary workaround until this is resolved - # https://github.com/hashicorp/terraform-provider-azurerm/issues/25218 - az storage account update \ - --name ${SAK_STORAGE_ACCOUNT} \ - --resource-group ${SAK_RESOURCE_GROUP} \ - --allow-shared-key-access true \ - --output none + enable_shared_access_keys terraform init --upgrade @@ -176,7 +163,6 @@ if [ "${BASH_SOURCE[0]}" = "${0}" ]; then --wait \ --timeout 2m0s \ -f ${DEPLOY_VALUES_FILE} \ - --debug echo "================" echo "==== Tiler =====" @@ -189,7 +175,6 @@ if [ "${BASH_SOURCE[0]}" = "${0}" ]; then --wait \ --timeout 2m0s \ -f ${DEPLOY_VALUES_FILE} \ - --debug echo "==================" echo "==== Ingress =====" diff --git a/deployment/bin/lib b/deployment/bin/lib index 329e682b..5710dbce 100755 --- a/deployment/bin/lib +++ b/deployment/bin/lib @@ -131,3 +131,43 @@ function get_cidr_range() { IFS='.' read -r -a ip_parts <<< "$runnerIpAddress" echo "${ip_parts[0]}.${ip_parts[1]}.0.0/16" } + +function disable_shared_access_keys() { + echo "Disabling shared access key on storage account..." + + for SAK_STORAGE_ACCOUNT in "${!SAK_STORAGE_ACCOUNTS[@]}"; do + SAK_RESOURCE_GROUP=${SAK_STORAGE_ACCOUNTS[$SAK_STORAGE_ACCOUNT]} + + az storage account update \ + --name ${SAK_STORAGE_ACCOUNT} \ + --resource-group ${SAK_RESOURCE_GROUP} \ + --allow-shared-key-access false \ + --output none + + if [ $? -ne 0 ]; then + echo "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!" + echo "WARNING: Failed to turn off shared key access on the storage account." + echo "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!" + exit 2 + fi + done +} + +function enable_shared_access_keys() { + echo "Enabling shared key access for storage account..." + # Terraform isn't able to read all resources from a storage account if shared key access is disabled + # so while we're deploying, we need to enable it. Since we haven't run TF yet, we don't have the name of the account + # so they are hardcoded here. This is a temporary workaround until this is resolved + # https://github.com/hashicorp/terraform-provider-azurerm/issues/25218 + + for SAK_STORAGE_ACCOUNT in "${!SAK_STORAGE_ACCOUNTS[@]}"; do + SAK_RESOURCE_GROUP=${SAK_STORAGE_ACCOUNTS[$SAK_STORAGE_ACCOUNT]} + + echo " - enabling ${SAK_STORAGE_ACCOUNT} / ${SAK_RESOURCE_GROUP}" + az storage account update \ + --name ${SAK_STORAGE_ACCOUNT} \ + --resource-group ${SAK_RESOURCE_GROUP} \ + --allow-shared-key-access true \ + --output none + done +} diff --git a/deployment/docker-compose.yml b/deployment/docker-compose.yml index 1e20e1a0..da67cbbe 100644 --- a/deployment/docker-compose.yml +++ b/deployment/docker-compose.yml @@ -8,11 +8,11 @@ services: environment: - ACR_STAC_REPO=${ACR_STAC_REPO:-pccomponentstest.azurecr.io/planetary-computer-apis/stac} - ACR_TILER_REPO=${ACR_TILER_REPO:-pccomponentstest.azurecr.io/planetary-computer-apis/tiler} - - IMAGE_TAG + - IMAGE_TAG=${IMAGE_TAG:-latest} - GIT_COMMIT - ARM_SUBSCRIPTION_ID=${ARM_SUBSCRIPTION_ID:-a84a690d-585b-4c7c-80d9-851a48af5a50} - - ARM_TENANT_ID + - ARM_TENANT_ID=${ARM_TENANT_ID:-72f988bf-86f1-41af-91ab-2d7cd011db47} - ARM_CLIENT_ID - ARM_USE_OIDC - ARM_OIDC_TOKEN diff --git a/deployment/terraform/resources/providers.tf b/deployment/terraform/resources/providers.tf index 3dc2d08c..2404234d 100644 --- a/deployment/terraform/resources/providers.tf +++ b/deployment/terraform/resources/providers.tf @@ -1,6 +1,11 @@ provider "azurerm" { features {} use_oidc = true + + # This could be used instead of temporarily enabling shared key access once + # this issue is resolved. + # https://github.com/hashicorp/terraform-provider-azurerm/issues/23142 + # storage_use_azuread = true } terraform { diff --git a/deployment/terraform/resources/storage_account.tf b/deployment/terraform/resources/storage_account.tf index b14c4e6e..411270d8 100644 --- a/deployment/terraform/resources/storage_account.tf +++ b/deployment/terraform/resources/storage_account.tf @@ -37,3 +37,8 @@ resource "azurerm_storage_table" "ipexceptionlist" { name = "ipexceptionlist" storage_account_name = azurerm_storage_account.pc.name } + +resource "azurerm_storage_table" "blobstoragebannedip" { + name = "blobstoragebannedip" + storage_account_name = azurerm_storage_account.pc.name +} diff --git a/deployment/terraform/staging/main.tf b/deployment/terraform/staging/main.tf index 269a2401..75567650 100644 --- a/deployment/terraform/staging/main.tf +++ b/deployment/terraform/staging/main.tf @@ -31,6 +31,7 @@ terraform { container_name = "pc-test-api" key = "pqe-apis.tfstate" use_oidc = true + use_azuread_auth = true } }