From 2346f82f4cf0f8a623fe9751e810e965d3d755c8 Mon Sep 17 00:00:00 2001 From: Caleb Woodbine Date: Wed, 25 Sep 2024 13:10:01 +1200 Subject: [PATCH] feat: add an example for Talos on OCI adds example Terraform code and documentation to deploy Talos on Oracle Cloud Signed-off-by: Andrey Smirnov Signed-off-by: Caleb Woodbine --- examples/terraform/oci/README.md | 147 ++++++++++++++++++++ examples/terraform/oci/data.tf | 145 ++++++++++++++++++++ examples/terraform/oci/iam.tf | 31 +++++ examples/terraform/oci/image.tf | 26 ++++ examples/terraform/oci/instances.tf | 128 +++++++++++++++++ examples/terraform/oci/locals.tf | 98 +++++++++++++ examples/terraform/oci/network.tf | 206 ++++++++++++++++++++++++++++ examples/terraform/oci/outputs.tf | 22 +++ examples/terraform/oci/storage.tf | 26 ++++ examples/terraform/oci/talos.tf | 65 +++++++++ examples/terraform/oci/variables.tf | 127 +++++++++++++++++ examples/terraform/oci/versions.tf | 25 ++++ 12 files changed, 1046 insertions(+) create mode 100644 examples/terraform/oci/README.md create mode 100644 examples/terraform/oci/data.tf create mode 100644 examples/terraform/oci/iam.tf create mode 100644 examples/terraform/oci/image.tf create mode 100644 examples/terraform/oci/instances.tf create mode 100644 examples/terraform/oci/locals.tf create mode 100644 examples/terraform/oci/network.tf create mode 100644 examples/terraform/oci/outputs.tf create mode 100644 examples/terraform/oci/storage.tf create mode 100644 examples/terraform/oci/talos.tf create mode 100644 examples/terraform/oci/variables.tf create mode 100644 examples/terraform/oci/versions.tf diff --git a/examples/terraform/oci/README.md b/examples/terraform/oci/README.md new file mode 100644 index 0000000..8eead08 --- /dev/null +++ b/examples/terraform/oci/README.md @@ -0,0 +1,147 @@ +# Oracle Cloud Terraform Example + +Example of a highly available Kubernetes cluster with Talos on Oracle Cloud. 
+ +## Prerequisites + +**general** + +- a top-level tenancy + +**install things** + +``` bash +brew install oci-cli hashicorp/tap/terraform siderolabs/tap/talosctl qemu +``` + +## Notes + +- although not officially supported by Oracle Cloud, network LoadBalancers are provided through the Oracle Cloud Controller (only officially supported on OKE) +- this guide will target arm64, though you can replace with amd64 if it doesn't suit your needs +- instances will only launch with firmware set to UEFI_64 and launch mode set to PARAVIRTUALIZED + +## Uploading an image + +Unfortunately due to upload constraints, this portion of the deployment is unable to be run using Terraform. This may change in the future. + +Prepare and upload a Talos disk image for Oracle Cloud, with + +1. create a storage bucket: https://cloud.oracle.com/object-storage/buckets +2. using Talos Linux Image Factory, create a plan and generate an image to use. See this example: https://factory.talos.dev/?arch=arm64&cmdline=console%3DttyAMA0&cmdline-set=true&extensions=-&platform=oracle&target=cloud&version=1.8.0 +3. download the disk image (ending in raw.xz) +4. define the image metadata, with the steps under the section "**defining metadata**" +5. repack the image, with steps under the section "**repacking the image**" +6. upload the image to the storage bucket under objects +7. under object and view object details, copy the dedicated endpoint url. 
Example: https://axe608t7iscj.objectstorage.us-phoenix-1.oci.customer-oci.com/n/axe608t7iscj/b/talos/o/talos-v1.8.0-oracle-arm64.oci + +### defining metadata + +create a file called `image_metadata.json` with contents such as + +``` json +{ + "version": 2, + "externalLaunchOptions": { + "firmware": "UEFI_64", + "networkType": "PARAVIRTUALIZED", + "bootVolumeType": "PARAVIRTUALIZED", + "remoteDataVolumeType": "PARAVIRTUALIZED", + "localDataVolumeType": "PARAVIRTUALIZED", + "launchOptionsSource": "PARAVIRTUALIZED", + "pvAttachmentVersion": 2, + "pvEncryptionInTransitEnabled": true, + "consistentVolumeNamingEnabled": true + }, + "imageCapabilityData": null, + "imageCapsFormatVersion": null, + "operatingSystem": "Talos", + "operatingSystemVersion": "1.8.0", + "additionalMetadata": { + "shapeCompatibilities": [ + { + "internalShapeName": "VM.Standard.A1.Flex", + "ocpuConstraints": null, + "memoryConstraints": null + } + ] + } +} +``` + +### repacking the image + +decompress the downloaded disk image artifact from factory + +``` bash +xz --decompress DISK_IMAGE.raw.xz +``` + +use `qemu-img` to convert the image to qcow2 + +``` bash +qemu-img convert -f raw -O qcow2 oracle-arm64.raw oracle-arm64.qcow2 +``` + +repack the image as a tar file with the metadata + +``` bash +tar zcf oracle-arm64.oci oracle-arm64.qcow2 image_metadata.json +``` + +## Create a .tfvars file + +to configure authentication and namespacing, create a `.tfvars` file with values from the links placeholding in the example below + +``` hcl +tenancy_ocid = "TENANCY OCID : https://cloud.oracle.com/tenancy" +user_ocid = "YOUR USER OCID : https://cloud.oracle.com/identity/domains/my-profile" +private_key_path = "YOUR PRIVATE KEY PATH : https://cloud.oracle.com/identity/domains/my-profile/api-keys" +fingerprint = "THE FINGERPRINT FOR YOUR PRIVATE KEY : ^^" +region = "YOUR PREFERRED REGION : https://cloud.oracle.com/regions" +compartment_ocid = "YOUR COMPARTMENT OCID : 
https://cloud.oracle.com/identity/compartments" +talos_image_oci_bucket_url = "YOUR DEDICATED BUCKET OBJECT URL : https://cloud.oracle.com/object-storage/buckets" +``` + +## Bringing it up + +prepare the local directory for using Terraform + +``` bash +terraform init +``` + +verify the changes to provision + +``` bash +terraform plan -var-file=.tfvars +``` + +apply the changes + +``` bash +terraform apply -var-file=.tfvars +``` + +get the talosconfig + +``` bash +terraform output -raw talosconfig > ./talosconfig +``` + +get the kubeconfig + +``` bash +terraform output -raw kubeconfig > ./kubeconfig +``` + +destroy the worker nodes + +``` bash +terraform destroy -var-file=.tfvars -target=random_pet.worker +``` + +destroy + +``` bash +terraform destroy -var-file=.tfvars +``` diff --git a/examples/terraform/oci/data.tf b/examples/terraform/oci/data.tf new file mode 100644 index 0000000..b22729d --- /dev/null +++ b/examples/terraform/oci/data.tf @@ -0,0 +1,145 @@ +data "oci_identity_compartment" "this" { + id = var.compartment_ocid +} + +data "oci_identity_availability_domains" "availability_domains" { + #Required + compartment_id = var.tenancy_ocid +} + +data "oci_core_image_shapes" "image_shapes" { + depends_on = [oci_core_shape_management.image_shape] + #Required + image_id = oci_core_image.talos_image.id +} + +data "talos_image_factory_extensions_versions" "this" { + # get the latest talos version + talos_version = var.talos_version + filters = { + names = var.talos_extensions + } +} + +data "talos_image_factory_urls" "this" { + talos_version = var.talos_version + schematic_id = talos_image_factory_schematic.this.id + platform = "oracle" + architecture = var.architecture +} + +data "talos_client_configuration" "talosconfig" { + cluster_name = var.cluster_name + client_configuration = talos_machine_secrets.machine_secrets.client_configuration + endpoints = [for k, v in oci_core_instance.controlplane : v.public_ip] + nodes = concat( + [for k, v in 
oci_core_instance.controlplane : v.public_ip], + [for k, v in oci_core_instance.worker : v.private_ip] + ) +} + +data "talos_machine_configuration" "controlplane" { + cluster_name = var.cluster_name + # cluster_endpoint = "https://${var.kube_apiserver_domain}:6443" + cluster_endpoint = "https://${oci_network_load_balancer_network_load_balancer.controlplane_load_balancer.ip_addresses[0].ip_address}:6443" + + machine_type = "controlplane" + machine_secrets = talos_machine_secrets.machine_secrets.machine_secrets + + talos_version = var.talos_version + kubernetes_version = var.kubernetes_version + + docs = false + examples = false + + config_patches = [ + local.talos_base_configuration, + <<-EOT + machine: + features: + kubernetesTalosAPIAccess: + enabled: true + allowedRoles: + - os:reader + allowedKubernetesNamespaces: + - kube-system + EOT + , + yamlencode({ + machine = { + certSANs = concat([ + var.kube_apiserver_domain, + oci_network_load_balancer_network_load_balancer.controlplane_load_balancer.ip_addresses[0].ip_address, + ], + [for k, v in oci_core_instance.controlplane : v.public_ip] + ) + } + cluster = { + apiServer = { + certSANs = concat([ + var.kube_apiserver_domain, + oci_network_load_balancer_network_load_balancer.controlplane_load_balancer.ip_addresses[0].ip_address, + ], + [for k, v in oci_core_instance.controlplane : v.public_ip] + ) + } + } + }), + ] +} + +data "talos_machine_configuration" "worker" { + cluster_name = var.cluster_name + # cluster_endpoint = "https://${var.kube_apiserver_domain}:6443" + cluster_endpoint = "https://${oci_network_load_balancer_network_load_balancer.controlplane_load_balancer.ip_addresses[0].ip_address}:6443" + + machine_type = "worker" + machine_secrets = talos_machine_secrets.machine_secrets.machine_secrets + + talos_version = var.talos_version + kubernetes_version = var.kubernetes_version + + docs = false + examples = false + + config_patches = [ + local.talos_base_configuration, + < val } + # count = 1 + #Required + 
# choose the next availability domain which wasn't last + availability_domain = data.oci_identity_availability_domains.availability_domains.availability_domains[each.key % length(data.oci_identity_availability_domains.availability_domains.availability_domains)].name + compartment_id = var.compartment_ocid + shape = var.instance_shape == null ? data.oci_core_image_shapes.image_shapes.image_shape_compatibilities[0].shape : var.instance_shape + shape_config { + ocpus = var.controlplane_instance_ocpus + memory_in_gbs = var.controlplane_instance_memory_in_gbs + } + + create_vnic_details { + assign_public_ip = true + subnet_id = oci_core_subnet.subnet_regional.id + nsg_ids = [oci_core_network_security_group.network_security_group.id] + } + agent_config { + are_all_plugins_disabled = true + is_management_disabled = true + is_monitoring_disabled = true + } + availability_config { + is_live_migration_preferred = true + recovery_action = "RESTORE_INSTANCE" + } + #Optional + display_name = "${var.cluster_name}-control-plane-${each.value.id}" + freeform_tags = local.common_labels + launch_options { + #Optional + network_type = local.instance_mode + remote_data_volume_type = local.instance_mode + boot_volume_type = local.instance_mode + firmware = "UEFI_64" + } + instance_options { + are_legacy_imds_endpoints_disabled = true + } + source_details { + #Required + source_type = "image" + source_id = oci_core_image.talos_image.id + boot_volume_size_in_gbs = "50" + } + preserve_boot_volume = false + + lifecycle { + create_before_destroy = "true" + ignore_changes = [ + defined_tags + ] + } +} + +resource "oci_core_instance" "worker" { + for_each = { for idx, val in random_pet.worker : idx => val } + # count = 1 + #Required + # choose the next availability domain which wasn't last + availability_domain = data.oci_identity_availability_domains.availability_domains.availability_domains[each.key % 
length(data.oci_identity_availability_domains.availability_domains.availability_domains)].name + compartment_id = var.compartment_ocid + shape = var.instance_shape == null ? data.oci_core_image_shapes.image_shapes.image_shape_compatibilities[0].shape : var.instance_shape + metadata = { + user_data = base64encode(data.talos_machine_configuration.worker.machine_configuration) + } + shape_config { + ocpus = var.worker_instance_ocpus + memory_in_gbs = var.worker_instance_memory_in_gbs + } + + create_vnic_details { + assign_public_ip = true + subnet_id = oci_core_subnet.subnet_regional.id + nsg_ids = [oci_core_network_security_group.network_security_group.id] + } + agent_config { + are_all_plugins_disabled = true + is_management_disabled = true + is_monitoring_disabled = true + } + availability_config { + is_live_migration_preferred = true + recovery_action = "RESTORE_INSTANCE" + } + #Optional + display_name = "${var.cluster_name}-worker-${each.value.id}" + freeform_tags = local.common_labels + launch_options { + #Optional + network_type = local.instance_mode + remote_data_volume_type = local.instance_mode + boot_volume_type = local.instance_mode + firmware = "UEFI_64" + } + instance_options { + are_legacy_imds_endpoints_disabled = true + } + source_details { + #Required + source_type = "image" + source_id = oci_core_image.talos_image.id + boot_volume_size_in_gbs = "50" + } + preserve_boot_volume = false + + lifecycle { + create_before_destroy = "true" + ignore_changes = [ + metadata.user_data, + defined_tags + ] + } + + depends_on = [oci_core_instance.controlplane] +} diff --git a/examples/terraform/oci/locals.tf b/examples/terraform/oci/locals.tf new file mode 100644 index 0000000..7fead9d --- /dev/null +++ b/examples/terraform/oci/locals.tf @@ -0,0 +1,98 @@ +locals { + common_labels = { + "TalosCluster" = var.cluster_name + } + talos_install_image = data.talos_image_factory_urls.this.urls.installer + instance_mode = "PARAVIRTUALIZED" + talos_install_disk = "/dev/sda" 
+ instance_kernel_arg_console = "ttyAMA0" + # Example: https://raw.githubusercontent.com/oracle/oci-cloud-controller-manager/v1.26.0/manifests/provider-config-instance-principals-example.yaml + oci_config_ini = < val } + #Required + backend_set_name = oci_network_load_balancer_backend_set.controlplane_backend_set.name + network_load_balancer_id = oci_network_load_balancer_network_load_balancer.controlplane_load_balancer.id + port = 6443 + + #Optional + target_id = each.value.id +} + +resource "oci_network_load_balancer_backend" "talos_backend" { + for_each = { for idx, val in oci_core_instance.controlplane : idx => val } + #Required + backend_set_name = oci_network_load_balancer_backend_set.talos_backend_set.name + network_load_balancer_id = oci_network_load_balancer_network_load_balancer.controlplane_load_balancer.id + port = 50000 + + #Optional + target_id = each.value.id +} diff --git a/examples/terraform/oci/outputs.tf b/examples/terraform/oci/outputs.tf new file mode 100644 index 0000000..965f676 --- /dev/null +++ b/examples/terraform/oci/outputs.tf @@ -0,0 +1,22 @@ +output "factory_disk_image" { + value = data.talos_image_factory_urls.this.urls.disk_image +} + +output "load_balancer_ip" { + value = oci_network_load_balancer_network_load_balancer.controlplane_load_balancer.ip_addresses[0].ip_address +} + +output "talosconfig" { + value = data.talos_client_configuration.talosconfig.talos_config + sensitive = true +} + +output "kubeconfig" { + value = talos_cluster_kubeconfig.kubeconfig.kubeconfig_raw + sensitive = true +} + +output "oci_cloud_provider_config" { + value = local.oci_cloud_provider_config + sensitive = true +} diff --git a/examples/terraform/oci/storage.tf b/examples/terraform/oci/storage.tf new file mode 100644 index 0000000..84521d5 --- /dev/null +++ b/examples/terraform/oci/storage.tf @@ -0,0 +1,26 @@ +resource "oci_core_volume" "worker" { + for_each = { for idx, val in oci_core_instance.worker : idx => val if var.worker_volume_enabled } + 
#Required + compartment_id = var.compartment_ocid + + #Optional + availability_domain = data.oci_identity_availability_domains.availability_domains.availability_domains[each.key % length(data.oci_identity_availability_domains.availability_domains.availability_domains)].name + display_name = each.value.display_name + freeform_tags = local.common_labels + size_in_gbs = var.worker_volume_size_in_gbs + + lifecycle { + create_before_destroy = "true" + ignore_changes = [ + defined_tags + ] + } +} + +resource "oci_core_volume_attachment" "worker_volume_attachment" { + for_each = { for idx, val in oci_core_volume.worker : idx => val if var.worker_volume_enabled } + #Required + attachment_type = local.instance_mode + instance_id = [for val in oci_core_instance.worker : val if val.display_name == each.value.display_name][0].id + volume_id = each.value.id +} diff --git a/examples/terraform/oci/talos.tf b/examples/terraform/oci/talos.tf new file mode 100644 index 0000000..a734e56 --- /dev/null +++ b/examples/terraform/oci/talos.tf @@ -0,0 +1,65 @@ +resource "talos_machine_secrets" "machine_secrets" { + talos_version = var.talos_version +} + +resource "talos_image_factory_schematic" "this" { + schematic = yamlencode( + { + customization = { + systemExtensions = { + officialExtensions = data.talos_image_factory_extensions_versions.this.extensions_info[*].name + } + } + } + ) +} + +resource "talos_cluster_kubeconfig" "kubeconfig" { + depends_on = [ + talos_machine_bootstrap.bootstrap + ] + client_configuration = talos_machine_secrets.machine_secrets.client_configuration + endpoint = oci_network_load_balancer_network_load_balancer.controlplane_load_balancer.ip_addresses[0].ip_address + node = oci_network_load_balancer_network_load_balancer.controlplane_load_balancer.ip_addresses[0].ip_address +} + +resource "talos_machine_configuration_apply" "controlplane" { + for_each = { for idx, val in oci_core_instance.controlplane : idx => val } + client_configuration = 
talos_machine_secrets.machine_secrets.client_configuration + machine_configuration_input = data.talos_machine_configuration.controlplane.machine_configuration + node = each.value.public_ip + + config_patches = [ + yamlencode({ + machine = { + kubelet = { + extraArgs = { + "provider-id" = each.value.id + } + } + } + }) + ] +} + +resource "talos_machine_configuration_apply" "worker" { + for_each = { for idx, val in oci_core_instance.worker : idx => val } + client_configuration = talos_machine_secrets.machine_secrets.client_configuration + machine_configuration_input = data.talos_machine_configuration.worker.machine_configuration + endpoint = [for k, v in oci_core_instance.controlplane : v.public_ip][0] + node = each.value.private_ip + + depends_on = [oci_core_volume_attachment.worker_volume_attachment, talos_machine_configuration_apply.controlplane] +} + +resource "talos_machine_bootstrap" "bootstrap" { + depends_on = [talos_machine_configuration_apply.controlplane] + + client_configuration = talos_machine_secrets.machine_secrets.client_configuration + endpoint = [for k, v in oci_core_instance.controlplane : v.public_ip][0] + node = [for k, v in oci_core_instance.controlplane : v.public_ip][0] + + lifecycle { + ignore_changes = all + } +} diff --git a/examples/terraform/oci/variables.tf b/examples/terraform/oci/variables.tf new file mode 100644 index 0000000..77ac76e --- /dev/null +++ b/examples/terraform/oci/variables.tf @@ -0,0 +1,127 @@ +variable "compartment_ocid" { + type = string + sensitive = true +} +variable "tenancy_ocid" { + type = string + sensitive = true +} +variable "user_ocid" { + type = string + sensitive = true +} +variable "fingerprint" { + type = string + sensitive = true +} +variable "private_key_path" { + type = string + default = "~/.oci/oci_main_terraform.pem" + sensitive = true +} +variable "instance_availability_domain" { + type = string + default = null +} +variable "region" { + description = "the OCI region where resources will be created" 
+ type = string + default = null +} +variable "cluster_name" { + type = string + default = "talosoci" +} +variable "kube_apiserver_domain" { + type = string + default = null +} +variable "cidr_blocks" { + type = set(string) + default = ["10.0.0.0/16"] +} +variable "subnet_block" { + type = string + default = "10.0.0.0/24" +} +variable "subnet_block_regional" { + type = string + default = "10.0.10.0/24" +} +variable "talos_version" { + type = string + default = "v1.7.6" +} +variable "kubernetes_version" { + type = string + default = "v1.30.3" +} +variable "instance_shape" { + type = string + default = "VM.Standard.A1.Flex" +} +variable "oracle_cloud_ccm_version" { + type = string + default = "v1.29.0" +} +variable "talos_ccm_version" { + type = string + default = "v1.6.0" +} +variable "pod_subnet_block" { + type = string + default = "10.32.0.0/12" +} +variable "service_subnet_block" { + type = string + default = "10.200.0.0/22" +} +variable "architecture" { + type = string + default = "arm64" +} +variable "talos_extensions" { + type = set(string) + default = [ + "gvisor", + "kata-containers", + "iscsi-tools", + "mdadm", + ] +} +variable "controlplane_instance_count" { + type = number + default = 3 +} +variable "worker_instance_count" { + type = number + default = 6 +} +variable "talos_image_oci_bucket_url" { + type = string + nullable = false +} +variable "controlplane_instance_ocpus" { + type = number + default = 4 +} +variable "controlplane_instance_memory_in_gbs" { + type = string + default = "8" +} +variable "worker_instance_ocpus" { + type = number + default = 4 +} +variable "worker_instance_memory_in_gbs" { + type = string + default = "8" +} +variable "worker_volume_enabled" { + type = bool + default = true +} +variable "worker_volume_size_in_gbs" { + type = string + default = "500" +} diff --git a/examples/terraform/oci/versions.tf b/examples/terraform/oci/versions.tf new file mode 100644 index 0000000..f94f877 --- /dev/null +++ 
b/examples/terraform/oci/versions.tf @@ -0,0 +1,25 @@ +terraform { + required_providers { + oci = { + source = "oracle/oci" + version = "6.9.0" + } + talos = { + source = "siderolabs/talos" + version = "~>0.6.0-beta.1" + } + random = { + source = "hashicorp/random" + version = "> 0.0.0" + } + } + required_version = ">= 1.2" +} + +provider "oci" { + tenancy_ocid = var.tenancy_ocid + user_ocid = var.user_ocid + private_key_path = var.private_key_path + fingerprint = var.fingerprint + region = var.region +}