Skip to content

Commit

Permalink
Merge pull request #10 from truefoundry/gpu-condition
Browse files Browse the repository at this point in the history
Gpu condition
  • Loading branch information
dunefro authored Mar 16, 2024
2 parents 9b1df91 + f52ab23 commit e444a28
Show file tree
Hide file tree
Showing 3 changed files with 49 additions and 30 deletions.
4 changes: 3 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -37,14 +37,16 @@ Truefoundry Azure Cluster Module
| <a name="input_disk_driver_version"></a> [disk\_driver\_version](#input\_disk\_driver\_version) | Version of disk driver. Supported values `v1` and `v2` | `string` | `"v1"` | no |
| <a name="input_disk_size"></a> [disk\_size](#input\_disk\_size) | Disk size of the initial node pool in GB | `string` | `"100"` | no |
| <a name="input_dns_ip"></a> [dns\_ip](#input\_dns\_ip) | IP from service CIDR used for internal DNS | `string` | `"10.255.0.10"` | no |
| <a name="input_enable_A100_node_pools"></a> [enable\_A100\_node\_pools](#input\_enable\_A100\_node\_pools) | Enable A100 node pools spot/on-demand | `bool` | `true` | no |
| <a name="input_enable_A10_node_pools"></a> [enable\_A10\_node\_pools](#input\_enable\_A10\_node\_pools) | Enable A10 node pools spot/on-demand | `bool` | `true` | no |
| <a name="input_enable_T4_node_pools"></a> [enable\_T4\_node\_pools](#input\_enable\_T4\_node\_pools) | Enable T4 node pools spot/on-demand | `bool` | `true` | no |
| <a name="input_enable_blob_driver"></a> [enable\_blob\_driver](#input\_enable\_blob\_driver) | Enable blob storage provider | `bool` | `true` | no |
| <a name="input_enable_disk_driver"></a> [enable\_disk\_driver](#input\_enable\_disk\_driver) | Enable disk storage provider | `bool` | `true` | no |
| <a name="input_enable_file_driver"></a> [enable\_file\_driver](#input\_enable\_file\_driver) | Enable file storage provider | `bool` | `true` | no |
| <a name="input_enable_snapshot_controller"></a> [enable\_snapshot\_controller](#input\_enable\_snapshot\_controller) | Enable snapshot controller | `bool` | `true` | no |
| <a name="input_enable_storage_profile"></a> [enable\_storage\_profile](#input\_enable\_storage\_profile) | Enable storage profile for the cluster. If disabled `enable_blob_driver`, `enable_file_driver`, `enable_disk_driver` and `enable_snapshot_controller` will have no impact | `bool` | `true` | no |
| <a name="input_initial_node_pool_max_surge"></a> [initial\_node\_pool\_max\_surge](#input\_initial\_node\_pool\_max\_surge) | Max surge in percentage for the initial node pool | `string` | `"10"` | no |
| <a name="input_intial_node_pool_instance_type"></a> [intial\_node\_pool\_instance\_type](#input\_intial\_node\_pool\_instance\_type) | Instance size of the initial node pool | `string` | `"Standard_D2s_v5"` | no |
| <a name="input_intial_node_pool_spot_instance_type"></a> [intial\_node\_pool\_spot\_instance\_type](#input\_intial\_node\_pool\_spot\_instance\_type) | Instance size of the initial node pool | `string` | `"Standard_D4s_v5"` | no |
| <a name="input_kubernetes_version"></a> [kubernetes\_version](#input\_kubernetes\_version) | Version of the kubernetes engine | `string` | `"1.28"` | no |
| <a name="input_location"></a> [location](#input\_location) | Location of the resource group | `string` | n/a | yes |
| <a name="input_max_pods_per_node"></a> [max\_pods\_per\_node](#input\_max\_pods\_per\_node) | Max pods per node | `number` | `32` | no |
Expand Down
48 changes: 24 additions & 24 deletions locals.tf
Original file line number Diff line number Diff line change
Expand Up @@ -21,50 +21,50 @@ locals {
}
]
gpupools = [
{
var.enable_A100_node_pools ? {
name = "a100"
vm_size = "Standard_NC24ads_A100_v4"
},
{
} : null,
var.enable_A100_node_pools ? {
name = "a100x2"
vm_size = "Standard_NC48ads_A100_v4"
},
{
} : null,
var.enable_A100_node_pools ? {
name = "a100x4"
vm_size = "Standard_NC96ads_A100_v4"
},
{
} : null,
var.enable_A10_node_pools ? {
name = "a10"
vm_size = "Standard_NV6ads_A10_v5"
},
{
} : null,
var.enable_A10_node_pools ? {
name = "a10x2"
vm_size = "Standard_NV12ads_A10_v5"
},
{
} : null,
var.enable_A10_node_pools ? {
name = "a10x3"
vm_size = "Standard_NV18ads_A10_v5"
},
{
} : null,
var.enable_A10_node_pools ? {
name = "a10x6"
vm_size = "Standard_NV36ads_A10_v5"
},
{
} : null,
var.enable_T4_node_pools ? {
name = "t4"
vm_size = "Standard_NC4as_T4_v3"
},
{
} : null,
var.enable_T4_node_pools ? {
name = "t4x2"
vm_size = "Standard_NC8as_T4_v3"
},
{
} : null,
var.enable_T4_node_pools ? {
name = "t4x4"
vm_size = "Standard_NC16as_T4_v3"
},
{
} : null,
var.enable_T4_node_pools ? {
name = "t4x16"
vm_size = "Standard_NC64as_T4_v3"
}
} : null
]
node_pools = merge({ for k, v in local.cpupools : "${v["name"]}sp" => {
name = "${v["name"]}sp"
Expand Down Expand Up @@ -110,7 +110,7 @@ locals {
zones = []
vnet_subnet_id = var.subnet_id
max_pods = var.max_pods_per_node
} },
} if v != null },
{ for k, v in local.gpupools : "${v["name"]}" => {
name = "${v["name"]}"
node_count = 0
Expand All @@ -131,5 +131,5 @@ locals {
zones = []
vnet_subnet_id = var.subnet_id
max_pods = var.max_pods_per_node
} })
} if v != null })
}
27 changes: 22 additions & 5 deletions variables.tf
Original file line number Diff line number Diff line change
Expand Up @@ -36,17 +36,34 @@ variable "intial_node_pool_instance_type" {
type = string
}

variable "intial_node_pool_spot_instance_type" {
description = "Instance size of the initial node pool"
default = "Standard_D4s_v5"
type = string
}
# variable "intial_node_pool_spot_instance_type" {
# description = "Instance size of the initial node pool"
# default = "Standard_D4s_v5"
# type = string
# }

# Max surge for the initial node pool, expressed as a percentage.
# The value is declared as a string (default "10"), not a number.
variable "initial_node_pool_max_surge" {
  description = "Max surge in percentage for the initial node pool"
  type        = string
  default     = "10"
}
# Toggle for the A10 GPU node pools.
# In locals.tf each A10 entry of local.gpupools (a10, a10x2, a10x3, a10x6) is
# wrapped in `var.enable_A10_node_pools ? {...} : null`; the null entries are
# dropped by the `if v != null` filter when local.node_pools is built.
variable "enable_A10_node_pools" {
  type        = bool
  default     = true
  description = "Enable A10 node pools spot/on-demand"
}

# Toggle for the A100 GPU node pools.
# In locals.tf each A100 entry of local.gpupools (a100, a100x2, a100x4) is
# wrapped in `var.enable_A100_node_pools ? {...} : null`; the null entries are
# dropped by the `if v != null` filter when local.node_pools is built.
variable "enable_A100_node_pools" {
  type        = bool
  default     = true
  description = "Enable A100 node pools spot/on-demand"
}

# Toggle for the T4 GPU node pools.
# In locals.tf each T4 entry of local.gpupools (t4, t4x2, t4x4, t4x16) is
# wrapped in `var.enable_T4_node_pools ? {...} : null`; the null entries are
# dropped by the `if v != null` filter when local.node_pools is built.
variable "enable_T4_node_pools" {
  type        = bool
  default     = true
  description = "Enable T4 node pools spot/on-demand"
}

variable "workload_identity_enabled" {
description = "Enable workload identity in the cluster"
Expand Down

0 comments on commit e444a28

Please sign in to comment.