diff --git a/charts/flyte-core/README.md b/charts/flyte-core/README.md index b4c05b623d..dba13a78e1 100644 --- a/charts/flyte-core/README.md +++ b/charts/flyte-core/README.md @@ -61,7 +61,7 @@ helm install gateway bitnami/contour -n flyte | cloud_events.eventsPublisher.topicName | string | `"arn:aws:sns:us-east-2:123456:123-my-topic"` | | | cloud_events.type | string | `"aws"` | | | cluster_resource_manager | object | `{"config":{"cluster_resources":{"customData":[{"production":[{"projectQuotaCpu":{"value":"5"}},{"projectQuotaMemory":{"value":"4000Mi"}}]},{"staging":[{"projectQuotaCpu":{"value":"2"}},{"projectQuotaMemory":{"value":"3000Mi"}}]},{"development":[{"projectQuotaCpu":{"value":"4"}},{"projectQuotaMemory":{"value":"3000Mi"}}]}],"refreshInterval":"5m","standaloneDeployment":false,"templatePath":"/etc/flyte/clusterresource/templates"}},"enabled":true,"podAnnotations":{},"service_account_name":"flyteadmin","standaloneDeployment":false,"templates":[{"key":"aa_namespace","value":"apiVersion: v1\nkind: Namespace\nmetadata:\n name: {{ namespace }}\nspec:\n finalizers:\n - kubernetes\n"},{"key":"ab_project_resource_quota","value":"apiVersion: v1\nkind: ResourceQuota\nmetadata:\n name: project-quota\n namespace: {{ namespace }}\nspec:\n hard:\n limits.cpu: {{ projectQuotaCpu }}\n limits.memory: {{ projectQuotaMemory }}\n"}]}` | Configuration for the Cluster resource manager component. This is an optional component, that enables automatic cluster configuration. 
This is useful to set default quotas, manage namespaces etc that map to a project/domain | -| cluster_resource_manager.config | object | `{"cluster_resources":{"customData":[{"production":[{"projectQuotaCpu":{"value":"5"}},{"projectQuotaMemory":{"value":"4000Mi"}}]},{"staging":[{"projectQuotaCpu":{"value":"2"}},{"projectQuotaMemory":{"value":"3000Mi"}}]},{"development":[{"projectQuotaCpu":{"value":"4"}},{"projectQuotaMemory":{"value":"3000Mi"}}]}],"refreshInterval":"5m","standaloneDeployment":false,"templatePath":"/etc/flyte/clusterresource/templates"}}` | Configmap for ClusterResource parameters | +| cluster_resource_manager.config | object | `{"cluster_resources":{"customData":[{"production":[{"projectQuotaCpu":{"value":"5"}},{"projectQuotaMemory":{"value":"4000Mi"}}]},{"staging":[{"projectQuotaCpu":{"value":"2"}},{"projectQuotaMemory":{"value":"3000Mi"}}]},{"development":[{"projectQuotaCpu":{"value":"4"}},{"projectQuotaMemory":{"value":"3000Mi"}}]}],"refreshInterval":"5m","standaloneDeployment":false,"templatePath":"/etc/flyte/clusterresource/templates"}}` | Configmap for ClusterResource parameters | | cluster_resource_manager.config.cluster_resources | object | `{"customData":[{"production":[{"projectQuotaCpu":{"value":"5"}},{"projectQuotaMemory":{"value":"4000Mi"}}]},{"staging":[{"projectQuotaCpu":{"value":"2"}},{"projectQuotaMemory":{"value":"3000Mi"}}]},{"development":[{"projectQuotaCpu":{"value":"4"}},{"projectQuotaMemory":{"value":"3000Mi"}}]}],"refreshInterval":"5m","standaloneDeployment":false,"templatePath":"/etc/flyte/clusterresource/templates"}` | ClusterResource parameters Refer to the [structure](https://pkg.go.dev/github.com/lyft/flyteadmin@v0.3.37/pkg/runtime/interfaces#ClusterResourceConfig) to customize. 
| | cluster_resource_manager.config.cluster_resources.refreshInterval | string | `"5m"` | How frequently to run the sync process | | cluster_resource_manager.config.cluster_resources.standaloneDeployment | bool | `false` | Starts the cluster resource manager in standalone mode with requisite auth credentials to call flyteadmin service endpoints | diff --git a/flyteplugins/go/tasks/pluginmachinery/flytek8s/container_helper.go b/flyteplugins/go/tasks/pluginmachinery/flytek8s/container_helper.go index dbba1f3b68..739b97081a 100644 --- a/flyteplugins/go/tasks/pluginmachinery/flytek8s/container_helper.go +++ b/flyteplugins/go/tasks/pluginmachinery/flytek8s/container_helper.go @@ -136,62 +136,67 @@ func adjustResourceRequirement(resourceName v1.ResourceName, resourceRequirement // Furthermore, this function handles some clean-up such as converting GPU resources to the recognized Nvidia gpu // resource name and deleting unsupported Storage-type resources. func ApplyResourceOverrides(resources, platformResources v1.ResourceRequirements, assignIfUnset bool) v1.ResourceRequirements { - if len(resources.Requests) == 0 { - resources.Requests = make(v1.ResourceList) - } - - if len(resources.Limits) == 0 { - resources.Limits = make(v1.ResourceList) - } - - // As a fallback, in the case the Flyte workflow object does not have platformResource defaults set, the defaults - // come from the plugin config. 
- platformResources = resolvePlatformDefaults(platformResources, config.GetK8sPluginConfig().DefaultCPURequest, - config.GetK8sPluginConfig().DefaultMemoryRequest) - - adjustResourceRequirement(v1.ResourceCPU, resources, platformResources, assignIfUnset) - adjustResourceRequirement(v1.ResourceMemory, resources, platformResources, assignIfUnset) - - _, ephemeralStorageRequested := resources.Requests[v1.ResourceEphemeralStorage] - _, ephemeralStorageLimited := resources.Limits[v1.ResourceEphemeralStorage] - - if ephemeralStorageRequested || ephemeralStorageLimited { - adjustResourceRequirement(v1.ResourceEphemeralStorage, resources, platformResources, assignIfUnset) - } - - // TODO: Make configurable. 1/15/2019 Flyte Cluster doesn't support setting storage requests/limits. - // https://github.com/kubernetes/enhancements/issues/362 - delete(resources.Requests, v1.ResourceStorage) - delete(resources.Limits, v1.ResourceStorage) - - gpuResourceName := config.GetK8sPluginConfig().GpuResourceName - shouldAdjustGPU := false - _, gpuRequested := resources.Requests[gpuResourceName] - _, gpuLimited := resources.Limits[gpuResourceName] - if gpuRequested || gpuLimited { - shouldAdjustGPU = true - } - - // Override GPU - if res, found := resources.Requests[resourceGPU]; found { - resources.Requests[gpuResourceName] = res - delete(resources.Requests, resourceGPU) - shouldAdjustGPU = true - } - - if res, found := resources.Limits[resourceGPU]; found { - resources.Limits[gpuResourceName] = res - delete(resources.Limits, resourceGPU) - shouldAdjustGPU = true - } - - if shouldAdjustGPU { - adjustResourceRequirement(gpuResourceName, resources, platformResources, assignIfUnset) - } - - return resources + if len(resources.Requests) == 0 { + resources.Requests = make(v1.ResourceList) + } + + if len(resources.Limits) == 0 { + resources.Limits = make(v1.ResourceList) + } + + // As a fallback, in the case the Flyte workflow object does not have platformResource defaults set, the defaults + 
// come from the plugin config. + platformResources = resolvePlatformDefaults(platformResources, config.GetK8sPluginConfig().DefaultCPURequest, + config.GetK8sPluginConfig().DefaultMemoryRequest) + + + // Modify the behavior to only assign default values when the user hasn't specified them. + if assignIfUnset { + adjustResourceRequirement(v1.ResourceCPU, resources, platformResources, assignIfUnset) + adjustResourceRequirement(v1.ResourceMemory, resources, platformResources, assignIfUnset) + + _, ephemeralStorageRequested := resources.Requests[v1.ResourceEphemeralStorage] + _, ephemeralStorageLimited := resources.Limits[v1.ResourceEphemeralStorage] + + if ephemeralStorageRequested || ephemeralStorageLimited { + adjustResourceRequirement(v1.ResourceEphemeralStorage, resources, platformResources, assignIfUnset) + } + + // TODO: Make configurable. 1/15/2019 Flyte Cluster doesn't support setting storage requests/limits. + // https://github.com/kubernetes/enhancements/issues/362 + delete(resources.Requests, v1.ResourceStorage) + delete(resources.Limits, v1.ResourceStorage) + + gpuResourceName := config.GetK8sPluginConfig().GpuResourceName + shouldAdjustGPU := false + _, gpuRequested := resources.Requests[gpuResourceName] + _, gpuLimited := resources.Limits[gpuResourceName] + if gpuRequested || gpuLimited { + shouldAdjustGPU = true + } + + // Override GPU + if res, found := resources.Requests[resourceGPU]; found { + resources.Requests[gpuResourceName] = res + delete(resources.Requests, resourceGPU) + shouldAdjustGPU = true + } + + if res, found := resources.Limits[resourceGPU]; found { + resources.Limits[gpuResourceName] = res + delete(resources.Limits, resourceGPU) + shouldAdjustGPU = true + } + + if shouldAdjustGPU { + adjustResourceRequirement(gpuResourceName, resources, platformResources, assignIfUnset) + } + } + + return resources } + // BuildRawContainer constructs a Container based on the definition passed by the TaskExecutionContext. 
func BuildRawContainer(ctx context.Context, tCtx pluginscore.TaskExecutionContext) (*v1.Container, error) { taskTemplate, err := tCtx.TaskReader().Read(ctx)