From dd34343c5a5c2e1a23cc652ea1eafaae54b14019 Mon Sep 17 00:00:00 2001 From: chayan kumar roy Date: Wed, 18 Dec 2024 05:40:22 +0100 Subject: [PATCH] fix errors details for zonal resize request & align with mixer errors details (#12578) --- mmv1/products/compute/ResizeRequest.yaml | 363 ++++++++---------- .../compute_mig_resize_request_delete.go.tmpl | 7 +- .../compute_mig_resize_request.tf.tmpl | 4 +- 3 files changed, 173 insertions(+), 201 deletions(-) diff --git a/mmv1/products/compute/ResizeRequest.yaml b/mmv1/products/compute/ResizeRequest.yaml index cd2faac099d0..5699b79eb46b 100644 --- a/mmv1/products/compute/ResizeRequest.yaml +++ b/mmv1/products/compute/ResizeRequest.yaml @@ -14,20 +14,16 @@ --- name: 'ResizeRequest' api_resource_type_kind: InstanceGroupManagerResizeRequest -kind: 'compute#instanceGroupManagerResizeRequest' description: | Represents a Managed Instance Group Resize Request Resize Requests are the Managed Instance Group implementation of Dynamic Workload Scheduler Flex Start. - With Dynamic Workload Scheduler in Flex Start mode, you submit a GPU capacity request for your AI/ML jobs by indicating how many you need, a duration, and your preferred region. Dynamic Workload Scheduler intelligently persists the request; once the capacity becomes available, it automatically provisions your VMs enabling your workloads to run continuously for the entire duration of the capacity allocation. + With Dynamic Workload Scheduler in Flex Start mode, you submit a GPU capacity request for your AI/ML jobs by indicating how many you need, a duration, and your preferred zone. Dynamic Workload Scheduler intelligently persists the request; once the capacity becomes available, it automatically provisions your VMs enabling your workloads to run continuously for the entire duration of the capacity allocation. references: guides: - # Link to quickstart in the API's Guides section. 
For example: - # 'Create and connect to a database': 'https://cloud.google.com/alloydb/docs/quickstart/create-and-connect' - 'QUICKSTART_TITLE': 'https://cloud.google.com/compute/docs/instance-groups/create-resize-requests-mig' - # Link to the REST API reference for the resource. For example, - # https://cloud.google.com/alloydb/docs/reference/rest/v1/projects.locations.backups + 'About resize requests in a MIG': 'https://cloud.google.com/compute/docs/instance-groups/about-resize-requests-mig' + # Link to the REST API reference for the resource. api: 'https://cloud.google.com/compute/docs/reference/rest/v1/instanceGroupManagerResizeRequests' docs: ### List Method ### @@ -43,9 +39,6 @@ timeouts: async: actions: ['create', 'delete', 'update'] type: 'OpAsync' - # Overrides which API calls return operations. Default: ['create', - # 'update', 'delete'] - # actions: ['create', 'update', 'delete'] operation: base_url: '{{op_id}}' result: @@ -70,7 +63,7 @@ parameters: - name: 'zone' type: ResourceRef description: | - Name of the compute zone scoping this request. Name should conform to RFC1035. + The reference of the compute zone scoping this request. url_param_only: true required: true resource: 'Zone' @@ -78,9 +71,7 @@ parameters: - name: 'instanceGroupManager' type: ResourceRef description: | - The name of the managed instance group. The name should conform to RFC1035 or be a resource ID. - Authorization requires the following IAM permission on the specified resource instanceGroupManager: - *compute.instanceGroupManagers.update + The reference of the instance group manager this ResizeRequest is a part of. url_param_only: true required: true resource: 'InstanceGroupManager' @@ -93,16 +84,10 @@ properties: The creation timestamp for this resize request in RFC3339 text format. output: true - name: 'state' - type: Enum + type: String description: | - [Output only] Current state of the request. + Current state of the request. 
output: true - enum_values: - 'CREATING' - 'ACCEPTED' - 'FAILED' - 'SUCCEEDED' - 'CANCELLED' - name: 'name' type: String description: | @@ -125,7 +110,7 @@ properties: - name: 'seconds' type: String description: | - Span of time at a resolution of a second. Must be from 0 to 315,576,000,000 inclusive. Note: these bounds are computed from: 60 sec/min * 60 min/hr * 24 hr/day * 365.25 days/year * 10000 years + Span of time at a resolution of a second. Must be from 600 to 604800 inclusive. Note: the minimum and maximum allowed values for requestedRunDuration are 10 minutes (600 seconds) and 7 days (604800 seconds), respectively. required: true - name: 'nanos' type: Integer @@ -134,205 +119,68 @@ properties: - name: 'status' type: NestedObject description: | - [Output only] Status of the request. + Status of the request. output: true properties: # Status.error - name: 'error' type: NestedObject description: | - [Output only] Fatal errors encountered during the queueing or provisioning phases of the ResizeRequest that caused the transition to the FAILED state. Contrary to the lastAttempt errors, this field is final and errors are never removed from here, as the ResizeRequest is not going to retry. + Fatal errors encountered during the queueing or provisioning phases of the ResizeRequest that caused the transition to the FAILED state. Contrary to the lastAttempt errors, this field is final and errors are never removed from here, as the ResizeRequest is not going to retry. output: true properties: - name: 'errors' - type: NestedObject - description: | - [Output Only] The array of errors encountered while processing this operation. - output: true - properties: - - name: 'code' - type: String - description: | - [Output Only] The error type identifier for this error. - output: true - - name: 'location' - type: String - description: | - Output Only] Indicates the field in the request that caused the error. This property is optional. 
- output: true - - name: 'message' - type: String - description: | - [Output Only] An optional, human-readable error message. - output: true - - name: 'errorDetails' - type: NestedObject - description: | - [Output Only] An optional list of messages that contain the error details. There is a set of defined message types to use for providing details.The syntax depends on the error code. For example, QuotaExceededInfo will have details when the error code is QUOTA_EXCEEDED. - output: true - properties: - - name: 'errorInfo' - type: NestedObject - description: | - [Output Only] - output: true - properties: - - name: 'reason' - type: String - description: | - The reason of the error. This is a constant value that identifies the proximate cause of the error. Error reasons are unique within a particular domain of errors. This should be at most 63 characters and match a regular expression of [A-Z][A-Z0-9_]+[A-Z0-9], which represents UPPER_SNAKE_CASE. - output: true - - name: 'domain' - type: String - description: | - The logical grouping to which the "reason" belongs. The error domain is typically the registered service name of the tool or product that generates the error. Example: "pubsub.googleapis.com". If the error is generated by some common infrastructure, the error domain must be a globally unique value that identifies the infrastructure. For Google API infrastructure, the error domain is "googleapis.com". - output: true - - name: 'metadatas' - type: KeyValuePairs - description: | - Additional structured details about this error. - Keys must match /[a-z][a-zA-Z0-9-_]+/ but should ideally be lowerCamelCase. Also they must be limited to 64 characters in length. When identifying the current value of an exceeded limit, the units should be contained in the key, not the value. 
For example, rather than {"instanceLimit": "100/request"}, should be returned as, {"instanceLimitPerRequest": "100"}, if the client exceeds the number of instances that can be created in a single (batch) request. - output: true - - name: 'quotaInfo' - type: NestedObject - description: | - [Output Only] - output: true - properties: - - name: 'metricName' - type: String - description: | - The Compute Engine quota metric name. - output: true - - name: 'limitName' - type: String - description: | - The name of the quota limit. - output: true - - name: 'dimensions' - type: KeyValuePairs - description: | - The map holding related quota dimensions - output: true - - name: 'limit' - type: Integer - description: | - Current effective quota limit. The limit's unit depends on the quota type or metric. - output: true - - name: 'futureLimit' - type: Integer - description: | - Future quota limit being rolled out. The limit's unit depends on the quota type or metric. - output: true - - name: 'rolloutStatus' - type: String - description: | - Rollout status of the future quota limit. - output: true - - name: 'help' - type: NestedObject - description: | - [Output Only] - output: true - properties: - - name: 'links' - type: NestedObject - description: | - [Output Only] - output: true - properties: - - name: 'description' - type: String - description: | - Describes what the link offers. - output: true - - name: 'url' - type: String - description: | - The URL of the link. - output: true - - name: 'localizedMessage' - type: NestedObject - description: | - [Output Only] - output: true - properties: - - name: 'locale' - type: String - description: | - The locale used following the specification defined at https://www.rfc-editor.org/rfc/bcp/bcp47.txt. Examples are: "en-US", "fr-CH", "es-MX" - output: true - - name: 'message' - type: String - description: | - The localized error message in the above locale. 
- output: true - # Status.lastAttempt - - name: 'lastAttempt' - type: NestedObject - description: | - [Output only] Information about the last attempt to fulfill the request. The value is temporary since the ResizeRequest can retry, as long as it's still active and the last attempt value can either be cleared or replaced with a different error. Since ResizeRequest retries infrequently, the value may be stale and no longer show an active problem. The value is cleared when ResizeRequest transitions to the final state (becomes inactive). If the final state is FAILED the error describing it will be storred in the "error" field only. - output: true - properties: - - name: 'error' - type: NestedObject + type: Array description: | - [Output only] Fatal errors encountered during the queueing or provisioning phases of the ResizeRequest that caused the transition to the FAILED state. Contrary to the lastAttempt errors, this field is final and errors are never removed from here, as the ResizeRequest is not going to retry. + The array of errors encountered while processing this operation. output: true - properties: - - name: 'errors' - type: NestedObject - description: | - [Output Only] The array of errors encountered while processing this operation. - output: true - properties: - - name: 'code' - type: String - description: | - [Output Only] The error type identifier for this error. - output: true - - name: 'location' - type: String - description: | - Output Only] Indicates the field in the request that caused the error. This property is optional. - output: true - - name: 'message' - type: String - description: | - [Output Only] An optional, human-readable error message. - output: true - - name: 'errorDetails' + item_type: + type: NestedObject + properties: + - name: 'code' + type: String + description: | + The error type identifier for this error. + output: true + - name: 'location' + type: String + description: | + Indicates the field in the request that caused the error. 
This property is optional. + output: true + - name: 'message' + type: String + description: | + An optional, human-readable error message. + output: true + - name: 'errorDetails' + type: Array + description: | + An array of messages that contain the error details. There is a set of defined message types to use for providing details. The syntax depends on the error code. For example, QuotaExceededInfo will have details when the error code is QUOTA_EXCEEDED. + output: true + item_type: type: NestedObject - description: | - [Output Only] An optional list of messages that contain the error details. There is a set of defined message types to use for providing details.The syntax depends on the error code. For example, QuotaExceededInfo will have details when the error code is QUOTA_EXCEEDED. - output: true properties: - name: 'errorInfo' type: NestedObject - description: | - [Output Only] output: true properties: - name: 'reason' type: String description: | - The reason of the error. This is a constant value that identifies the proximate cause of the error. Error reasons are unique within a particular domain of errors. This should be at most 63 characters and match a regular expression of [A-Z][A-Z0-9_]+[A-Z0-9], which represents UPPER_SNAKE_CASE. + The reason of the error. This is a constant value that identifies the proximate cause of the error. Error reasons are unique within a particular domain of errors. output: true - name: 'domain' type: String description: | - The logical grouping to which the "reason" belongs. The error domain is typically the registered service name of the tool or product that generates the error. Example: "pubsub.googleapis.com". If the error is generated by some common infrastructure, the error domain must be a globally unique value that identifies the infrastructure. For Google API infrastructure, the error domain is "googleapis.com". + The logical grouping to which the "reason" belongs. 
The error domain is typically the registered service name of the tool or product that generates the error. Example: "pubsub.googleapis.com". output: true - name: 'metadatas' type: KeyValuePairs description: | Additional structured details about this error. - Keys must match /[a-z][a-zA-Z0-9-_]+/ but should ideally be lowerCamelCase. Also they must be limited to 64 characters in length. When identifying the current value of an exceeded limit, the units should be contained in the key, not the value. For example, rather than {"instanceLimit": "100/request"}, should be returned as, {"instanceLimitPerRequest": "100"}, if the client exceeds the number of instances that can be created in a single (batch) request. output: true - name: 'quotaInfo' type: NestedObject - description: | - [Output Only] output: true properties: - name: 'metricName' @@ -367,14 +215,10 @@ properties: output: true - name: 'help' type: NestedObject - description: | - [Output Only] output: true properties: - name: 'links' type: NestedObject - description: | - [Output Only] output: true properties: - name: 'description' @@ -389,8 +233,6 @@ properties: output: true - name: 'localizedMessage' type: NestedObject - description: | - [Output Only] output: true properties: - name: 'locale' @@ -403,3 +245,132 @@ properties: description: | The localized error message in the above locale. output: true + # Status.lastAttempt + - name: 'lastAttempt' + type: NestedObject + description: | + Information about the last attempt to fulfill the request. The value is temporary since the ResizeRequest can retry, as long as it's still active and the last attempt value can either be cleared or replaced with a different error. Since ResizeRequest retries infrequently, the value may be stale and no longer show an active problem. The value is cleared when ResizeRequest transitions to the final state (becomes inactive). If the final state is FAILED, the error describing it will be stored in the "error" field only. 
+ output: true + properties: + - name: 'error' + type: NestedObject + description: | + Fatal errors encountered during the queueing or provisioning phases of the ResizeRequest that caused the transition to the FAILED state. Contrary to the lastAttempt errors, this field is final and errors are never removed from here, as the ResizeRequest is not going to retry. + output: true + properties: + - name: 'errors' + type: Array + description: | + The array of errors encountered while processing this operation. + output: true + item_type: + type: NestedObject + properties: + - name: 'code' + type: String + description: | + The error type identifier for this error. + output: true + - name: 'location' + type: String + description: | + Indicates the field in the request that caused the error. This property is optional. + output: true + - name: 'message' + type: String + description: | + An optional, human-readable error message. + output: true + - name: 'errorDetails' + type: Array + description: | + An array of messages that contain the error details. There is a set of defined message types to use for providing details. The syntax depends on the error code. For example, QuotaExceededInfo will have details when the error code is QUOTA_EXCEEDED. + output: true + item_type: + type: NestedObject + properties: + - name: 'errorInfo' + type: NestedObject + output: true + properties: + - name: 'reason' + type: String + description: | + The reason of the error. This is a constant value that identifies the proximate cause of the error. Error reasons are unique within a particular domain of errors. + output: true + - name: 'domain' + type: String + description: | + The logical grouping to which the "reason" belongs. The error domain is typically the registered service name of the tool or product that generates the error. Example: "pubsub.googleapis.com". + output: true + - name: 'metadatas' + type: KeyValuePairs + description: | + Additional structured details about this error. 
+ output: true + - name: 'quotaInfo' + type: NestedObject + output: true + properties: + - name: 'metricName' + type: String + description: | + The Compute Engine quota metric name. + output: true + - name: 'limitName' + type: String + description: | + The name of the quota limit. + output: true + - name: 'dimensions' + type: KeyValuePairs + description: | + The map holding related quota dimensions + output: true + - name: 'limit' + type: Integer + description: | + Current effective quota limit. The limit's unit depends on the quota type or metric. + output: true + - name: 'futureLimit' + type: Integer + description: | + Future quota limit being rolled out. The limit's unit depends on the quota type or metric. + output: true + - name: 'rolloutStatus' + type: String + description: | + Rollout status of the future quota limit. + output: true + - name: 'help' + type: NestedObject + output: true + properties: + - name: 'links' + type: NestedObject + output: true + properties: + - name: 'description' + type: String + description: | + Describes what the link offers. + output: true + - name: 'url' + type: String + description: | + The URL of the link. + output: true + - name: 'localizedMessage' + type: NestedObject + output: true + properties: + - name: 'locale' + type: String + description: | + The locale used following the specification defined at https://www.rfc-editor.org/rfc/bcp/bcp47.txt. Examples are: "en-US", "fr-CH", "es-MX" + output: true + - name: 'message' + type: String + description: | + The localized error message in the above locale. 
+ output: true diff --git a/mmv1/templates/terraform/custom_delete/compute_mig_resize_request_delete.go.tmpl b/mmv1/templates/terraform/custom_delete/compute_mig_resize_request_delete.go.tmpl index f0a4a2dc3592..d025a6796b1c 100644 --- a/mmv1/templates/terraform/custom_delete/compute_mig_resize_request_delete.go.tmpl +++ b/mmv1/templates/terraform/custom_delete/compute_mig_resize_request_delete.go.tmpl @@ -19,8 +19,9 @@ if err != nil { return err } -// Get current state (accepted or not) and delete -if d.Get("state") == "ACCEPTED" { +// If a resize request is in the CREATING or ACCEPTED state, it must be canceled before it can be +// deleted. A resize request in any other state can be deleted directly. +if d.Get("state") == "CREATING" || d.Get("state") == "ACCEPTED" { // cancel resize request res, err := transport_tpg.SendRequest(transport_tpg.SendRequestOptions{ Config: config, @@ -55,7 +56,7 @@ res, err := transport_tpg.SendRequest(transport_tpg.SendRequestOptions{ }) err = ComputeOperationWaitTime( -config, res, project, "Cancelling the resize request", userAgent, +config, res, project, "Deleting the resize request", userAgent, d.Timeout(schema.TimeoutDelete)) diff --git a/mmv1/templates/terraform/examples/compute_mig_resize_request.tf.tmpl b/mmv1/templates/terraform/examples/compute_mig_resize_request.tf.tmpl index 635e8db71532..fe81ddf4bb14 100644 --- a/mmv1/templates/terraform/examples/compute_mig_resize_request.tf.tmpl +++ b/mmv1/templates/terraform/examples/compute_mig_resize_request.tf.tmpl @@ -1,5 +1,5 @@ resource "google_compute_region_instance_template" "a3_dws" { - name = "a3-dws" + name = "{{index $.Vars "resize_request_name"}}" region = "us-central1" description = "This template is used to create a mig instance that is compatible with DWS resize requests." 
instance_description = "A3 GPU" @@ -40,7 +40,7 @@ resource "google_compute_region_instance_template" "a3_dws" { } resource "google_compute_instance_group_manager" "a3_dws" { - name = "a3-dws" + name = "{{index $.Vars "resize_request_name"}}" base_instance_name = "a3-dws" zone = "us-central1-a"