From 38bbbcecdcf94a59eb4e4ef8a818009045ef2a74 Mon Sep 17 00:00:00 2001 From: Natalie White Date: Fri, 23 Feb 2024 06:04:33 +0000 Subject: [PATCH 01/51] Propagate initia glue l2 jobl refactor from natalie-white-aws/aws-cdk-glue-l2 to mjanardhan/aws-cdk-glue-l2 to resolve non-trivial merge situation --- .../@aws-cdk/aws-glue-alpha/lib/constants.ts | 240 ++++++++ packages/@aws-cdk/aws-glue-alpha/lib/index.ts | 12 +- .../aws-glue-alpha/lib/job-executable.ts | 183 +----- packages/@aws-cdk/aws-glue-alpha/lib/job.ts | 302 ++-------- .../aws-glue-alpha/lib/jobs/flex-job.ts | 13 + .../@aws-cdk/aws-glue-alpha/lib/jobs/job.ts | 546 ++++++++++++++++++ .../lib/jobs/pyspark-etl-job.ts | 180 ++++++ .../lib/jobs/python-shell-job.ts | 8 + .../aws-glue-alpha/lib/jobs/ray-job.ts | 7 + .../lib/jobs/scala-spark-etl-job.ts | 12 + .../aws-glue-alpha/lib/jobs/spark-etl-job.ts | 10 + .../aws-glue-alpha/lib/jobs/spark-ui-utils.ts | 72 +++ .../aws-glue-alpha/lib/jobs/streaming-job.ts | 13 + .../lib/triggers/conditional-triggers.ts | 7 + .../lib/triggers/notify-event-trigger.ts | 10 + .../lib/triggers/on-demand-trigger.ts | 8 + .../lib/triggers/scheduled-trigger.ts | 12 + .../aws-glue-alpha/lib/triggers/trigger.ts | 7 + .../@aws-cdk/aws-glue-alpha/test/code.test.ts | 12 +- .../aws-glue-alpha/test/constants.test.ts | 95 +++ .../test/integ.job-python-shell.ts | 4 +- .../@aws-cdk/aws-glue-alpha/test/integ.job.ts | 22 +- .../test/job-executable.test.ts | 81 +-- .../@aws-cdk/aws-glue-alpha/test/job.test.ts | 218 ++----- 24 files changed, 1391 insertions(+), 683 deletions(-) create mode 100644 packages/@aws-cdk/aws-glue-alpha/lib/constants.ts create mode 100644 packages/@aws-cdk/aws-glue-alpha/lib/jobs/flex-job.ts create mode 100644 packages/@aws-cdk/aws-glue-alpha/lib/jobs/job.ts create mode 100644 packages/@aws-cdk/aws-glue-alpha/lib/jobs/pyspark-etl-job.ts create mode 100644 packages/@aws-cdk/aws-glue-alpha/lib/jobs/python-shell-job.ts create mode 100644 
packages/@aws-cdk/aws-glue-alpha/lib/jobs/ray-job.ts create mode 100644 packages/@aws-cdk/aws-glue-alpha/lib/jobs/scala-spark-etl-job.ts create mode 100644 packages/@aws-cdk/aws-glue-alpha/lib/jobs/spark-etl-job.ts create mode 100644 packages/@aws-cdk/aws-glue-alpha/lib/jobs/spark-ui-utils.ts create mode 100644 packages/@aws-cdk/aws-glue-alpha/lib/jobs/streaming-job.ts create mode 100644 packages/@aws-cdk/aws-glue-alpha/lib/triggers/conditional-triggers.ts create mode 100644 packages/@aws-cdk/aws-glue-alpha/lib/triggers/notify-event-trigger.ts create mode 100644 packages/@aws-cdk/aws-glue-alpha/lib/triggers/on-demand-trigger.ts create mode 100644 packages/@aws-cdk/aws-glue-alpha/lib/triggers/scheduled-trigger.ts create mode 100644 packages/@aws-cdk/aws-glue-alpha/lib/triggers/trigger.ts create mode 100644 packages/@aws-cdk/aws-glue-alpha/test/constants.test.ts diff --git a/packages/@aws-cdk/aws-glue-alpha/lib/constants.ts b/packages/@aws-cdk/aws-glue-alpha/lib/constants.ts new file mode 100644 index 0000000000000..7b1cfd7896fdf --- /dev/null +++ b/packages/@aws-cdk/aws-glue-alpha/lib/constants.ts @@ -0,0 +1,240 @@ +/** + * The type of predefined worker that is allocated when a job runs. + * + * This is a plain enum: only the members declared below are available, and there + * is no `WorkerType.of()` factory for other worker type names. + */ +export enum WorkerType { + /** + * Standard Worker Type + * 4 vCPU, 16 GB of memory and a 50GB disk, and 2 executors per worker. + */ + STANDARD = 'Standard', + + /** + * G.1X Worker Type + * 1 DPU (4 vCPU, 16 GB of memory, 64 GB disk), and provides 1 executor per worker. Suitable for memory-intensive jobs. + */ + G_1X = 'G.1X', + + /** + * G.2X Worker Type + * 2 DPU (8 vCPU, 32 GB of memory, 128 GB disk), and provides 1 executor per worker. Suitable for memory-intensive jobs. + */ + G_2X = 'G.2X', + + /** + * G.4X Worker Type + * 4 DPU (16 vCPU, 64 GB of memory, 256 GB disk), and provides 1 executor per worker. 
+ * We recommend this worker type for jobs whose workloads contain your most demanding transforms, + * aggregations, joins, and queries. This worker type is available only for AWS Glue version 3.0 or later jobs. + */ + G_4X = 'G.4X', + + /** + * G.8X Worker Type + * 8 DPU (32 vCPU, 128 GB of memory, 512 GB disk), and provides 1 executor per worker. We recommend this worker + * type for jobs whose workloads contain your most demanding transforms, aggregations, joins, and queries. + * This worker type is available only for AWS Glue version 3.0 or later jobs. + */ + G_8X = 'G.8X', + + /** + * G.025X Worker Type + * 0.25 DPU (2 vCPU, 4 GB of memory, 64 GB disk), and provides 1 executor per worker. Suitable for low volume streaming jobs. + */ + G_025X = 'G.025X', + + /** + * Z.2X Worker Type + */ + Z_2X = 'Z.2X', +} + +/** + * Job states emitted by Glue to CloudWatch Events. + * + * @see https://docs.aws.amazon.com/AmazonCloudWatch/latest/events/EventTypes.html#glue-event-types for more information. + */ +export enum JobState { + /** + * State indicating job run succeeded + */ + SUCCEEDED = 'SUCCEEDED', + + /** + * State indicating job run failed + */ + FAILED = 'FAILED', + + /** + * State indicating job run timed out + */ + TIMEOUT = 'TIMEOUT', + + /** + * State indicating job is starting + */ + STARTING = 'STARTING', + + /** + * State indicating job is running + */ + RUNNING = 'RUNNING', + + /** + * State indicating job is stopping + */ + STOPPING = 'STOPPING', + + /** + * State indicating job stopped + */ + STOPPED = 'STOPPED', +} + +/** + * The Glue CloudWatch metric type. + * + * @see https://docs.aws.amazon.com/glue/latest/dg/monitoring-awsglue-with-cloudwatch-metrics.html + */ +export enum MetricType { + /** + * A value at a point in time. + */ + GAUGE = 'gauge', + + /** + * An aggregate number. + */ + COUNT = 'count', +} + +/** + * The ExecutionClass whether the job is run with a standard or flexible execution class. 
+ * + * @see https://docs.aws.amazon.com/glue/latest/dg/aws-glue-api-jobs-job.html#aws-glue-api-jobs-job-Job + * @see https://docs.aws.amazon.com/glue/latest/dg/add-job.html + */ +export enum ExecutionClass { + /** + * The flexible execution class is appropriate for time-insensitive jobs whose start + * and completion times may vary. + */ + FLEX = 'FLEX', + + /** + * The standard execution class is ideal for time-sensitive workloads that require fast job + * startup and dedicated resources. + */ + STANDARD = 'STANDARD', +} + +/** + * AWS Glue version determines the versions of Apache Spark and Python that are available to the job. + * + * @see https://docs.aws.amazon.com/glue/latest/dg/add-job.html. + */ +export enum GlueVersion { + /** + * Glue version using Spark 2.2.1 and Python 2.7 + */ + V0_9 = '0.9', + + /** + * Glue version using Spark 2.4.3, Python 2.7 and Python 3.6 + */ + V1_0 = '1.0', + + /** + * Glue version using Spark 2.4.3 and Python 3.7 + */ + V2_0 = '2.0', + + /** + * Glue version using Spark 3.1.1 and Python 3.7 + */ + V3_0 = '3.0', + + /** + * Glue version using Spark 3.3.0 and Python 3.10 + */ + V4_0 = '4.0', + +} + +/** + * Runtime language of the Glue job + */ +export enum JobLanguage { + /** + * Scala + */ + SCALA = 'scala', + + /** + * Python + */ + PYTHON = 'python', +} + +/** + * Python version + */ +export enum PythonVersion { + /** + * Python 2 (the exact version depends on GlueVersion and JobCommand used) + */ + TWO = '2', + + /** + * Python 3 (the exact version depends on GlueVersion and JobCommand used) + */ + THREE = '3', + + /** + * Python 3.9 (the exact version depends on GlueVersion and JobCommand used) + */ + THREE_NINE = '3.9', +} + +/** + * AWS Glue runtime determines the runtime engine of the job. + * + */ +export enum Runtime { + /** + * Runtime for a Glue for Ray 2.4. + */ + RAY_TWO_FOUR = 'Ray2.4', +} + +/** + * The job type. 
+ * + * This is a plain enum: only the members declared below are available, and there + * is no `JobType.of()` factory for other job type names. + */ +export enum JobType { + /** + * Command for running a Glue Spark job. + */ + ETL = 'glueetl', + + /** + * Command for running a Glue Spark streaming job. + */ + STREAMING = 'gluestreaming', + + /** + * Command for running a Glue python shell job. + */ + PYTHON_SHELL = 'pythonshell', + + /** + * Command for running a Glue Ray job. + */ + RAY = 'glueray', + +} + diff --git a/packages/@aws-cdk/aws-glue-alpha/lib/index.ts b/packages/@aws-cdk/aws-glue-alpha/lib/index.ts index 1b9514c14625e..5c5d13f9b5c76 100644 --- a/packages/@aws-cdk/aws-glue-alpha/lib/index.ts +++ b/packages/@aws-cdk/aws-glue-alpha/lib/index.ts @@ -12,5 +12,15 @@ export * from './s3-table'; export * from './schema'; export * from './security-configuration'; export * from './storage-parameter'; +export * from './constants'; +export * from './jobs/job'; +// export * from './jobs/flex-job'; +export * from './jobs/pyspark-etl-job'; +// export * from './jobs/python-shell-job'; +// export * from './jobs/ray-job'; +// export * from './jobs/scala-spark-etl-job'; +export * from './jobs/spark-ui-utils'; +// export * from './jobs/spark-etl-job'; +//export * from './jobs/streaming-job'; export * from './table-base'; -export * from './table-deprecated'; +export * from './table-deprecated'; \ No newline at end of file diff --git a/packages/@aws-cdk/aws-glue-alpha/lib/job-executable.ts b/packages/@aws-cdk/aws-glue-alpha/lib/job-executable.ts index 15cb5757e88b3..da73b2e17136e 100644 --- a/packages/@aws-cdk/aws-glue-alpha/lib/job-executable.ts +++ b/packages/@aws-cdk/aws-glue-alpha/lib/job-executable.ts @@ -1,164 +1,5 @@ import { Code } from './code'; - -/** - * AWS Glue version determines the versions of Apache Spark and Python that are available to the job. - * - * @see https://docs.aws.amazon.com/glue/latest/dg/add-job.html. 
- * - * If you need to use a GlueVersion that doesn't exist as a static member, you - * can instantiate a `GlueVersion` object, e.g: `GlueVersion.of('1.5')`. - */ -export class GlueVersion { - /** - * Glue version using Spark 2.2.1 and Python 2.7 - */ - public static readonly V0_9 = new GlueVersion('0.9'); - - /** - * Glue version using Spark 2.4.3, Python 2.7 and Python 3.6 - */ - public static readonly V1_0 = new GlueVersion('1.0'); - - /** - * Glue version using Spark 2.4.3 and Python 3.7 - */ - public static readonly V2_0 = new GlueVersion('2.0'); - - /** - * Glue version using Spark 3.1.1 and Python 3.7 - */ - public static readonly V3_0 = new GlueVersion('3.0'); - - /** - * Glue version using Spark 3.3.0 and Python 3.10 - */ - public static readonly V4_0 = new GlueVersion('4.0'); - - /** - * Custom Glue version - * @param version custom version - */ - public static of(version: string): GlueVersion { - return new GlueVersion(version); - } - - /** - * The name of this GlueVersion, as expected by Job resource. - */ - public readonly name: string; - - private constructor(name: string) { - this.name = name; - } -} - -/** - * Runtime language of the Glue job - */ -export enum JobLanguage { - /** - * Scala - */ - SCALA = 'scala', - - /** - * Python - */ - PYTHON = 'python', -} - -/** - * Python version - */ -export enum PythonVersion { - /** - * Python 2 (the exact version depends on GlueVersion and JobCommand used) - */ - TWO = '2', - - /** - * Python 3 (the exact version depends on GlueVersion and JobCommand used) - */ - THREE = '3', - - /** - * Python 3.9 (the exact version depends on GlueVersion and JobCommand used) - */ - THREE_NINE = '3.9', -} - -/** - * AWS Glue runtime determines the runtime engine of the job. - * - */ -export class Runtime { - /** - * Runtime for a Glue for Ray 2.4. 
- */ - public static readonly RAY_TWO_FOUR = new Runtime('Ray2.4'); - - /** - * Custom runtime - * @param runtime custom runtime - */ - public static of(runtime: string): Runtime { - return new Runtime(runtime); - } - - /** - * The name of this Runtime. - */ - public readonly name: string; - - private constructor(name: string) { - this.name = name; - } -} - -/** - * The job type. - * - * If you need to use a JobType that doesn't exist as a static member, you - * can instantiate a `JobType` object, e.g: `JobType.of('other name')`. - */ -export class JobType { - /** - * Command for running a Glue Spark job. - */ - public static readonly ETL = new JobType('glueetl'); - - /** - * Command for running a Glue Spark streaming job. - */ - public static readonly STREAMING = new JobType('gluestreaming'); - - /** - * Command for running a Glue python shell job. - */ - public static readonly PYTHON_SHELL = new JobType('pythonshell'); - - /** - * Command for running a Glue Ray job. - */ - public static readonly RAY = new JobType('glueray'); - - /** - * Custom type name - * @param name type name - */ - public static of(name: string): JobType { - return new JobType(name); - } - - /** - * The name of this JobType, as expected by Job resource. 
- */ - public readonly name: string; - - private constructor(name: string) { - this.name = name; - } -} +import { GlueVersion, JobType, PythonVersion, Runtime, JobLanguage } from './constants'; interface PythonExecutableProps { /** @@ -350,40 +191,40 @@ export class JobExecutable { private config: JobExecutableConfig; private constructor(config: JobExecutableConfig) { - const glueVersion = config.glueVersion.name; - const type = config.type.name; - if (JobType.PYTHON_SHELL.name === type) { + const glueVersion = config.glueVersion; + const type = config.type; + if (JobType.PYTHON_SHELL === type) { if (config.language !== JobLanguage.PYTHON) { throw new Error('Python shell requires the language to be set to Python'); } - if ([GlueVersion.V0_9.name, GlueVersion.V4_0.name].includes(glueVersion)) { + if ([GlueVersion.V0_9, GlueVersion.V4_0].includes(glueVersion)) { throw new Error(`Specified GlueVersion ${glueVersion} does not support Python Shell`); } } - if (JobType.RAY.name === type) { + if (JobType.RAY === type) { if (config.language !== JobLanguage.PYTHON) { throw new Error('Ray requires the language to be set to Python'); } - if ([GlueVersion.V0_9.name, GlueVersion.V1_0.name, GlueVersion.V2_0.name, GlueVersion.V3_0.name].includes(glueVersion)) { + if ([GlueVersion.V0_9, GlueVersion.V1_0, GlueVersion.V2_0, GlueVersion.V3_0].includes(glueVersion)) { throw new Error(`Specified GlueVersion ${glueVersion} does not support Ray`); } } - if (config.extraJarsFirst && [GlueVersion.V0_9.name, GlueVersion.V1_0.name].includes(glueVersion)) { + if (config.extraJarsFirst && [GlueVersion.V0_9, GlueVersion.V1_0].includes(glueVersion)) { throw new Error(`Specified GlueVersion ${glueVersion} does not support extraJarsFirst`); } - if (config.pythonVersion === PythonVersion.TWO && ![GlueVersion.V0_9.name, GlueVersion.V1_0.name].includes(glueVersion)) { + if (config.pythonVersion === PythonVersion.TWO && ![GlueVersion.V0_9, GlueVersion.V1_0].includes(glueVersion)) { throw new 
Error(`Specified GlueVersion ${glueVersion} does not support PythonVersion ${config.pythonVersion}`); } if (JobLanguage.PYTHON !== config.language && config.extraPythonFiles) { throw new Error('extraPythonFiles is not supported for languages other than JobLanguage.PYTHON'); } - if (config.pythonVersion === PythonVersion.THREE_NINE && type !== JobType.PYTHON_SHELL.name && type !== JobType.RAY.name) { + if (config.pythonVersion === PythonVersion.THREE_NINE && type !== JobType.PYTHON_SHELL && type !== JobType.RAY) { throw new Error('Specified PythonVersion PythonVersion.THREE_NINE is only supported for JobType Python Shell and Ray'); } - if (config.pythonVersion === PythonVersion.THREE && type === JobType.RAY.name) { + if (config.pythonVersion === PythonVersion.THREE && type === JobType.RAY) { throw new Error('Specified PythonVersion PythonVersion.THREE is not supported for Ray'); } - if (config.runtime === undefined && type === JobType.RAY.name) { + if (config.runtime === undefined && type === JobType.RAY) { throw new Error('Runtime is required for Ray jobs.'); } this.config = config; diff --git a/packages/@aws-cdk/aws-glue-alpha/lib/job.ts b/packages/@aws-cdk/aws-glue-alpha/lib/job.ts index 526cb774017c3..0ebafb3db0c58 100644 --- a/packages/@aws-cdk/aws-glue-alpha/lib/job.ts +++ b/packages/@aws-cdk/aws-glue-alpha/lib/job.ts @@ -5,155 +5,19 @@ import * as iam from 'aws-cdk-lib/aws-iam'; import * as logs from 'aws-cdk-lib/aws-logs'; import * as s3 from 'aws-cdk-lib/aws-s3'; import * as cdk from 'aws-cdk-lib/core'; -import * as constructs from 'constructs'; -import { Code, GlueVersion, JobExecutable, JobExecutableConfig, JobType } from '.'; +import { Code } from '.'; +import { JobExecutable, JobExecutableConfig } from './job-executable'; import { IConnection } from './connection'; import { CfnJob } from 'aws-cdk-lib/aws-glue'; import { ISecurityConfiguration } from './security-configuration'; - -/** - * The type of predefined worker that is allocated when a job runs. 
- * - * If you need to use a WorkerType that doesn't exist as a static member, you - * can instantiate a `WorkerType` object, e.g: `WorkerType.of('other type')`. - */ -export class WorkerType { - /** - * Each worker provides 4 vCPU, 16 GB of memory and a 50GB disk, and 2 executors per worker. - */ - public static readonly STANDARD = new WorkerType('Standard'); - - /** - * Each worker maps to 1 DPU (4 vCPU, 16 GB of memory, 64 GB disk), and provides 1 executor per worker. Suitable for memory-intensive jobs. - */ - public static readonly G_1X = new WorkerType('G.1X'); - - /** - * Each worker maps to 2 DPU (8 vCPU, 32 GB of memory, 128 GB disk), and provides 1 executor per worker. Suitable for memory-intensive jobs. - */ - public static readonly G_2X = new WorkerType('G.2X'); - - /** - * Each worker maps to 4 DPU (16 vCPU, 64 GB of memory, 256 GB disk), and provides 1 executor per worker. We recommend this worker type for jobs whose workloads contain your most demanding transforms, aggregations, joins, and queries. This worker type is available only for AWS Glue version 3.0 or later jobs. - */ - public static readonly G_4X = new WorkerType('G.4X'); - - /** - * Each worker maps to 8 DPU (32 vCPU, 128 GB of memory, 512 GB disk), and provides 1 executor per worker. We recommend this worker type for jobs whose workloads contain your most demanding transforms, aggregations, joins, and queries. This worker type is available only for AWS Glue version 3.0 or later jobs. - */ - public static readonly G_8X = new WorkerType('G.8X'); - - /** - * Each worker maps to 0.25 DPU (2 vCPU, 4 GB of memory, 64 GB disk), and provides 1 executor per worker. Suitable for low volume streaming jobs. - */ - public static readonly G_025X = new WorkerType('G.025X'); - - /** - * Each worker maps to 2 high-memory DPU [M-DPU] (8 vCPU, 64 GB of memory, 128 GB disk). Supported in Ray jobs. 
- */ - public static readonly Z_2X = new WorkerType('Z.2X'); - - /** - * Custom worker type - * @param workerType custom worker type - */ - public static of(workerType: string): WorkerType { - return new WorkerType(workerType); - } - - /** - * The name of this WorkerType, as expected by Job resource. - */ - public readonly name: string; - - private constructor(name: string) { - this.name = name; - } -} - -/** - * Job states emitted by Glue to CloudWatch Events. - * - * @see https://docs.aws.amazon.com/AmazonCloudWatch/latest/events/EventTypes.html#glue-event-types for more information. - */ -export enum JobState { - /** - * State indicating job run succeeded - */ - SUCCEEDED = 'SUCCEEDED', - - /** - * State indicating job run failed - */ - FAILED = 'FAILED', - - /** - * State indicating job run timed out - */ - TIMEOUT = 'TIMEOUT', - - /** - * State indicating job is starting - */ - STARTING = 'STARTING', - - /** - * State indicating job is running - */ - RUNNING = 'RUNNING', - - /** - * State indicating job is stopping - */ - STOPPING = 'STOPPING', - - /** - * State indicating job stopped - */ - STOPPED = 'STOPPED', -} - -/** - * The Glue CloudWatch metric type. - * - * @see https://docs.aws.amazon.com/glue/latest/dg/monitoring-awsglue-with-cloudwatch-metrics.html - */ -export enum MetricType { - /** - * A value at a point in time. - */ - GAUGE = 'gauge', - - /** - * An aggregate number. - */ - COUNT = 'count', -} - -/** - * The ExecutionClass whether the job is run with a standard or flexible execution class. - * - * @see https://docs.aws.amazon.com/glue/latest/dg/aws-glue-api-jobs-job.html#aws-glue-api-jobs-job-Job - * @see https://docs.aws.amazon.com/glue/latest/dg/add-job.html - */ -export enum ExecutionClass { - /** - * The flexible execution class is appropriate for time-insensitive jobs whose start - * and completion times may vary. 
- */ - FLEX = 'FLEX', - - /** - * The standard execution class is ideal for time-sensitive workloads that require fast job - * startup and dedicated resources. - */ - STANDARD = 'STANDARD', -} +import { JobType, JobState, MetricType, ExecutionClass, WorkerType } from './constants'; +import { Construct } from 'constructs'; +import { SparkUIProps, SparkUILoggingLocation } from './jobs/spark-ui-utils'; /** * Interface representing a created or an imported `Job`. */ -export interface IJob extends cdk.IResource, iam.IGrantable { +export interface IJobLegacy extends cdk.IResource, iam.IGrantable { /** * The name of the job. * @attribute @@ -173,13 +37,6 @@ export interface IJob extends cdk.IResource, iam.IGrantable { */ onEvent(id: string, options?: events.OnEventOptions): events.Rule; - /** - * Defines a CloudWatch event rule triggered when this job moves to the input jobState. - * - * @see https://docs.aws.amazon.com/AmazonCloudWatch/latest/events/EventTypes.html#glue-event-types - */ - onStateChange(id: string, jobState: JobState, options?: events.OnEventOptions): events.Rule; - /** * Defines a CloudWatch event rule triggered when this job moves to the SUCCEEDED state. * @@ -228,7 +85,7 @@ export interface IJob extends cdk.IResource, iam.IGrantable { metricTimeout(props?: cloudwatch.MetricOptions): cloudwatch.Metric; } -abstract class JobBase extends cdk.Resource implements IJob { +abstract class JobBaseLegacy extends cdk.Resource implements IJobLegacy { public abstract readonly jobArn: string; public abstract readonly jobName: string; @@ -265,7 +122,7 @@ abstract class JobBase extends cdk.Resource implements IJob { * @param jobState the job state. * @param options optional event options. 
*/ - public onStateChange(id: string, jobState: JobState, options: events.OnEventOptions = {}): events.Rule { + protected onStateChange(id: string, jobState: JobState, options: events.OnEventOptions = {}): events.Rule { const rule = this.onEvent(id, { description: `Rule triggered when Glue job ${this.jobName} is in ${jobState} state`, ...options, @@ -369,63 +226,13 @@ abstract class JobBase extends cdk.Resource implements IJob { } } -/** - * Properties for enabling Spark UI monitoring feature for Spark-based Glue jobs. - * - * @see https://docs.aws.amazon.com/glue/latest/dg/monitor-spark-ui-jobs.html - * @see https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-etl-glue-arguments.html - */ -export interface SparkUIProps { - /** - * Enable Spark UI. - */ - readonly enabled: boolean; - - /** - * The bucket where the Glue job stores the logs. - * - * @default - a new bucket will be created. - */ - readonly bucket?: s3.IBucket; - - /** - * The path inside the bucket (objects prefix) where the Glue job stores the logs. - * Use format `'foo/bar/'` - * - * @default - the logs will be written at the root of the bucket - */ - readonly prefix?: string; -} - -/** - * The Spark UI logging location. - * - * @see https://docs.aws.amazon.com/glue/latest/dg/monitor-spark-ui-jobs.html - * @see https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-etl-glue-arguments.html - */ -export interface SparkUILoggingLocation { - /** - * The bucket where the Glue job stores the logs. - * - * @default - a new bucket will be created. - */ - readonly bucket: s3.IBucket; - - /** - * The path inside the bucket (objects prefix) where the Glue job stores the logs. - * - * @default - the logs will be written at the root of the bucket - */ - readonly prefix?: string; -} - /** * Properties for enabling Continuous Logging for Glue Jobs. 
* * @see https://docs.aws.amazon.com/glue/latest/dg/monitor-continuous-logging-enable.html * @see https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-etl-glue-arguments.html */ -export interface ContinuousLoggingProps { +export interface ContinuousLoggingPropsLegacy { /** * Enable continouous logging. */ @@ -465,7 +272,7 @@ export interface ContinuousLoggingProps { /** * Attributes for importing `Job`. */ -export interface JobAttributes { +export interface JobLegacyAttributes { /** * The name of the job. */ @@ -482,7 +289,7 @@ export interface JobAttributes { /** * Construction properties for `Job`. */ -export interface JobProps { +export interface JobLegacyProps { /** * The job's executable properties. */ @@ -623,7 +430,7 @@ export interface JobProps { * @see https://docs.aws.amazon.com/glue/latest/dg/monitor-continuous-logging-enable.html * @see https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-etl-glue-arguments.html */ - readonly continuousLogging?: ContinuousLoggingProps; + readonly continuousLogging?: ContinuousLoggingPropsLegacy; /** * The ExecutionClass whether the job is run with a standard or flexible execution class. @@ -638,8 +445,9 @@ export interface JobProps { /** * A Glue Job. + * @resource AWS::Glue::Job */ -export class Job extends JobBase { +export class JobLegacy extends JobBaseLegacy { /** * Creates a Glue Job * @@ -647,8 +455,8 @@ export class Job extends JobBase { * @param id The construct's id. * @param attrs Import attributes */ - public static fromJobAttributes(scope: constructs.Construct, id: string, attrs: JobAttributes): IJob { - class Import extends JobBase { + public static fromJobLegacyAttributes(scope: Construct, id: string, attrs: JobLegacyAttributes): IJobLegacy { + class Import extends JobBaseLegacy { public readonly jobName = attrs.jobName; public readonly jobArn = jobArn(scope, attrs.jobName); public readonly grantPrincipal = attrs.role ?? 
new iam.UnknownPrincipal({ resource: this }); @@ -685,7 +493,7 @@ export class Job extends JobBase { */ public readonly sparkUILoggingLocation?: SparkUILoggingLocation; - constructor(scope: constructs.Construct, id: string, props: JobProps) { + constructor(scope: Construct, id: string, props: JobLegacyProps) { super(scope, id, { physicalName: props.jobName, }); @@ -698,7 +506,7 @@ export class Job extends JobBase { }); this.grantPrincipal = this.role; - const sparkUI = props.sparkUI?.enabled ? this.setupSparkUI(executable, this.role, props.sparkUI) : undefined; + const sparkUI = props.sparkUI ? this.setupSparkUI(executable, this.role, props.sparkUI) : undefined;; this.sparkUILoggingLocation = sparkUI?.location; const continuousLoggingArgs = props.continuousLogging?.enabled ? this.setupContinuousLogging(this.role, props.continuousLogging) : {}; const profilingMetricsArgs = props.enableProfilingMetrics ? { '--enable-metrics': '' } : {}; @@ -711,49 +519,51 @@ export class Job extends JobBase { ...this.checkNoReservedArgs(props.defaultArguments), }; - if (props.executionClass === ExecutionClass.FLEX) { - if (executable.type !== JobType.ETL) { - throw new Error('FLEX ExecutionClass is only available for JobType.ETL jobs'); - } - if ([GlueVersion.V0_9, GlueVersion.V1_0, GlueVersion.V2_0].includes(executable.glueVersion)) { - throw new Error('FLEX ExecutionClass is only available for GlueVersion 3.0 or later'); - } - if (props.workerType && (props.workerType !== WorkerType.G_1X && props.workerType !== WorkerType.G_2X)) { - throw new Error('FLEX ExecutionClass is only available for WorkerType G_1X or G_2X'); - } - } - - let maxCapacity = props.maxCapacity; - if (maxCapacity !== undefined && (props.workerType && props.workerCount !== undefined)) { - throw new Error('maxCapacity cannot be used when setting workerType and workerCount'); - } - if (executable.type !== JobType.PYTHON_SHELL) { - if (maxCapacity !== undefined && ![GlueVersion.V0_9, 
GlueVersion.V1_0].includes(executable.glueVersion)) { - throw new Error('maxCapacity cannot be used when GlueVersion 2.0 or later'); - } - } else { - // max capacity validation for python shell jobs (defaults to 0.0625) - maxCapacity = maxCapacity ?? 0.0625; - if (maxCapacity !== 0.0625 && maxCapacity !== 1) { - throw new Error(`maxCapacity value must be either 0.0625 or 1 for JobType.PYTHON_SHELL jobs, received ${maxCapacity}`); - } - } - if ((!props.workerType && props.workerCount !== undefined) || (props.workerType && props.workerCount === undefined)) { - throw new Error('Both workerType and workerCount must be set'); - } + // TODO: Implement these validations as interface contracts + + // if (props.executionClass === ExecutionClass.FLEX) { + // if (executable.type !== JobType.ETL) { + // throw new Error('FLEX ExecutionClass is only available for JobType.ETL jobs'); + // } + // if ([GlueVersion.V0_9, GlueVersion.V1_0, GlueVersion.V2_0].includes(executable.glueVersion)) { + // throw new Error('FLEX ExecutionClass is only available for GlueVersion 3.0 or later'); + // } + // if (props.workerType && (props.workerType !== WorkerType.G_1X && props.workerType !== WorkerType.G_2X)) { + // throw new Error('FLEX ExecutionClass is only available for WorkerType G_1X or G_2X'); + // } + // } + + // let maxCapacity = props.maxCapacity; + // if (maxCapacity !== undefined && (props.workerType && props.workerCount !== undefined)) { + // throw new Error('maxCapacity cannot be used when setting workerType and workerCount'); + // } + // if (executable.type !== JobType.PYTHON_SHELL) { + // if (maxCapacity !== undefined && ![GlueVersion.V0_9, GlueVersion.V1_0].includes(executable.glueVersion)) { + // throw new Error('maxCapacity cannot be used when GlueVersion 2.0 or later'); + // } + // } else { + // // max capacity validation for python shell jobs (defaults to 0.0625) + // maxCapacity = maxCapacity ?? 
0.0625; + // if (maxCapacity !== 0.0625 && maxCapacity !== 1) { + // throw new Error(`maxCapacity value must be either 0.0625 or 1 for JobType.PYTHON_SHELL jobs, received ${maxCapacity}`); + // } + // } + // if ((!props.workerType && props.workerCount !== undefined) || (props.workerType && props.workerCount === undefined)) { + // throw new Error('Both workerType and workerCount must be set'); + // } const jobResource = new CfnJob(this, 'Resource', { name: props.jobName, description: props.description, role: this.role.roleArn, command: { - name: executable.type.name, + name: executable.type, scriptLocation: this.codeS3ObjectUrl(executable.script), pythonVersion: executable.pythonVersion, - runtime: executable.runtime ? executable.runtime.name : undefined, + runtime: executable.runtime ? executable.runtime : undefined, }, - glueVersion: executable.glueVersion.name, - workerType: props.workerType?.name, + glueVersion: executable.glueVersion, + workerType: props.workerType, numberOfWorkers: props.workerCount, maxCapacity: props.maxCapacity, maxRetries: props.maxRetries, @@ -859,7 +669,7 @@ export class Job extends JobBase { return prefix !== undefined ? `${prefix}*` : undefined; } - private setupContinuousLogging(role: iam.IRole, props: ContinuousLoggingProps) { + private setupContinuousLogging(role: iam.IRole, props: ContinuousLoggingPropsLegacy) { const args: {[key: string]: string} = { '--enable-continuous-cloudwatch-log': 'true', '--enable-continuous-log-filter': (props.quiet ?? 
true).toString(), @@ -908,7 +718,7 @@ function metricRule(rule: events.IRule, props?: cloudwatch.MetricOptions): cloud * @param scope * @param jobName */ -function jobArn(scope: constructs.Construct, jobName: string) : string { +function jobArn(scope: Construct, jobName: string) : string { return cdk.Stack.of(scope).formatArn({ service: 'glue', resource: 'job', diff --git a/packages/@aws-cdk/aws-glue-alpha/lib/jobs/flex-job.ts b/packages/@aws-cdk/aws-glue-alpha/lib/jobs/flex-job.ts new file mode 100644 index 0000000000000..04cc4c7c2403f --- /dev/null +++ b/packages/@aws-cdk/aws-glue-alpha/lib/jobs/flex-job.ts @@ -0,0 +1,13 @@ +/** + * Flex Jobs class + * + * Flex jobs supports Python and Scala language. + * The flexible execution class is appropriate for non-urgent jobs such as + * pre-production jobs, testing, and one-time data loads. + * Flexible job runs are supported for jobs using AWS Glue version 3.0 or later and G.1X or + * G.2X worker types but will default to the latest version of Glue (currently Glue 3.0.) 
+ * + * Similar to ETL, we’ll enable these features: —enable-metrics, —enable-spark-ui, + * —enable-continuous-cloudwatch-log + * + */ diff --git a/packages/@aws-cdk/aws-glue-alpha/lib/jobs/job.ts b/packages/@aws-cdk/aws-glue-alpha/lib/jobs/job.ts new file mode 100644 index 0000000000000..8dbe99ea6d916 --- /dev/null +++ b/packages/@aws-cdk/aws-glue-alpha/lib/jobs/job.ts @@ -0,0 +1,546 @@ +import * as cloudwatch from 'aws-cdk-lib/aws-cloudwatch'; +import * as events from 'aws-cdk-lib/aws-events'; +import * as iam from 'aws-cdk-lib/aws-iam'; +import * as logs from 'aws-cdk-lib/aws-logs'; +import * as cdk from 'aws-cdk-lib/core'; +import * as constructs from 'constructs'; +import { Code } from '..'; +import { MetricType, JobState, WorkerType, GlueVersion } from '../constants'; +import { IConnection } from '../connection'; +import { ISecurityConfiguration } from '../security-configuration'; + +/** + * Interface representing a new or an imported Glue Job + */ +export interface IJob extends cdk.IResource, iam.IGrantable { + /** + * The name of the job. + * @attribute + */ + readonly jobName: string; + + /** + * The ARN of the job. + * @attribute + */ + readonly jobArn: string; + + /** + * Defines a CloudWatch event rule triggered when something happens with this job. + * + * @see https://docs.aws.amazon.com/AmazonCloudWatch/latest/events/EventTypes.html#glue-event-types + */ + onEvent(id: string, options?: events.OnEventOptions): events.Rule; + + /** + * Defines a CloudWatch event rule triggered when this job moves to the SUCCEEDED state. + * + * @see https://docs.aws.amazon.com/AmazonCloudWatch/latest/events/EventTypes.html#glue-event-types + */ + onSuccess(id: string, options?: events.OnEventOptions): events.Rule; + + /** + * Defines a CloudWatch event rule triggered when this job moves to the FAILED state. 
+ * + * @see https://docs.aws.amazon.com/AmazonCloudWatch/latest/events/EventTypes.html#glue-event-types + */ + onFailure(id: string, options?: events.OnEventOptions): events.Rule; + + /** + * Defines a CloudWatch event rule triggered when this job moves to the TIMEOUT state. + * + * @see https://docs.aws.amazon.com/AmazonCloudWatch/latest/events/EventTypes.html#glue-event-types + */ + onTimeout(id: string, options?: events.OnEventOptions): events.Rule; + + /** + * Create a CloudWatch metric. + * + * @param metricName name of the metric typically prefixed with `glue.driver.`, `glue.<executorId>.` or `glue.ALL.`. + * @param type the metric type. + * @param props metric options. + * + * @see https://docs.aws.amazon.com/glue/latest/dg/monitoring-awsglue-with-cloudwatch-metrics.html + */ + metric(metricName: string, type: MetricType, props?: cloudwatch.MetricOptions): cloudwatch.Metric; + + /** + * Create a CloudWatch Metric indicating job success. + */ + metricSuccess(props?: cloudwatch.MetricOptions): cloudwatch.Metric; + + /** + * Create a CloudWatch Metric indicating job failure. + */ + metricFailure(props?: cloudwatch.MetricOptions): cloudwatch.Metric; + + /** + * Create a CloudWatch Metric indicating job timeout. + */ + metricTimeout(props?: cloudwatch.MetricOptions): cloudwatch.Metric; +} + +/** + * Properties for enabling Continuous Logging for Glue Jobs. + * + * @see https://docs.aws.amazon.com/glue/latest/dg/monitor-continuous-logging-enable.html + * @see https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-etl-glue-arguments.html + */ +export interface ContinuousLoggingProps { + /** + * Enable continuous logging. + */ + readonly enabled: boolean; + + /** + * Specify a custom CloudWatch log group name. + * + * @default - a log group is created with name `/aws-glue/jobs/logs-v2/`. + */ + readonly logGroup?: logs.ILogGroup; + + /** + * Specify a custom CloudWatch log stream prefix. + * + * @default - the job run ID.
+ */ + readonly logStreamPrefix?: string; + + /** + * Filter out non-useful Apache Spark driver/executor and Apache Hadoop YARN heartbeat log messages. + * + * @default true + */ + readonly quiet?: boolean; + + /** + * Apply the provided conversion pattern. + * + * This is a Log4j Conversion Pattern to customize driver and executor logs. + * + * @default `%d{yy/MM/dd HH:mm:ss} %p %c{1}: %m%n` + */ + readonly conversionPattern?: string; +} + +/** + * A base class is needed to be able to import existing Jobs into a CDK app to + * reference as part of a larger stack or construct. JobBase has the subset + * of attributes required to identify and reference an existing Glue Job, + * as well as some CloudWatch metric convenience functions to configure an + * event-driven flow using the job. + */ +export abstract class JobBase extends cdk.Resource implements IJob { + + public abstract readonly jobArn: string; + public abstract readonly jobName: string; + public abstract readonly grantPrincipal: iam.IPrincipal; + + /** + * Create a CloudWatch Event Rule for this Glue Job when it's in a given state + * + * @param id construct id + * @param options event options. Note that some values are overridden if provided, these are + * - eventPattern.source = ['aws.glue'] + * - eventPattern.detailType = ['Glue Job State Change', 'Glue Job Run Status'] + * - eventPattern.detail.jobName = [this.jobName] + * + * @see https://docs.aws.amazon.com/AmazonCloudWatch/latest/events/EventTypes.html#glue-event-types + */ + public onEvent(id: string, options: events.OnEventOptions = {}): events.Rule { + const rule = new events.Rule(this, id, options); + rule.addTarget(options.target); + rule.addEventPattern({ + source: ['aws.glue'], + detailType: ['Glue Job State Change', 'Glue Job Run Status'], + detail: { + jobName: [this.jobName], + }, + }); + return rule; + } + + /** + * Create a CloudWatch Event Rule for the transition into the input jobState. + * + * @param id construct id.
+ * @param jobState the job state. + * @param options optional event options. + */ + protected onStateChange(id: string, jobState: JobState, options: events.OnEventOptions = {}): events.Rule { + const rule = this.onEvent(id, { + description: `Rule triggered when Glue job ${this.jobName} is in ${jobState} state`, + ...options, + }); + rule.addEventPattern({ + detail: { + state: [jobState], + }, + }); + return rule; + } + + /** + * Create a CloudWatch Event Rule matching JobState.SUCCEEDED. + * + * @param id construct id. + * @param options optional event options. default is {}. + */ + public onSuccess(id: string, options: events.OnEventOptions = {}): events.Rule { + return this.onStateChange(id, JobState.SUCCEEDED, options); + } + + /** + * Return a CloudWatch Event Rule matching FAILED state. + * + * @param id construct id. + * @param options optional event options. default is {}. + */ + public onFailure(id: string, options: events.OnEventOptions = {}): events.Rule { + return this.onStateChange(id, JobState.FAILED, options); + } + + /** + * Return a CloudWatch Event Rule matching TIMEOUT state. + * + * @param id construct id. + * @param options optional event options. default is {}. + */ + public onTimeout(id: string, options: events.OnEventOptions = {}): events.Rule { + return this.onStateChange(id, JobState.TIMEOUT, options); + } + + /** + * Create a CloudWatch metric. + * + * @param metricName name of the metric typically prefixed with `glue.driver.`, `glue..` or `glue.ALL.`. + * @param type the metric type. + * @param props metric options. 
+ * + * @see https://docs.aws.amazon.com/glue/latest/dg/monitoring-awsglue-with-cloudwatch-metrics.html + */ + public metric(metricName: string, type: MetricType, props?: cloudwatch.MetricOptions): cloudwatch.Metric { + return new cloudwatch.Metric({ + metricName, + namespace: 'Glue', + dimensionsMap: { + JobName: this.jobName, + JobRunId: 'ALL', + Type: type, + }, + ...props, + }).attachTo(this); + } + + /** + * Return a CloudWatch Metric indicating job success. + * + * This metric is based on the Rule returned by no-args onSuccess() call. + */ + public metricSuccess(props?: cloudwatch.MetricOptions): cloudwatch.Metric { + return metricRule(this.metricJobStateRule('SuccessMetricRule', JobState.SUCCEEDED), props); + } + + /** + * Return a CloudWatch Metric indicating job failure. + * + * This metric is based on the Rule returned by no-args onFailure() call. + */ + public metricFailure(props?: cloudwatch.MetricOptions): cloudwatch.Metric { + return metricRule(this.metricJobStateRule('FailureMetricRule', JobState.FAILED), props); + } + + /** + * Return a CloudWatch Metric indicating job timeout. + * + * This metric is based on the Rule returned by no-args onTimeout() call. + */ + public metricTimeout(props?: cloudwatch.MetricOptions): cloudwatch.Metric { + return metricRule(this.metricJobStateRule('TimeoutMetricRule', JobState.TIMEOUT), props); + } + + /** + * Creates or retrieves a singleton event rule for the input job state for use with the metric JobState methods. + * + * @param id construct id. + * @param jobState the job state. + * @private + */ + private metricJobStateRule(id: string, jobState: JobState): events.Rule { + return this.node.tryFindChild(id) as events.Rule ?? 
this.onStateChange(id, jobState); + } + + /** + * Returns the job arn + * @param scope + * @param jobName + */ + protected buildJobArn(scope: constructs.Construct, jobName: string) : string { + return cdk.Stack.of(scope).formatArn({ + service: 'glue', + resource: 'job', + resourceName: jobName, + }); + } +} + +/** + * A subset of Job attributes are required for importing an existing job + * into a CDK project. This is only used when using fromJobAttributes + * to identify and reference the existing job. + */ +export interface JobImportAttributes { + /** + * The name of the job. + */ + readonly jobName: string; + + /** + * The IAM role assumed by Glue to run this job. + * + * @default - undefined + */ + readonly role?: iam.IRole; + +} + +/** + * JobProperties will be used to create new Glue Jobs using this L2 Construct. + */ +export interface JobProperties { + + /** + * Script Code Location (required) + * Script to run when the Glue job executes. Can be uploaded + * from the local directory structure using fromAsset + * or referenced via S3 location using fromBucket + **/ + readonly script: Code; + + /** + * IAM Role (required) + * IAM Role to use for Glue job execution + * Must be specified by the developer because the L2 doesn't have visibility + * into the actions the script(s) takes during the job execution + * The role must trust the Glue service principal (glue.amazonaws.com) + * and be granted sufficient permissions.
+ * + * @see https://docs.aws.amazon.com/glue/latest/dg/getting-started-access.html + **/ + readonly role: iam.IRole; + + /** + * Name of the Glue job (optional) + * Developer-specified name of the Glue job + * @default - a name is automatically generated + **/ + readonly jobName?: string; + + /** + * Description (optional) + * Developer-specified description of the Glue job + * @default - no value + **/ + readonly description?: string; + + /** + * Number of Workers (optional) + * Number of workers for Glue to use during job execution + * @default 10 + */ + readonly numberOrWorkers?: number; + + /** + * Worker Type (optional) + * Type of Worker for Glue to use during job execution + * Enum options: Standard, G_1X, G_2X, G_025X. G_4X, G_8X, Z_2X + * @default G_2X + **/ + readonly workerType?: WorkerType; + + /** + * Max Concurrent Runs (optional) + * The maximum number of runs this Glue job can concurrently run + * + * An error is returned when this threshold is reached. The maximum value + * you can specify is controlled by a service limit. + * + * @default 1 + **/ + readonly maxConcurrentRuns?: number; + + /** + * Default Arguments (optional) + * The default arguments for every run of this Glue job, + * specified as name-value pairs. + * + * @see https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-etl-glue-arguments.html + * for a list of reserved parameters + * @default - no arguments + **/ + readonly defaultArguments?: { [key: string]: string }; + + /** + * Connections (optional) + * List of connections to use for this Glue job + * Connections are used to connect to other AWS Service or resources within a VPC. 
+ * + * @default [] - no connections are added to the job + **/ + readonly connections?: IConnection[]; + + /** + * Max Retries (optional) + * Maximum number of retry attempts Glue performs if the job fails + * @default 0 + **/ + readonly maxRetries?: number; + + /** + * Timeout (optional) + * The maximum time that a job run can consume resources before it is + * terminated and enters TIMEOUT status. Specified in minutes. + * @default 2880 (2 days for non-streaming) + * + **/ + readonly timeout?: cdk.Duration; + + /** + * Security Configuration (optional) + * Defines the encryption options for the Glue job + * @default - no security configuration. + **/ + readonly securityConfiguration?: ISecurityConfiguration; + + /** + * Tags (optional) + * A list of key:value pairs of tags to apply to this Glue job resource + * @default {} - no tags + **/ + readonly tags?: { [key: string]: string }; + + /** + * Glue Version + * The version of Glue to use to execute this job + * @default 3.0 for ETL + **/ + readonly glueVersion?: GlueVersion; + + /** + * Enables the collection of metrics for job profiling. + * + * @default - no profiling metrics emitted. + * + * @see `--enable-metrics` at https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-etl-glue-arguments.html + **/ + readonly enableProfilingMetrics? :boolean; + + /** + * Enables continuous logging with the specified props. + * + * @default - continuous logging is disabled. + * + * @see https://docs.aws.amazon.com/glue/latest/dg/monitor-continuous-logging-enable.html + * @see https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-etl-glue-arguments.html + **/ + readonly continuousLogging?: ContinuousLoggingProps; + +} + +/** + * A Glue Job. + * @resource AWS::Glue::Job + */ +export abstract class Job extends JobBase { + + /** + * Identifies an existing Glue Job from a subset of attributes that can + * be referenced from within another Stack or Construct.
+ * + * @param scope The scope creating construct (usually `this`) + * @param id The construct's id. + * @param attrs Attributes for the Glue Job we want to import + */ + public static fromJobAttributes(scope: constructs.Construct, id: string, attrs: JobImportAttributes): IJob { + class Import extends JobBase { + public readonly jobName = attrs.jobName; + public readonly jobArn = this.buildJobArn(scope, attrs.jobName); + public readonly grantPrincipal = attrs.role ?? new iam.UnknownPrincipal({ resource: this }); + } + + return new Import(scope, id); + } + + /** + * The IAM role Glue assumes to run this job. + */ + public readonly abstract role: iam.IRole; + + /** + * Check no usage of reserved arguments. + * + * @see https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-etl-glue-arguments.html + */ + protected checkNoReservedArgs(defaultArguments?: { [key: string]: string }) { + if (defaultArguments) { + const reservedArgs = new Set(['--debug', '--mode', '--JOB_NAME']); + Object.keys(defaultArguments).forEach((arg) => { + if (reservedArgs.has(arg)) { + throw new Error(`The ${arg} argument is reserved by Glue. Don't set it`); + } + }); + } + return defaultArguments; + } + + /** + * Setup Continuous Logging Properties + * @param role The IAM role to use for continuous logging + * @param props The properties for continuous logging configuration + * @returns Map of Glue job argument names to values that enable continuous logging + */ + public setupContinuousLogging(role: iam.IRole, props: ContinuousLoggingProps) { + const args: {[key: string]: string} = { + '--enable-continuous-cloudwatch-log': 'true', + '--enable-continuous-log-filter': (props.quiet ??
true).toString(), + }; + + if (props.logGroup) { + args['--continuous-log-logGroup'] = props.logGroup.logGroupName; + props.logGroup.grantWrite(role); + } + + if (props.logStreamPrefix) { + args['--continuous-log-logStreamPrefix'] = props.logStreamPrefix; + } + if (props.conversionPattern) { + args['--continuous-log-conversionPattern'] = props.conversionPattern; + } + return args; + } + + protected codeS3ObjectUrl(code: Code) { + const s3Location = code.bind(this, this.role).s3Location; + return `s3://${s3Location.bucketName}/${s3Location.objectKey}`; + } + +} + +/** + * Create a CloudWatch Metric that's based on Glue Job events + * {@see https://docs.aws.amazon.com/AmazonCloudWatch/latest/events/EventTypes.html#glue-event-types} + * The metric has namespace = 'AWS/Events', metricName = 'TriggeredRules' and RuleName = rule.ruleName dimension. + * + * @param rule for use in setting RuleName dimension value + * @param props metric properties + */ +function metricRule(rule: events.IRule, props?: cloudwatch.MetricOptions): cloudwatch.Metric { + return new cloudwatch.Metric({ + namespace: 'AWS/Events', + metricName: 'TriggeredRules', + dimensionsMap: { RuleName: rule.ruleName }, + statistic: cloudwatch.Statistic.SUM, + ...props, + }).attachTo(rule); +} + diff --git a/packages/@aws-cdk/aws-glue-alpha/lib/jobs/pyspark-etl-job.ts b/packages/@aws-cdk/aws-glue-alpha/lib/jobs/pyspark-etl-job.ts new file mode 100644 index 0000000000000..9d8a8b70d3dd4 --- /dev/null +++ b/packages/@aws-cdk/aws-glue-alpha/lib/jobs/pyspark-etl-job.ts @@ -0,0 +1,180 @@ +import * as iam from 'aws-cdk-lib/aws-iam'; +import { Bucket } from 'aws-cdk-lib/aws-s3'; +import { CfnJob } from 'aws-cdk-lib/aws-glue'; +import { Job, JobProperties } from './job'; +import { Construct } from 'constructs'; +import { JobType, GlueVersion, JobLanguage, PythonVersion, WorkerType } from '../constants'; +import { SparkUIProps, SparkUILoggingLocation, validateSparkUiPrefix, cleanSparkUiPrefixForGrant } from 
'./spark-ui-utils'; + +/** + * Spark ETL Jobs class + * ETL jobs support pySpark and Scala languages, for which there are separate + * but similar constructors. ETL jobs default to the G2 worker type, but you + * can override this default with other supported worker type values + * (G1, G2, G4 and G8). ETL jobs defaults to Glue version 4.0, which you can + * override to 3.0. The following ETL features are enabled by default: + * —enable-metrics, —enable-spark-ui, —enable-continuous-cloudwatch-log. + * You can find more details about version, worker type and other features + * in Glue's public documentation. + */ + +/** + * Properties for creating a Python Spark ETL job + */ +export interface PySparkEtlJobProps extends JobProperties { + + /** + * Enables the Spark UI debugging and monitoring with the specified props. + * + * @default - Spark UI debugging and monitoring is disabled. + * + * @see https://docs.aws.amazon.com/glue/latest/dg/monitor-spark-ui-jobs.html + * @see https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-etl-glue-arguments.html + */ + readonly sparkUI?: SparkUIProps; + + /** + * Extra Python Files S3 URL (optional) + * S3 URL where additional python dependencies are located + * @default - no extra files + */ + readonly extraPythonFiles?: string[]; + +} + +/** + * A Python Spark ETL Glue Job + */ +export class PySparkEtlJob extends Job { + + // Implement abstract Job attributes + public readonly jobArn: string; + public readonly jobName: string; + public readonly role: iam.IRole; + public readonly grantPrincipal: iam.IPrincipal; + + /** + * The Spark UI logs location if Spark UI monitoring and debugging is enabled. 
+ * + * @see https://docs.aws.amazon.com/glue/latest/dg/monitor-spark-ui-jobs.html + * @see https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-etl-glue-arguments.html + */ + public readonly sparkUILoggingLocation?: SparkUILoggingLocation; + + /** + * PySparkEtlJob constructor + * + * @param scope + * @param id + * @param props + */ + constructor(scope: Construct, id: string, props: PySparkEtlJobProps) { + super(scope, id, { + physicalName: props.jobName, + }); + + // Set up role and permissions for principal + this.role = props.role, { + assumedBy: new iam.ServicePrincipal('glue.amazonaws.com'), + managedPolicies: [iam.ManagedPolicy.fromAwsManagedPolicyName('service-role/AWSGlueServiceRole')], + }; + this.grantPrincipal = this.role; + + // Enable SparkUI by default as a best practice + const sparkUIArgs = props.sparkUI?.bucket ? this.setupSparkUI(this.role, props.sparkUI) : undefined; + this.sparkUILoggingLocation = sparkUIArgs?.location; + + // Enable CloudWatch metrics and continuous logging by default as a best practice + const continuousLoggingArgs = props.continuousLogging?.enabled ? 
this.setupContinuousLogging(this.role, props.continuousLogging) : {}; + const profilingMetricsArgs = { '--enable-metrics': '' }; + + // Gather executable arguments + const execuatbleArgs = this.executableArguments(props); + + // Combine command line arguments into a single line item + const defaultArguments = { + ...execuatbleArgs, + ...continuousLoggingArgs, + ...profilingMetricsArgs, + ...sparkUIArgs?.args, + ...this.checkNoReservedArgs(props.defaultArguments), + }; + + if ((!props.workerType && props.numberOrWorkers !== undefined) || (props.workerType && props.numberOrWorkers === undefined)) { + throw new Error('Both workerType and numberOrWorkers must be set'); + } + + const jobResource = new CfnJob(this, 'Resource', { + name: props.jobName, + description: props.description, + role: this.role.roleArn, + command: { + name: JobType.ETL, + scriptLocation: this.codeS3ObjectUrl(props.script), + pythonVersion: PythonVersion.THREE_NINE, + }, + glueVersion: props.glueVersion ? props.glueVersion : GlueVersion.V4_0, + workerType: props.workerType ? props.workerType : WorkerType.G_2X, + numberOfWorkers: props.numberOrWorkers, + maxRetries: props.maxRetries, + executionProperty: props.maxConcurrentRuns ? { maxConcurrentRuns: props.maxConcurrentRuns } : undefined, + //notificationProperty: props.notifyDelayAfter ? { notifyDelayAfter: props.notifyDelayAfter.toMinutes() } : undefined, + timeout: props.timeout?.toMinutes(), + connections: props.connections ?
{ connections: props.connections.map((connection) => connection.connectionName) } : undefined, + securityConfiguration: props.securityConfiguration?.securityConfigurationName, + tags: props.tags, + defaultArguments, + }); + + const resourceName = this.getResourceNameAttribute(jobResource.ref); + this.jobArn = this.buildJobArn(this, resourceName); + this.jobName = resourceName; + } + + /** + * Set the executable arguments with best practices enabled by default + * + * @param props + * @returns An array of arguments for Glue to use on execution + */ + private executableArguments(props: PySparkEtlJobProps) { + const args: { [key: string]: string } = {}; + args['--job-language'] = JobLanguage.PYTHON; + + // TODO: Confirm with Glue service team what the mapping is from extra-x to job language, if any + if (props.extraPythonFiles && props.extraPythonFiles.length > 0) { + //args['--extra-py-files'] = props.extraPythonFiles.map(code => this.codeS3ObjectUrl(code)).join(','); + } + + // if (props.extraJars && props.extraJars?.length > 0) { + // args['--extra-jars'] = props.extraJars.map(code => this.codeS3ObjectUrl(code)).join(','); + // } + // if (props.extraFiles && props.extraFiles.length > 0) { + // args['--extra-files'] = props.extraFiles.map(code => this.codeS3ObjectUrl(code)).join(','); + // } + // if (props.extraJarsFirst) { + // args['--user-jars-first'] = 'true'; + // } + + return args; + } + + private setupSparkUI(role: iam.IRole, sparkUiProps: SparkUIProps) { + + validateSparkUiPrefix(sparkUiProps.prefix); + const bucket = sparkUiProps.bucket ?? 
new Bucket(this, 'SparkUIBucket'); + bucket.grantReadWrite(role, cleanSparkUiPrefixForGrant(sparkUiProps.prefix)); + const args = { + '--enable-spark-ui': 'true', + '--spark-event-logs-path': bucket.s3UrlForObject(sparkUiProps.prefix), + }; + + return { + location: { + prefix: sparkUiProps.prefix, + bucket, + }, + args, + }; + } +} \ No newline at end of file diff --git a/packages/@aws-cdk/aws-glue-alpha/lib/jobs/python-shell-job.ts b/packages/@aws-cdk/aws-glue-alpha/lib/jobs/python-shell-job.ts new file mode 100644 index 0000000000000..d5bc6584175a6 --- /dev/null +++ b/packages/@aws-cdk/aws-glue-alpha/lib/jobs/python-shell-job.ts @@ -0,0 +1,8 @@ +/** + * Python Shell Jobs class + * + * A Python shell job runs Python scripts as a shell and supports a Python version that + * depends on the AWS Glue version you are using. + * This can be used to schedule and run tasks that don't require an Apache Spark environment. + * + */ diff --git a/packages/@aws-cdk/aws-glue-alpha/lib/jobs/ray-job.ts b/packages/@aws-cdk/aws-glue-alpha/lib/jobs/ray-job.ts new file mode 100644 index 0000000000000..fff73ebde2732 --- /dev/null +++ b/packages/@aws-cdk/aws-glue-alpha/lib/jobs/ray-job.ts @@ -0,0 +1,7 @@ +/** + * Ray Jobs class + * + * Glue ray only supports worker type Z.2X and Glue version 4.0. + * Runtime will default to Ray2.3 and min workers will default to 3. + * + */ diff --git a/packages/@aws-cdk/aws-glue-alpha/lib/jobs/scala-spark-etl-job.ts b/packages/@aws-cdk/aws-glue-alpha/lib/jobs/scala-spark-etl-job.ts new file mode 100644 index 0000000000000..9be9ab1859c85 --- /dev/null +++ b/packages/@aws-cdk/aws-glue-alpha/lib/jobs/scala-spark-etl-job.ts @@ -0,0 +1,12 @@ +/** + * Spark ETL Jobs class + * ETL jobs support pySpark and Scala languages, for which there are separate + * but similar constructors. ETL jobs default to the G2 worker type, but you + * can override this default with other supported worker type values + * (G1, G2, G4 and G8). 
ETL jobs default to Glue version 4.0, which you can + * override to 3.0. The following ETL features are enabled by default: + * --enable-metrics, --enable-spark-ui, --enable-continuous-cloudwatch-log. + * You can find more details about version, worker type and other features + * in Glue's public documentation. + */ + diff --git a/packages/@aws-cdk/aws-glue-alpha/lib/jobs/spark-etl-job.ts b/packages/@aws-cdk/aws-glue-alpha/lib/jobs/spark-etl-job.ts new file mode 100644 index 0000000000000..52a13d3e48ca1 --- /dev/null +++ b/packages/@aws-cdk/aws-glue-alpha/lib/jobs/spark-etl-job.ts @@ -0,0 +1,10 @@ +/** + * Spark ETL Jobs class + * + * ETL jobs support the Python and Scala languages. + * The ETL job type supports the G1, G2, G4 and G8 worker types and defaults to G2, which customers can override. + * It will default to the best practice version of ETL 4.0, but allow developers to override to 3.0. + * We will also enable the following ETL features by default as a best practice: + * --enable-metrics, --enable-spark-ui, --enable-continuous-cloudwatch-log. + * + */ \ No newline at end of file diff --git a/packages/@aws-cdk/aws-glue-alpha/lib/jobs/spark-ui-utils.ts b/packages/@aws-cdk/aws-glue-alpha/lib/jobs/spark-ui-utils.ts new file mode 100644 index 0000000000000..89af210af7f1d --- /dev/null +++ b/packages/@aws-cdk/aws-glue-alpha/lib/jobs/spark-ui-utils.ts @@ -0,0 +1,72 @@ +import { IBucket } from 'aws-cdk-lib/aws-s3'; +import { Token } from 'aws-cdk-lib'; +import { EOL } from 'os'; + +/** + * Properties for enabling Spark UI monitoring feature for Spark-based Glue jobs. + * + * @see https://docs.aws.amazon.com/glue/latest/dg/monitor-spark-ui-jobs.html + * @see https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-etl-glue-arguments.html + */ +export interface SparkUIProps { + + /** + * The bucket where the Glue job stores the logs. + * + * @default a new bucket will be created.
+ */ + readonly bucket?: IBucket; + + /** + * The path inside the bucket (objects prefix) where the Glue job stores the logs. + * Use format `'/foo/bar'` + * + * @default - the logs will be written at the root of the bucket + */ + readonly prefix?: string; +} + +/** + * The Spark UI logging location. + * + * @see https://docs.aws.amazon.com/glue/latest/dg/monitor-spark-ui-jobs.html + * @see https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-etl-glue-arguments.html + */ +export interface SparkUILoggingLocation { + /** + * The bucket where the Glue job stores the logs. + */ + readonly bucket: IBucket; + + /** + * The path inside the bucket (objects prefix) where the Glue job stores the logs. + * + * @default '/' - the logs will be written at the root of the bucket + */ + readonly prefix?: string; +} + +export function validateSparkUiPrefix(prefix?: string): void { + if (!prefix || Token.isUnresolved(prefix)) { + // skip validation if prefix is not specified or is a token + return; + } + + const errors: string[] = []; + + if (!prefix.startsWith('/')) { + errors.push('Prefix must begin with \'/\''); + } + + if (prefix.endsWith('/')) { + errors.push('Prefix must not end with \'/\''); + } + + if (errors.length > 0) { + throw new Error(`Invalid prefix format (value: ${prefix})${EOL}${errors.join(EOL)}`); + } +} + +export function cleanSparkUiPrefixForGrant(prefix?: string): string | undefined { + return prefix !== undefined ? prefix.slice(1) + '/*' : undefined; +} \ No newline at end of file diff --git a/packages/@aws-cdk/aws-glue-alpha/lib/jobs/streaming-job.ts b/packages/@aws-cdk/aws-glue-alpha/lib/jobs/streaming-job.ts new file mode 100644 index 0000000000000..54a2ac9150336 --- /dev/null +++ b/packages/@aws-cdk/aws-glue-alpha/lib/jobs/streaming-job.ts @@ -0,0 +1,13 @@ +/** + * Streaming Jobs class + * + * A Streaming job is similar to an ETL job, except that it performs ETL on data streams + * using the Apache Spark Structured Streaming framework. 
+ * These jobs will default to use Python 3.9. + * + * Similar to ETL jobs, streaming jobs support the Scala and Python languages. Similar to ETL, + * they support the G1 and G2 worker types and Glue versions 2.0, 3.0 and 4.0. We’ll default to the G2 worker + * type and version 4.0 for streaming jobs, which developers can override. + * We will enable --enable-metrics, --enable-spark-ui, --enable-continuous-cloudwatch-log. + * + */ diff --git a/packages/@aws-cdk/aws-glue-alpha/lib/triggers/conditional-triggers.ts b/packages/@aws-cdk/aws-glue-alpha/lib/triggers/conditional-triggers.ts new file mode 100644 index 0000000000000..487bf3b1ed291 --- /dev/null +++ b/packages/@aws-cdk/aws-glue-alpha/lib/triggers/conditional-triggers.ts @@ -0,0 +1,7 @@ +/** + * Conditional Trigger Class + * + * Conditional triggers have a predicate and actions associated with them. + * When the predicateCondition is true, the trigger actions will be executed. + * + */ diff --git a/packages/@aws-cdk/aws-glue-alpha/lib/triggers/notify-event-trigger.ts b/packages/@aws-cdk/aws-glue-alpha/lib/triggers/notify-event-trigger.ts new file mode 100644 index 0000000000000..aaed7b7b623c8 --- /dev/null +++ b/packages/@aws-cdk/aws-glue-alpha/lib/triggers/notify-event-trigger.ts @@ -0,0 +1,10 @@ +/** + * Notify Event Trigger Class + * + * Workflows are mandatory for this trigger type. There are two types of notify event triggers, + * batching and non-batching triggers. + * For batching triggers, developers must specify BatchSize but for non-batching BatchSize will + * be set to 1. + * For both triggers, BatchWindow will default to 900 seconds.
+ * + */ diff --git a/packages/@aws-cdk/aws-glue-alpha/lib/triggers/on-demand-trigger.ts b/packages/@aws-cdk/aws-glue-alpha/lib/triggers/on-demand-trigger.ts new file mode 100644 index 0000000000000..f9aa131a1f7d2 --- /dev/null +++ b/packages/@aws-cdk/aws-glue-alpha/lib/triggers/on-demand-trigger.ts @@ -0,0 +1,8 @@ +/** + * On Demand Trigger Class + * + * On demand triggers can start glue jobs or crawlers. + * The trigger method will take an optional description but abstract the requirement of an + * actions list using the job or crawler objects using conditional types. + * + */ diff --git a/packages/@aws-cdk/aws-glue-alpha/lib/triggers/scheduled-trigger.ts b/packages/@aws-cdk/aws-glue-alpha/lib/triggers/scheduled-trigger.ts new file mode 100644 index 0000000000000..c34e61330a519 --- /dev/null +++ b/packages/@aws-cdk/aws-glue-alpha/lib/triggers/scheduled-trigger.ts @@ -0,0 +1,12 @@ +/** + * Scheduled Trigger Base Class + * + * Schedule triggers are a way for developers to create jobs using cron expressions. + * We’ll provide daily, weekly, and monthly convenience functions, as well as a custom function + * that will allow developers to create their own custom timing using the existing + * event Schedule object without having to build their own cron expressions. + * + * The trigger method will take an optional description and list of Actions + * which can refer to Jobs or crawlers via conditional types. + * + */ diff --git a/packages/@aws-cdk/aws-glue-alpha/lib/triggers/trigger.ts b/packages/@aws-cdk/aws-glue-alpha/lib/triggers/trigger.ts new file mode 100644 index 0000000000000..c40c3d0efe805 --- /dev/null +++ b/packages/@aws-cdk/aws-glue-alpha/lib/triggers/trigger.ts @@ -0,0 +1,7 @@ +/** + * Workflow Trigger Base Class + * + * In AWS Glue, developers can use workflows to create and visualize complex extract, + * transform, and load (ETL) activities involving multiple crawlers, jobs, and triggers. 
+ * + */ diff --git a/packages/@aws-cdk/aws-glue-alpha/test/code.test.ts b/packages/@aws-cdk/aws-glue-alpha/test/code.test.ts index 9b213cd891134..8d40a1aae9a8c 100644 --- a/packages/@aws-cdk/aws-glue-alpha/test/code.test.ts +++ b/packages/@aws-cdk/aws-glue-alpha/test/code.test.ts @@ -21,7 +21,7 @@ describe('Code', () => { test('with valid bucket name and key and bound by job sets the right path and grants the job permissions to read from it', () => { bucket = s3.Bucket.fromBucketName(stack, 'Bucket', 'bucketname'); script = glue.Code.fromBucket(bucket, key); - new glue.Job(stack, 'Job1', { + new glue.JobLegacy(stack, 'Job1', { executable: glue.JobExecutable.pythonShell({ glueVersion: glue.GlueVersion.V1_0, pythonVersion: glue.PythonVersion.THREE, @@ -93,7 +93,7 @@ describe('Code', () => { }); test("with valid and existing file path and bound to job sets job's script location and permissions stack metadata", () => { - new glue.Job(stack, 'Job1', { + new glue.JobLegacy(stack, 'Job1', { executable: glue.JobExecutable.pythonShell({ glueVersion: glue.GlueVersion.V1_0, pythonVersion: glue.PythonVersion.THREE, @@ -205,14 +205,14 @@ describe('Code', () => { }); test('used in more than 1 job in the same stack should be reused', () => { - new glue.Job(stack, 'Job1', { + new glue.JobLegacy(stack, 'Job1', { executable: glue.JobExecutable.pythonShell({ glueVersion: glue.GlueVersion.V1_0, pythonVersion: glue.PythonVersion.THREE, script, }), }); - new glue.Job(stack, 'Job2', { + new glue.JobLegacy(stack, 'Job2', { executable: glue.JobExecutable.pythonShell({ glueVersion: glue.GlueVersion.V1_0, pythonVersion: glue.PythonVersion.THREE, @@ -285,7 +285,7 @@ describe('Code', () => { }); test('throws if trying to rebind in another stack', () => { - new glue.Job(stack, 'Job1', { + new glue.JobLegacy(stack, 'Job1', { executable: glue.JobExecutable.pythonShell({ glueVersion: glue.GlueVersion.V1_0, pythonVersion: glue.PythonVersion.THREE, @@ -294,7 +294,7 @@ describe('Code', () => { }); 
const differentStack = new cdk.Stack(); - expect(() => new glue.Job(differentStack, 'Job2', { + expect(() => new glue.JobLegacy(differentStack, 'Job2', { executable: glue.JobExecutable.pythonShell({ glueVersion: glue.GlueVersion.V1_0, pythonVersion: glue.PythonVersion.THREE, diff --git a/packages/@aws-cdk/aws-glue-alpha/test/constants.test.ts b/packages/@aws-cdk/aws-glue-alpha/test/constants.test.ts new file mode 100644 index 0000000000000..2b1a68680f82d --- /dev/null +++ b/packages/@aws-cdk/aws-glue-alpha/test/constants.test.ts @@ -0,0 +1,95 @@ +import * as glue from '../lib'; + +describe('WorkerType', () => { + test('.STANDARD should set the name correctly', () => expect(glue.WorkerType.STANDARD).toEqual('Standard')); + + test('.G_1X should set the name correctly', () => expect(glue.WorkerType.G_1X).toEqual('G.1X')); + + test('.G_2X should set the name correctly', () => expect(glue.WorkerType.G_2X).toEqual('G.2X')); + + test('.G_4X should set the name correctly', () => expect(glue.WorkerType.G_4X).toEqual('G.4X')); + + test('.G_8X should set the name correctly', () => expect(glue.WorkerType.G_8X).toEqual('G.8X')); + + test('.G_025X should set the name correctly', () => expect(glue.WorkerType.G_025X).toEqual('G.025X')); + + test('.Z_2X should set the name correctly', () => expect(glue.WorkerType.Z_2X).toEqual('Z.2X')); + +}); + +describe('JobState', () => { + test('SUCCEEDED should set Job State correctly', () => expect(glue.JobState.SUCCEEDED).toEqual('SUCCEEDED')); + + test('FAILED should set Job State correctly', () => expect(glue.JobState.FAILED).toEqual('FAILED')); + + test('RUNNING should set Job State correctly', () => expect(glue.JobState.RUNNING).toEqual('RUNNING')); + + test('STARTING should set Job State correctly', () => expect(glue.JobState.STARTING).toEqual('STARTING')); + + test('STOPPED should set Job State correctly', () => expect(glue.JobState.STOPPED).toEqual('STOPPED')); + + test('STOPPING should set Job State correctly', () => 
expect(glue.JobState.STOPPING).toEqual('STOPPING')); + + test('TIMEOUT should set Job State correctly', () => expect(glue.JobState.TIMEOUT).toEqual('TIMEOUT')); + +}); + +describe('Metric Type', () => { + test('GAUGE should set Metric Type correctly', () => expect(glue.MetricType.GAUGE).toEqual('gauge')); + + test('COUNT should set Metric Type correctly', () => expect(glue.MetricType.COUNT).toEqual('count')); + +}); + +describe('Execution Class', () => { + test('FLEX should set Execution Class correctly', () => expect(glue.ExecutionClass.FLEX).toEqual('FLEX')); + + test('STANDARD should set Execution Class correctly', () => expect(glue.ExecutionClass.STANDARD).toEqual('STANDARD')); + +}); + +describe('Glue Version', () => { + test('V0_9 should set Glue Version correctly', () => expect(glue.GlueVersion.V0_9).toEqual('0.9')); + + test('V1_0 should set Glue Version correctly', () => expect(glue.GlueVersion.V1_0).toEqual('1.0')); + + test('V2_0 should set Glue Version correctly', () => expect(glue.GlueVersion.V2_0).toEqual('2.0')); + + test('V3_0 should set Glue Version correctly', () => expect(glue.GlueVersion.V3_0).toEqual('3.0')); + + test('V4_0 should set Glue Version correctly', () => expect(glue.GlueVersion.V4_0).toEqual('4.0')); + +}); + +describe('Job Language', () => { + test('PYTHON should set Job Language correctly', () => expect(glue.JobLanguage.PYTHON).toEqual('python')); + + test('SCALA should set Job Language correctly', () => expect(glue.JobLanguage.SCALA).toEqual('scala')); + +}); + +describe('Python Version', () => { + test('TWO should set Python Version correctly', () => expect(glue.PythonVersion.TWO).toEqual('2')); + + test('THREE should set Python Version correctly', () => expect(glue.PythonVersion.THREE).toEqual('3')); + + test('THREE_NINE should set Python Version correctly', () => expect(glue.PythonVersion.THREE_NINE).toEqual('3.9')); + +}); + +describe('Runtime', () => { + test('RAY_TWO_FOUR should set Runtime correctly', () => 
expect(glue.Runtime.RAY_TWO_FOUR).toEqual('Ray2.4')); + +}); + +describe('JobType', () => { + test('ETL should set Runtime correctly', () => expect(glue.JobType.ETL).toEqual('glueetl')); + + test('PYTHON_SHELL should set Runtime correctly', () => expect(glue.JobType.PYTHON_SHELL).toEqual('pythonshell')); + + test('RAY should set Runtime correctly', () => expect(glue.JobType.RAY).toEqual('glueray')); + + test('STREAMING should set Runtime correctly', () => expect(glue.JobType.STREAMING).toEqual('gluestreaming')); + +}); + diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-python-shell.ts b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-python-shell.ts index a08a19713b9a7..6e50800b2ebca 100644 --- a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-python-shell.ts +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-python-shell.ts @@ -24,7 +24,7 @@ const stack = new cdk.Stack(app, 'aws-glue-job-python-shell'); const script = glue.Code.fromAsset(path.join(__dirname, 'job-script', 'hello_world.py')); -new glue.Job(stack, 'ShellJob', { +new glue.JobLegacy(stack, 'ShellJob', { jobName: 'ShellJob', executable: glue.JobExecutable.pythonShell({ glueVersion: glue.GlueVersion.V1_0, @@ -41,7 +41,7 @@ new glue.Job(stack, 'ShellJob', { maxCapacity: 0.0625, }); -new glue.Job(stack, 'ShellJob39', { +new glue.JobLegacy(stack, 'ShellJob39', { jobName: 'ShellJob39', executable: glue.JobExecutable.pythonShell({ glueVersion: glue.GlueVersion.V3_0, diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.job.ts b/packages/@aws-cdk/aws-glue-alpha/test/integ.job.ts index c16b5f9691ae1..af502a5b12237 100644 --- a/packages/@aws-cdk/aws-glue-alpha/test/integ.job.ts +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.job.ts @@ -24,8 +24,8 @@ const stack = new cdk.Stack(app, 'aws-glue-job'); const script = glue.Code.fromAsset(path.join(__dirname, 'job-script', 'hello_world.py')); [glue.GlueVersion.V2_0, glue.GlueVersion.V3_0, glue.GlueVersion.V4_0].forEach((glueVersion) => { - const 
etlJob = new glue.Job(stack, 'EtlJob' + glueVersion.name, { - jobName: 'EtlJob' + glueVersion.name, + const etlJob = new glue.JobLegacy(stack, 'EtlJob' + glueVersion, { + jobName: 'EtlJob' + glueVersion, executable: glue.JobExecutable.pythonEtl({ pythonVersion: glue.PythonVersion.THREE, glueVersion, @@ -42,9 +42,9 @@ const script = glue.Code.fromAsset(path.join(__dirname, 'job-script', 'hello_wor 'arg2': 'value2', '--conf': 'valueConf', }, - sparkUI: { - enabled: true, - }, + // sparkUI: { + // enabled: true, + // }, continuousLogging: { enabled: true, quiet: true, @@ -56,8 +56,8 @@ const script = glue.Code.fromAsset(path.join(__dirname, 'job-script', 'hello_wor }, }); etlJob.metricSuccess(); - new glue.Job(stack, 'StreamingJob' + glueVersion.name, { - jobName: 'StreamingJob' + glueVersion.name, + new glue.JobLegacy(stack, 'StreamingJob' + glueVersion, { + jobName: 'StreamingJob' + glueVersion, executable: glue.JobExecutable.pythonStreaming({ pythonVersion: glue.PythonVersion.THREE, glueVersion, @@ -75,7 +75,7 @@ const script = glue.Code.fromAsset(path.join(__dirname, 'job-script', 'hello_wor }); }); -new glue.Job(stack, 'ShellJob', { +new glue.JobLegacy(stack, 'ShellJob', { jobName: 'ShellJob', executable: glue.JobExecutable.pythonShell({ glueVersion: glue.GlueVersion.V1_0, @@ -91,7 +91,7 @@ new glue.Job(stack, 'ShellJob', { }, }); -new glue.Job(stack, 'ShellJob39', { +new glue.JobLegacy(stack, 'ShellJob39', { jobName: 'ShellJob39', executable: glue.JobExecutable.pythonShell({ glueVersion: glue.GlueVersion.V1_0, @@ -107,7 +107,7 @@ new glue.Job(stack, 'ShellJob39', { }, }); -new glue.Job(stack, 'RayJob', { +new glue.JobLegacy(stack, 'RayJob', { jobName: 'RayJob', executable: glue.JobExecutable.pythonRay({ glueVersion: glue.GlueVersion.V4_0, @@ -126,7 +126,7 @@ new glue.Job(stack, 'RayJob', { }, }); -new glue.Job(stack, 'EtlJobWithFLEX', { +new glue.JobLegacy(stack, 'EtlJobWithFLEX', { jobName: 'EtlJobWithFLEX', executable: glue.JobExecutable.pythonEtl({ 
glueVersion: glue.GlueVersion.V3_0, diff --git a/packages/@aws-cdk/aws-glue-alpha/test/job-executable.test.ts b/packages/@aws-cdk/aws-glue-alpha/test/job-executable.test.ts index 28c6225c542b6..43028fc5109d7 100644 --- a/packages/@aws-cdk/aws-glue-alpha/test/job-executable.test.ts +++ b/packages/@aws-cdk/aws-glue-alpha/test/job-executable.test.ts @@ -2,40 +2,6 @@ import * as s3 from 'aws-cdk-lib/aws-s3'; import * as cdk from 'aws-cdk-lib'; import * as glue from '../lib'; -describe('GlueVersion', () => { - test('.V0_9 should set the name correctly', () => expect(glue.GlueVersion.V0_9.name).toEqual('0.9')); - - test('.V1_0 should set the name correctly', () => expect(glue.GlueVersion.V1_0.name).toEqual('1.0')); - - test('.V2_0 should set the name correctly', () => expect(glue.GlueVersion.V2_0.name).toEqual('2.0')); - - test('.V3_0 should set the name correctly', () => expect(glue.GlueVersion.V3_0.name).toEqual('3.0')); - - test('.V4_0 should set the name correctly', () => expect(glue.GlueVersion.V4_0.name).toEqual('4.0')); - - test('of(customVersion) should set the name correctly', () => expect(glue.GlueVersion.of('CustomVersion').name).toEqual('CustomVersion')); -}); - -describe('PythonVersion', () => { - test('.TWO should set the name correctly', () => expect(glue.PythonVersion.TWO).toEqual('2')); - - test('.THREE should set the name correctly', () => expect(glue.PythonVersion.THREE).toEqual('3')); - - test('.THREE_NINE should set the name correctly', () => expect(glue.PythonVersion.THREE_NINE).toEqual('3.9')); -}); - -describe('JobType', () => { - test('.ETL should set the name correctly', () => expect(glue.JobType.ETL.name).toEqual('glueetl')); - - test('.STREAMING should set the name correctly', () => expect(glue.JobType.STREAMING.name).toEqual('gluestreaming')); - - test('.PYTHON_SHELL should set the name correctly', () => expect(glue.JobType.PYTHON_SHELL.name).toEqual('pythonshell')); - - test('.RAY should set the name correctly', () => 
expect(glue.JobType.RAY.name).toEqual('glueray')); - - test('of(customName) should set the name correctly', () => expect(glue.JobType.of('CustomName').name).toEqual('CustomName')); -}); - describe('JobExecutable', () => { let stack: cdk.Stack; let bucket: s3.IBucket; @@ -67,19 +33,10 @@ describe('JobExecutable', () => { })).toThrow(/Python shell requires the language to be set to Python/); }); - test('with JobType.of("pythonshell") and a language other than JobLanguage.PYTHON should throw', () => { - expect(() => glue.JobExecutable.of({ - glueVersion: glue.GlueVersion.V3_0, - type: glue.JobType.of('pythonshell'), - language: glue.JobLanguage.SCALA, - script, - })).toThrow(/Python shell requires the language to be set to Python/); - }); - test('with JobType.of("glueray") and a language other than JobLanguage.PYTHON should throw', () => { expect(() => glue.JobExecutable.of({ glueVersion: glue.GlueVersion.V4_0, - type: glue.JobType.of('glueray'), + type: glue.JobType.RAY, language: glue.JobLanguage.SCALA, script, })).toThrow(/Ray requires the language to be set to Python/); @@ -113,19 +70,19 @@ describe('JobExecutable', () => { pythonVersion: glue.PythonVersion.TWO, script, glueVersion, - })).toThrow(`Specified GlueVersion ${glueVersion.name} does not support Python Shell`); + })).toThrow(`Specified GlueVersion ${glueVersion} does not support Python Shell`); }); }); [glue.GlueVersion.V0_9, glue.GlueVersion.V4_0].forEach((glueVersion) => { - test(`with JobType.PYTHON_SHELL and GlueVersion.of("${glueVersion.name}") should throw`, () => { + test(`with JobType.PYTHON_SHELL and GlueVersion.of("${glueVersion}") should throw`, () => { expect(() => glue.JobExecutable.of({ type: glue.JobType.PYTHON_SHELL, language: glue.JobLanguage.PYTHON, pythonVersion: glue.PythonVersion.TWO, script, - glueVersion: glue.GlueVersion.of(glueVersion.name), - })).toThrow(`Specified GlueVersion ${glueVersion.name} does not support Python Shell`); + glueVersion: glueVersion, + 
})).toThrow(`Specified GlueVersion ${glueVersion} does not support Python Shell`); }); }); @@ -137,24 +94,24 @@ describe('JobExecutable', () => { pythonVersion: glue.PythonVersion.TWO, script, glueVersion, - })).toThrow(`Specified GlueVersion ${glueVersion.name} does not support Ray`); + })).toThrow(`Specified GlueVersion ${glueVersion} does not support Ray`); }); }); [glue.GlueVersion.V0_9, glue.GlueVersion.V1_0, glue.GlueVersion.V2_0, glue.GlueVersion.V3_0].forEach((glueVersion) => { test(`with JobType.of("glueray") and GlueVersion ${glueVersion} should throw`, () => { expect(() => glue.JobExecutable.of({ - type: glue.JobType.of('glueray'), + type: glue.JobType.RAY, language: glue.JobLanguage.PYTHON, pythonVersion: glue.PythonVersion.TWO, script, glueVersion, - })).toThrow(`Specified GlueVersion ${glueVersion.name} does not support Ray`); + })).toThrow(`Specified GlueVersion ${glueVersion} does not support Ray`); }); }); [glue.GlueVersion.V0_9, glue.GlueVersion.V1_0].forEach((glueVersion) => { - test(`with extraJarsFirst set and GlueVersion ${glueVersion.name} should throw`, () => { + test(`with extraJarsFirst set and GlueVersion ${glueVersion} should throw`, () => { expect(() => glue.JobExecutable.of({ type: glue.JobType.ETL, language: glue.JobLanguage.PYTHON, @@ -162,20 +119,20 @@ describe('JobExecutable', () => { extraJarsFirst: true, script, glueVersion, - })).toThrow(`Specified GlueVersion ${glueVersion.name} does not support extraJarsFirst`); + })).toThrow(`Specified GlueVersion ${glueVersion} does not support extraJarsFirst`); }); }); [glue.GlueVersion.V0_9, glue.GlueVersion.V1_0].forEach((glueVersion) => { - test(`with extraJarsFirst set and GlueVersion.of("${glueVersion.name}") should throw`, () => { + test(`with extraJarsFirst set and GlueVersion.of("${glueVersion}") should throw`, () => { expect(() => glue.JobExecutable.of({ type: glue.JobType.ETL, language: glue.JobLanguage.PYTHON, pythonVersion: glue.PythonVersion.TWO, extraJarsFirst: true, script, - 
glueVersion: glue.GlueVersion.of(glueVersion.name), - })).toThrow(`Specified GlueVersion ${glueVersion.name} does not support extraJarsFirst`); + glueVersion: glueVersion, + })).toThrow(`Specified GlueVersion ${glueVersion} does not support extraJarsFirst`); }); }); @@ -187,19 +144,19 @@ describe('JobExecutable', () => { pythonVersion: glue.PythonVersion.TWO, script, glueVersion, - })).toThrow(`Specified GlueVersion ${glueVersion.name} does not support PythonVersion 2`); + })).toThrow(`Specified GlueVersion ${glueVersion} does not support PythonVersion 2`); }); }); [glue.GlueVersion.V2_0, glue.GlueVersion.V3_0, glue.GlueVersion.V4_0].forEach((glueVersion) => { - test(`with PythonVersion.TWO and GlueVersion.of("${glueVersion.name}") should throw`, () => { + test(`with PythonVersion.TWO and GlueVersion.of("${glueVersion}") should throw`, () => { expect(() => glue.JobExecutable.of({ type: glue.JobType.ETL, language: glue.JobLanguage.PYTHON, pythonVersion: glue.PythonVersion.TWO, script, - glueVersion: glue.GlueVersion.of(glueVersion.name), - })).toThrow(`Specified GlueVersion ${glueVersion.name} does not support PythonVersion 2`); + glueVersion: glueVersion, + })).toThrow(`Specified GlueVersion ${glueVersion} does not support PythonVersion 2`); }); }); @@ -225,7 +182,7 @@ describe('JobExecutable', () => { test('with PythonVersion PythonVersion.THREE_NINE and JobType.of("pythonshell") should succeed', () => { expect(glue.JobExecutable.of({ - type: glue.JobType.of('pythonshell'), + type: glue.JobType.PYTHON_SHELL, glueVersion: glue.GlueVersion.V1_0, language: glue.JobLanguage.PYTHON, pythonVersion: glue.PythonVersion.THREE_NINE, @@ -246,7 +203,7 @@ describe('JobExecutable', () => { test('with PythonVersion PythonVersion.THREE_NINE and JobTypeof("glueray") should succeed', () => { expect(glue.JobExecutable.of({ - type: glue.JobType.of('glueray'), + type: glue.JobType.RAY, glueVersion: glue.GlueVersion.V4_0, language: glue.JobLanguage.PYTHON, pythonVersion: 
glue.PythonVersion.THREE_NINE, diff --git a/packages/@aws-cdk/aws-glue-alpha/test/job.test.ts b/packages/@aws-cdk/aws-glue-alpha/test/job.test.ts index 748d89b5668a2..b783edfd2f181 100644 --- a/packages/@aws-cdk/aws-glue-alpha/test/job.test.ts +++ b/packages/@aws-cdk/aws-glue-alpha/test/job.test.ts @@ -8,24 +8,6 @@ import * as s3 from 'aws-cdk-lib/aws-s3'; import * as cdk from 'aws-cdk-lib'; import * as glue from '../lib'; -describe('WorkerType', () => { - test('.STANDARD should set the name correctly', () => expect(glue.WorkerType.STANDARD.name).toEqual('Standard')); - - test('.G_1X should set the name correctly', () => expect(glue.WorkerType.G_1X.name).toEqual('G.1X')); - - test('.G_2X should set the name correctly', () => expect(glue.WorkerType.G_2X.name).toEqual('G.2X')); - - test('.G_4X should set the name correctly', () => expect(glue.WorkerType.G_4X.name).toEqual('G.4X')); - - test('.G_8X should set the name correctly', () => expect(glue.WorkerType.G_8X.name).toEqual('G.8X')); - - test('.G_025X should set the name correctly', () => expect(glue.WorkerType.G_025X.name).toEqual('G.025X')); - - test('.Z_2X should set the name correctly', () => expect(glue.WorkerType.Z_2X.name).toEqual('Z.2X')); - - test('of(customType) should set name correctly', () => expect(glue.WorkerType.of('CustomType').name).toEqual('CustomType')); -}); - describe('Job', () => { const jobName = 'test-job'; let stack: cdk.Stack; @@ -103,8 +85,8 @@ describe('Job', () => { let extraJars: glue.Code[]; let extraFiles: glue.Code[]; let extraPythonFiles: glue.Code[]; - let job: glue.Job; - let defaultProps: glue.JobProps; + let job: glue.JobLegacy; + let defaultProps: glue.JobLegacyProps; beforeEach(() => { codeBucket = s3.Bucket.fromBucketName(stack, 'CodeBucket', codeBucketName); @@ -123,7 +105,7 @@ describe('Job', () => { describe('with necessary props only', () => { beforeEach(() => { - job = new glue.Job(stack, 'Job', defaultProps); + job = new glue.JobLegacy(stack, 'Job', defaultProps); }); 
test('should create a role and use it with the job', () => { @@ -194,7 +176,7 @@ describe('Job', () => { test('with a custom role should use it and set it in CloudFormation', () => { const role = iam.Role.fromRoleArn(stack, 'Role', 'arn:aws:iam::123456789012:role/TestRole'); - job = new glue.Job(stack, 'JobWithRole', { + job = new glue.JobLegacy(stack, 'JobWithRole', { ...defaultProps, role, }); @@ -206,7 +188,7 @@ describe('Job', () => { }); test('with a custom jobName should set it in CloudFormation', () => { - job = new glue.Job(stack, 'JobWithName', { + job = new glue.JobLegacy(stack, 'JobWithName', { ...defaultProps, jobName, }); @@ -219,7 +201,7 @@ describe('Job', () => { describe('enabling continuous logging with defaults', () => { beforeEach(() => { - job = new glue.Job(stack, 'Job', { + job = new glue.JobLegacy(stack, 'Job', { ...defaultProps, continuousLogging: { enabled: true }, }); @@ -240,7 +222,7 @@ describe('Job', () => { beforeEach(() => { logGroup = logs.LogGroup.fromLogGroupName(stack, 'LogGroup', 'LogGroupName'); - job = new glue.Job(stack, 'Job', { + job = new glue.JobLegacy(stack, 'Job', { ...defaultProps, continuousLogging: { enabled: true, @@ -310,7 +292,7 @@ describe('Job', () => { describe('enabling execution class', () => { describe('enabling execution class with FLEX', () => { beforeEach(() => { - job = new glue.Job(stack, 'Job', { + job = new glue.JobLegacy(stack, 'Job', { executable: glue.JobExecutable.pythonEtl({ glueVersion: glue.GlueVersion.V3_0, pythonVersion: glue.PythonVersion.THREE, @@ -329,7 +311,7 @@ describe('Job', () => { describe('enabling execution class with FLEX and WorkerType G_1X', () => { beforeEach(() => { - job = new glue.Job(stack, 'Job', { + job = new glue.JobLegacy(stack, 'Job', { executable: glue.JobExecutable.pythonEtl({ glueVersion: glue.GlueVersion.V3_0, pythonVersion: glue.PythonVersion.THREE, @@ -351,7 +333,7 @@ describe('Job', () => { describe('enabling execution class with FLEX and WorkerType G_2X', () => 
{ beforeEach(() => { - job = new glue.Job(stack, 'Job', { + job = new glue.JobLegacy(stack, 'Job', { executable: glue.JobExecutable.pythonEtl({ glueVersion: glue.GlueVersion.V3_0, pythonVersion: glue.PythonVersion.THREE, @@ -373,7 +355,7 @@ describe('Job', () => { describe('enabling execution class with STANDARD', () => { beforeEach(() => { - job = new glue.Job(stack, 'Job', { + job = new glue.JobLegacy(stack, 'Job', { executable: glue.JobExecutable.pythonEtl({ glueVersion: glue.GlueVersion.V3_0, pythonVersion: glue.PythonVersion.THREE, @@ -390,85 +372,14 @@ describe('Job', () => { }); }); - describe('errors for execution class with FLEX', () => { - test('job type except JobType.ETL should throw', () => { - expect(() => new glue.Job(stack, 'Job', { - executable: glue.JobExecutable.pythonShell({ - glueVersion: glue.GlueVersion.V2_0, - pythonVersion: glue.PythonVersion.THREE, - script, - }), - executionClass: glue.ExecutionClass.FLEX, - })).toThrow('FLEX ExecutionClass is only available for JobType.ETL jobs'); - }); - - test('with glue version 0.9 should throw', () => { - expect(() => new glue.Job(stack, 'Job', { - executable: glue.JobExecutable.pythonEtl({ - glueVersion: glue.GlueVersion.V0_9, - pythonVersion: glue.PythonVersion.THREE, - script, - }), - executionClass: glue.ExecutionClass.FLEX, - })).toThrow('FLEX ExecutionClass is only available for GlueVersion 3.0 or later'); - }); - - test('with glue version 1.0 should throw', () => { - expect(() => new glue.Job(stack, 'Job', { - executable: glue.JobExecutable.pythonEtl({ - glueVersion: glue.GlueVersion.V1_0, - pythonVersion: glue.PythonVersion.THREE, - script, - }), - executionClass: glue.ExecutionClass.FLEX, - })).toThrow('FLEX ExecutionClass is only available for GlueVersion 3.0 or later'); - }); - - test('with glue version 2.0 should throw', () => { - expect(() => new glue.Job(stack, 'Job', { - executable: glue.JobExecutable.pythonEtl({ - glueVersion: glue.GlueVersion.V2_0, - pythonVersion: 
glue.PythonVersion.THREE, - script, - }), - executionClass: glue.ExecutionClass.FLEX, - })).toThrow('FLEX ExecutionClass is only available for GlueVersion 3.0 or later'); - }); - - test('with G_025X as worker type that is neither G_1X nor G_2X should throw', () => { - expect(() => new glue.Job(stack, 'Job', { - executable: glue.JobExecutable.pythonEtl({ - glueVersion: glue.GlueVersion.V3_0, - pythonVersion: glue.PythonVersion.THREE, - script, - }), - workerType: glue.WorkerType.G_025X, - workerCount: 2, - executionClass: glue.ExecutionClass.FLEX, - })).toThrow('FLEX ExecutionClass is only available for WorkerType G_1X or G_2X'); - }); - - test('with G_4X as worker type that is neither G_1X nor G_2X should throw', () => { - expect(() => new glue.Job(stack, 'Job', { - executable: glue.JobExecutable.pythonEtl({ - glueVersion: glue.GlueVersion.V3_0, - pythonVersion: glue.PythonVersion.THREE, - script, - }), - workerType: glue.WorkerType.G_4X, - workerCount: 10, - executionClass: glue.ExecutionClass.FLEX, - })).toThrow('FLEX ExecutionClass is only available for WorkerType G_1X or G_2X'); - }); - }); }); describe('enabling spark ui', () => { describe('with no bucket or path provided', () => { beforeEach(() => { - job = new glue.Job(stack, 'Job', { + job = new glue.JobLegacy(stack, 'Job', { ...defaultProps, - sparkUI: { enabled: true }, + sparkUI: { }, }); }); @@ -556,10 +467,9 @@ describe('Job', () => { beforeEach(() => { sparkUIBucket = s3.Bucket.fromBucketName(stack, 'SparkBucketId', sparkUIBucketName); - job = new glue.Job(stack, 'Job', { + job = new glue.JobLegacy(stack, 'Job', { ...defaultProps, sparkUI: { - enabled: true, bucket: sparkUIBucket, }, }); @@ -642,10 +552,9 @@ describe('Job', () => { 'Prefix must end with \'/\'', ].join(EOL); it('fails if path is mis-formatted', () => { - expect(() => new glue.Job(stack, 'BadPrefixJob', { + expect(() => new glue.JobLegacy(stack, 'BadPrefixJob', { ...defaultProps, sparkUI: { - enabled: true, bucket: sparkUIBucket, 
prefix: badPrefix, }, @@ -654,10 +563,9 @@ describe('Job', () => { beforeEach(() => { sparkUIBucket = s3.Bucket.fromBucketName(stack, 'BucketId', sparkUIBucketName); - job = new glue.Job(stack, 'Job', { + job = new glue.JobLegacy(stack, 'Job', { ...defaultProps, sparkUI: { - enabled: true, bucket: sparkUIBucket, prefix: prefix, }, @@ -727,7 +635,7 @@ describe('Job', () => { describe('with extended props', () => { beforeEach(() => { - job = new glue.Job(stack, 'Job', { + job = new glue.JobLegacy(stack, 'Job', { ...defaultProps, jobName, description: 'test job', @@ -800,7 +708,7 @@ describe('Job', () => { const defaultArguments: {[key: string]: string} = {}; defaultArguments[arg] = 'random value'; - expect(() => new glue.Job(stack, `Job${index}`, { + expect(() => new glue.JobLegacy(stack, `Job${index}`, { executable: glue.JobExecutable.scalaEtl({ glueVersion: glue.GlueVersion.V2_0, className, @@ -813,7 +721,7 @@ describe('Job', () => { describe('shell job', () => { test('with unsupported glue version should throw', () => { - expect(() => new glue.Job(stack, 'Job', { + expect(() => new glue.JobLegacy(stack, 'Job', { executable: glue.JobExecutable.pythonShell({ glueVersion: glue.GlueVersion.V0_9, pythonVersion: glue.PythonVersion.TWO, @@ -823,20 +731,20 @@ describe('Job', () => { }); test('with unsupported Spark UI prop should throw', () => { - expect(() => new glue.Job(stack, 'Job', { + expect(() => new glue.JobLegacy(stack, 'Job', { executable: glue.JobExecutable.pythonShell({ glueVersion: glue.GlueVersion.V1_0, pythonVersion: glue.PythonVersion.THREE, script, }), - sparkUI: { enabled: true }, + sparkUI: { }, })).toThrow('Spark UI is not available for JobType.PYTHON_SHELL'); }); }); describe('ray job', () => { test('with unsupported glue version should throw', () => { - expect(() => new glue.Job(stack, 'Job', { + expect(() => new glue.JobLegacy(stack, 'Job', { executable: glue.JobExecutable.pythonRay({ glueVersion: glue.GlueVersion.V3_0, pythonVersion: 
glue.PythonVersion.THREE_NINE, @@ -849,7 +757,7 @@ describe('Job', () => { }); test('with unsupported Spark UI prop should throw', () => { - expect(() => new glue.Job(stack, 'Job', { + expect(() => new glue.JobLegacy(stack, 'Job', { executable: glue.JobExecutable.pythonRay({ glueVersion: glue.GlueVersion.V4_0, pythonVersion: glue.PythonVersion.THREE_NINE, @@ -858,12 +766,12 @@ describe('Job', () => { }), workerType: glue.WorkerType.Z_2X, workerCount: 2, - sparkUI: { enabled: true }, + sparkUI: { }, })).toThrow('Spark UI is not available for JobType.RAY'); }); test('without runtime should throw', () => { - expect(() => new glue.Job(stack, 'Job', { + expect(() => new glue.JobLegacy(stack, 'Job', { executable: glue.JobExecutable.pythonRay({ glueVersion: glue.GlueVersion.V4_0, pythonVersion: glue.PythonVersion.THREE_NINE, @@ -876,7 +784,7 @@ describe('Job', () => { }); test('etl job with all props should synthesize correctly', () => { - new glue.Job(stack, 'Job', { + new glue.JobLegacy(stack, 'Job', { executable: glue.JobExecutable.pythonEtl({ glueVersion: glue.GlueVersion.V2_0, pythonVersion: glue.PythonVersion.THREE, @@ -912,7 +820,7 @@ describe('Job', () => { }); test('streaming job with all props should synthesize correctly', () => { - new glue.Job(stack, 'Job', { + new glue.JobLegacy(stack, 'Job', { executable: glue.JobExecutable.scalaStreaming({ glueVersion: glue.GlueVersion.V2_0, extraJarsFirst: true, @@ -947,7 +855,7 @@ describe('Job', () => { describe('event rules and rule-based metrics', () => { beforeEach(() => { - job = new glue.Job(stack, 'Job', { + job = new glue.JobLegacy(stack, 'Job', { executable: glue.JobExecutable.scalaEtl({ glueVersion: glue.GlueVersion.V2_0, className, @@ -981,9 +889,9 @@ describe('Job', () => { }); [ - { name: 'onSuccess()', invoke: (testJob: glue.Job) => testJob.onSuccess('SuccessRule'), state: 'SUCCEEDED' }, - { name: 'onFailure()', invoke: (testJob: glue.Job) => testJob.onFailure('FailureRule'), state: 'FAILED' }, - { name: 
'onTimeout()', invoke: (testJob: glue.Job) => testJob.onTimeout('TimeoutRule'), state: 'TIMEOUT' }, + { name: 'onSuccess()', invoke: (testJob: glue.JobLegacy) => testJob.onSuccess('SuccessRule'), state: 'SUCCEEDED' }, + { name: 'onFailure()', invoke: (testJob: glue.JobLegacy) => testJob.onFailure('FailureRule'), state: 'FAILED' }, + { name: 'onTimeout()', invoke: (testJob: glue.JobLegacy) => testJob.onTimeout('TimeoutRule'), state: 'TIMEOUT' }, ].forEach((testCase) => { test(`${testCase.name} should create a rule with correct properties`, () => { testCase.invoke(job); @@ -1026,9 +934,9 @@ describe('Job', () => { }); [ - { name: '.metricSuccess()', invoke: (testJob: glue.Job) => testJob.metricSuccess(), state: 'SUCCEEDED', ruleId: 'SuccessMetricRule' }, - { name: '.metricFailure()', invoke: (testJob: glue.Job) => testJob.metricFailure(), state: 'FAILED', ruleId: 'FailureMetricRule' }, - { name: '.metricTimeout()', invoke: (testJob: glue.Job) => testJob.metricTimeout(), state: 'TIMEOUT', ruleId: 'TimeoutMetricRule' }, + { name: '.metricSuccess()', invoke: (testJob: glue.JobLegacy) => testJob.metricSuccess(), state: 'SUCCEEDED', ruleId: 'SuccessMetricRule' }, + { name: '.metricFailure()', invoke: (testJob: glue.JobLegacy) => testJob.metricFailure(), state: 'FAILED', ruleId: 'FailureMetricRule' }, + { name: '.metricTimeout()', invoke: (testJob: glue.JobLegacy) => testJob.metricTimeout(), state: 'TIMEOUT', ruleId: 'TimeoutMetricRule' }, ].forEach((testCase) => { test(`${testCase.name} should create the expected singleton event rule and corresponding metric`, () => { const metric = testCase.invoke(job); @@ -1117,63 +1025,5 @@ describe('Job', () => { }); }); - describe('validation for maxCapacity and workerType', () => { - test('maxCapacity with workerType and workerCount should throw', () => { - expect(() => new glue.Job(stack, 'Job', { - executable: glue.JobExecutable.pythonEtl({ - glueVersion: glue.GlueVersion.V1_0, - pythonVersion: glue.PythonVersion.THREE, - script, 
- }), - maxCapacity: 10, - workerType: glue.WorkerType.G_1X, - workerCount: 10, - })).toThrow('maxCapacity cannot be used when setting workerType and workerCount'); - }); - - test('maxCapacity with GlueVersion 2.0 or later should throw', () => { - expect(() => new glue.Job(stack, 'Job', { - executable: glue.JobExecutable.pythonEtl({ - glueVersion: glue.GlueVersion.V2_0, - pythonVersion: glue.PythonVersion.THREE, - script, - }), - maxCapacity: 10, - })).toThrow('maxCapacity cannot be used when GlueVersion 2.0 or later'); - }); - - test('maxCapacity with Python Shell jobs validation', () => { - expect(() => new glue.Job(stack, 'Job', { - executable: glue.JobExecutable.pythonShell({ - glueVersion: glue.GlueVersion.V2_0, - pythonVersion: glue.PythonVersion.THREE, - script, - }), - maxCapacity: 10, - })).toThrow(/maxCapacity value must be either 0.0625 or 1 for JobType.PYTHON_SHELL jobs/); - }); - - test('workerType without workerCount should throw', () => { - expect(() => new glue.Job(stack, 'Job', { - executable: glue.JobExecutable.pythonEtl({ - glueVersion: glue.GlueVersion.V2_0, - pythonVersion: glue.PythonVersion.THREE, - script, - }), - workerType: glue.WorkerType.G_1X, - })).toThrow('Both workerType and workerCount must be set'); - }); - - test('workerCount without workerType should throw', () => { - expect(() => new glue.Job(stack, 'Job', { - executable: glue.JobExecutable.pythonEtl({ - glueVersion: glue.GlueVersion.V2_0, - pythonVersion: glue.PythonVersion.THREE, - script, - }), - workerCount: 10, - })).toThrow('Both workerType and workerCount must be set'); - }); - }); }); }); From 2632f78fb0e8fd252c42ba33b5ec0ef6f6991523 Mon Sep 17 00:00:00 2001 From: Askar Serikov Date: Thu, 29 Feb 2024 16:30:50 +0000 Subject: [PATCH 02/51] added Ray jobs --- packages/@aws-cdk/aws-glue-alpha/lib/index.ts | 2 +- .../aws-glue-alpha/lib/jobs/ray-job.ts | 99 ++++++++++++++++++- 2 files changed, 99 insertions(+), 2 deletions(-) diff --git 
a/packages/@aws-cdk/aws-glue-alpha/lib/index.ts b/packages/@aws-cdk/aws-glue-alpha/lib/index.ts index 5c5d13f9b5c76..f0726ebedf269 100644 --- a/packages/@aws-cdk/aws-glue-alpha/lib/index.ts +++ b/packages/@aws-cdk/aws-glue-alpha/lib/index.ts @@ -17,7 +17,7 @@ export * from './jobs/job'; // export * from './jobs/flex-job'; export * from './jobs/pyspark-etl-job'; // export * from './jobs/python-shell-job'; -// export * from './jobs/ray-job'; +export * from './jobs/ray-job'; // export * from './jobs/scala-spark-etl-job'; export * from './jobs/spark-ui-utils'; // export * from './jobs/spark-etl-job'; diff --git a/packages/@aws-cdk/aws-glue-alpha/lib/jobs/ray-job.ts b/packages/@aws-cdk/aws-glue-alpha/lib/jobs/ray-job.ts index fff73ebde2732..12cf626d33d92 100644 --- a/packages/@aws-cdk/aws-glue-alpha/lib/jobs/ray-job.ts +++ b/packages/@aws-cdk/aws-glue-alpha/lib/jobs/ray-job.ts @@ -1,7 +1,104 @@ +import { CfnJob } from 'aws-cdk-lib/aws-glue'; +import * as iam from 'aws-cdk-lib/aws-iam'; +import { Job, JobProperties } from './job'; +import { Construct } from 'constructs'; +import { JobType, GlueVersion, WorkerType, Runtime } from '../constants'; + /** * Ray Jobs class * * Glue ray only supports worker type Z.2X and Glue version 4.0. - * Runtime will default to Ray2.3 and min workers will default to 3. + * Runtime will default to Ray2.4 and min workers will default to 3. 
* */ + +/** + * Properties for creating a Ray Glue job + */ +export interface RayJobProps extends JobProperties { + /** + * Sets the Ray runtime environment version + * + * @default - Runtime version will default to Ray2.4 + */ + readonly runtime?: Runtime; +} + +/** + * A Ray Glue Job + */ +export class RayJob extends Job { + + // Implement abstract Job attributes + public readonly jobArn: string; + public readonly jobName: string; + public readonly role: iam.IRole; + public readonly grantPrincipal: iam.IPrincipal; + + /** + * RayJob constructor + * + * @param scope + * @param id + * @param props + */ + + constructor(scope: Construct, id: string, props: RayJobProps) { + super(scope, id, { + physicalName: props.jobName, + }); + + // List of supported Glue versions by Ray + const supportedGlueVersions = [ + GlueVersion.V4_0, + ]; + + // Set up role and permissions for principal + this.role = props.role, { + assumedBy: new iam.ServicePrincipal('glue.amazonaws.com'), + managedPolicies: [iam.ManagedPolicy.fromAwsManagedPolicyName('service-role/AWSGlueServiceRole')], + }; + this.grantPrincipal = this.role; + + // Combine command line arguments into a single line item + const defaultArguments = { + ...this.checkNoReservedArgs(props.defaultArguments), + }; + + if (props.workerType && props.workerType !== WorkerType.Z_2X) { + throw new Error('Ray jobs only support Z.2X worker type'); + }; + + if (props.glueVersion && !(supportedGlueVersions.includes(props.glueVersion))) { + throw new Error('You must set GlueVersion to 4.0 or greater'); + }; + + const jobResource = new CfnJob(this, 'Resource', { + name: props.jobName, + description: props.description, + role: this.role.roleArn, + command: { + name: JobType.RAY, + scriptLocation: this.codeS3ObjectUrl(props.script), + runtime: props.runtime ? props.runtime : Runtime.RAY_TWO_FOUR, + }, + glueVersion: props.glueVersion ? props.glueVersion : GlueVersion.V4_0, + workerType: props.workerType ? 
props.workerType : WorkerType.Z_2X, + numberOfWorkers: props.numberOrWorkers, + maxRetries: props.maxRetries, + executionProperty: props.maxConcurrentRuns ? { maxConcurrentRuns: props.maxConcurrentRuns } : undefined, + //notificationProperty: props.notifyDelayAfter ? { notifyDelayAfter: props.notifyDelayAfter.toMinutes() } : undefined, + timeout: props.timeout?.toMinutes(), + connections: props.connections ? { connections: props.connections.map((connection) => connection.connectionName) } : undefined, + securityConfiguration: props.securityConfiguration?.securityConfigurationName, + tags: props.tags, + defaultArguments, + }); + + const resourceName = this.getResourceNameAttribute(jobResource.ref); + this.jobArn = this.buildJobArn(this, resourceName); + this.jobName = resourceName; + + } + +} \ No newline at end of file From c67cd81aa8d56eaa9b3cf530263be49c12b84a76 Mon Sep 17 00:00:00 2001 From: Chris Williams Date: Tue, 5 Mar 2024 09:01:55 +0000 Subject: [PATCH 03/51] Create raw CDK assets for Python Shell Jobs --- .../@aws-cdk/aws-glue-alpha/lib/constants.ts | 15 +++ packages/@aws-cdk/aws-glue-alpha/lib/index.ts | 2 +- .../lib/jobs/python-shell-job.ts | 105 ++++++++++++++++++ .../test/integ.job-python-shell.ts | 49 ++++---- .../aws-glue-alpha/test/python-shell-job.ts | 42 +++++++ 5 files changed, 187 insertions(+), 26 deletions(-) create mode 100644 packages/@aws-cdk/aws-glue-alpha/test/python-shell-job.ts diff --git a/packages/@aws-cdk/aws-glue-alpha/lib/constants.ts b/packages/@aws-cdk/aws-glue-alpha/lib/constants.ts index 7b1cfd7896fdf..00ece58f75d75 100644 --- a/packages/@aws-cdk/aws-glue-alpha/lib/constants.ts +++ b/packages/@aws-cdk/aws-glue-alpha/lib/constants.ts @@ -238,3 +238,18 @@ export enum JobType { } +/** + * The number of AWS Glue data processing units (DPUs) that can be allocated when this job runs. A DPU is a relative measure of processing power that consists of 4 vCPUs of compute capacity and 16 GB of memory. 
+ */ +export enum MaxCapacity { + + /** + * DPU value of 1/16th + */ + DPU_1_16TH = 0.0625, + + /** + * DPU value of 1 + */ + DPU_1 = 1, +} \ No newline at end of file diff --git a/packages/@aws-cdk/aws-glue-alpha/lib/index.ts b/packages/@aws-cdk/aws-glue-alpha/lib/index.ts index 5c5d13f9b5c76..daec73ce32499 100644 --- a/packages/@aws-cdk/aws-glue-alpha/lib/index.ts +++ b/packages/@aws-cdk/aws-glue-alpha/lib/index.ts @@ -16,7 +16,7 @@ export * from './constants'; export * from './jobs/job'; // export * from './jobs/flex-job'; export * from './jobs/pyspark-etl-job'; -// export * from './jobs/python-shell-job'; +export * from './jobs/python-shell-job'; // export * from './jobs/ray-job'; // export * from './jobs/scala-spark-etl-job'; export * from './jobs/spark-ui-utils'; diff --git a/packages/@aws-cdk/aws-glue-alpha/lib/jobs/python-shell-job.ts b/packages/@aws-cdk/aws-glue-alpha/lib/jobs/python-shell-job.ts index d5bc6584175a6..9e70496613f9f 100644 --- a/packages/@aws-cdk/aws-glue-alpha/lib/jobs/python-shell-job.ts +++ b/packages/@aws-cdk/aws-glue-alpha/lib/jobs/python-shell-job.ts @@ -1,3 +1,9 @@ +import { CfnJob } from 'aws-cdk-lib/aws-glue'; +import * as iam from 'aws-cdk-lib/aws-iam'; +import { Job, JobProperties } from './job'; +import { Construct } from 'constructs'; +import { JobType, GlueVersion, PythonVersion, MaxCapacity } from '../constants'; + /** * Python Shell Jobs class * @@ -6,3 +12,102 @@ * This can be used to schedule and run tasks that don't require an Apache Spark environment. 
* */ + +/** + * Properties for creating a Python Shell job + */ +export interface PythonShellJobProperties extends JobProperties { + /** + * Python Version + * The version of Python to use to execute this job + * @default 3.9 for Shell Jobs + **/ + readonly pythonVersion?: PythonVersion; + + /** + * The total number of DPU to assign to the Python Job + * @default 0.0625 + */ + readonly maxCapacity?: MaxCapacity; +} + +/** + * A Python Shell Glue Job + */ +export class PythonShellJob extends Job { + + // Implement abstract Job attributes + public readonly jobArn: string; + public readonly jobName: string; + public readonly role: iam.IRole; + public readonly grantPrincipal: iam.IPrincipal; + + /** + * PythonShellJob constructor + * + * @param scope + * @param id + * @param props + */ + constructor(scope: Construct, id: string, props: PythonShellJobProperties) { + super(scope, id, { physicalName: props.jobName }); + + // Set up role and permissions for principal + this.role = props.role, { + assumedBy: new iam.ServicePrincipal('glue.amazonaws.com'), + managedPolicies: [iam.ManagedPolicy.fromAwsManagedPolicyName('service-role/AWSGlueServiceRole')], + }; + this.grantPrincipal = this.role; + + // Gather executable arguments + const executableArgs = this.executableArguments(props); + + // Combine command line arguments into a single line item + const defaultArguments = { + ...executableArgs, + ...this.checkNoReservedArgs(props.defaultArguments), + }; + + const jobResource = new CfnJob(this, 'Resource', { + name: props.jobName, + description: props.description, + role: this.role.roleArn, + command: { + name: JobType.PYTHON_SHELL, + scriptLocation: this.codeS3ObjectUrl(props.script), + pythonVersion: props.pythonVersion ? props.pythonVersion : PythonVersion.THREE_NINE, + }, + glueVersion: props.glueVersion ? props.glueVersion : GlueVersion.V3_0, + maxCapacity: props.maxCapacity ? props.maxCapacity : MaxCapacity.DPU_1_16TH, + maxRetries: props.maxRetries ? 
props.maxRetries : 0, + executionProperty: props.maxConcurrentRuns ? { maxConcurrentRuns: props.maxConcurrentRuns } : undefined, + timeout: props.timeout?.toMinutes(), + connections: props.connections ? { connections: props.connections.map((connection) => connection.connectionName) } : undefined, + securityConfiguration: props.securityConfiguration?.securityConfigurationName, + tags: props.tags, + defaultArguments, + }); + + const resourceName = this.getResourceNameAttribute(jobResource.ref); + this.jobArn = this.buildJobArn(this, resourceName); + this.jobName = resourceName; + } + + /** + * Set the executable arguments with best practices enabled by default + * + * @param props + * @returns An array of arguments for Glue to use on execution + */ + private executableArguments(props: PythonShellJobProperties) { + const args: { [key: string]: string } = {}; + + //If no Python version set (default 3.9) or the version is set to 3.9 then set library-set argument + if (!props.pythonVersion || props.pythonVersion == PythonVersion.THREE_NINE) { + //Selecting this option includes common libraries for Python 3.9 + args['library-set'] = 'analytics'; + } + + return args; + } +} \ No newline at end of file diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-python-shell.ts b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-python-shell.ts index 6e50800b2ebca..c5ecc8eafb631 100644 --- a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-python-shell.ts +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-python-shell.ts @@ -2,6 +2,7 @@ import * as integ from '@aws-cdk/integ-tests-alpha'; import * as path from 'path'; import * as cdk from 'aws-cdk-lib'; import * as glue from '../lib'; +import * as iam from 'aws-cdk-lib/aws-iam'; /** * To verify the ability to run jobs created in this test @@ -22,32 +23,31 @@ const app = new cdk.App(); const stack = new cdk.Stack(app, 'aws-glue-job-python-shell'); -const script = glue.Code.fromAsset(path.join(__dirname, 'job-script', 
'hello_world.py')); +const script = glue.Code.fromAsset(path.join(__dirname, 'job-script/hello_world.py')); -new glue.JobLegacy(stack, 'ShellJob', { - jobName: 'ShellJob', - executable: glue.JobExecutable.pythonShell({ - glueVersion: glue.GlueVersion.V1_0, - pythonVersion: glue.PythonVersion.THREE, - script, - }), - defaultArguments: { - arg1: 'value1', - arg2: 'value2', - }, - tags: { - key: 'value', - }, - maxCapacity: 0.0625, +const iam_role = new iam.Role(stack, 'IAMServiceRole', { + assumedBy: new iam.ServicePrincipal('glue.amazonaws.com'), + managedPolicies: [iam.ManagedPolicy.fromAwsManagedPolicyName('service-role/AWSGlueServiceRole')], +}); + +new glue.PythonShellJob(stack, 'BasicShellJob39', { + script: script, + role: iam_role, +}); + +new glue.PythonShellJob(stack, 'BasicShellJob', { + script: script, + role: iam_role, + pythonVersion: glue.PythonVersion.THREE, + glueVersion: glue.GlueVersion.V1_0, }); -new glue.JobLegacy(stack, 'ShellJob39', { - jobName: 'ShellJob39', - executable: glue.JobExecutable.pythonShell({ - glueVersion: glue.GlueVersion.V3_0, - pythonVersion: glue.PythonVersion.THREE_NINE, - script, - }), +new glue.PythonShellJob(stack, 'DetailedShellJob39', { + script: script, + role: iam_role, + description: 'My detailed Python 3.9 Shell Job', + maxCapacity: glue.MaxCapacity.DPU_1, + jobName: 'My Python 3.9 Shell Job', defaultArguments: { arg1: 'value1', arg2: 'value2', @@ -55,11 +55,10 @@ new glue.JobLegacy(stack, 'ShellJob39', { tags: { key: 'value', }, - maxCapacity: 1.0, }); new integ.IntegTest(app, 'aws-glue-job-python-shell-integ-test', { testCases: [stack], }); -app.synth(); +app.synth(); \ No newline at end of file diff --git a/packages/@aws-cdk/aws-glue-alpha/test/python-shell-job.ts b/packages/@aws-cdk/aws-glue-alpha/test/python-shell-job.ts new file mode 100644 index 0000000000000..d8d5eaeaf40ba --- /dev/null +++ b/packages/@aws-cdk/aws-glue-alpha/test/python-shell-job.ts @@ -0,0 +1,42 @@ +import * as cdk from 'aws-cdk-lib'; 
+import * as glue from '../lib'; +import * as iam from 'aws-cdk-lib/aws-iam'; +import * as s3 from 'aws-cdk-lib/aws-s3'; +import { Template } from 'aws-cdk-lib/assertions'; + +describe('Job', () => { + let stack: cdk.Stack; + let role: iam.IRole; + let script: glue.Code; + let codeBucket: s3.IBucket; + let job: glue.IJob; + + beforeEach(() => { + stack = new cdk.Stack(); + role = iam.Role.fromRoleArn(stack, 'Role', 'arn:aws:iam::123456789012:role/TestRole'); + codeBucket = s3.Bucket.fromBucketName(stack, 'CodeBucket', 'bucketname'); + script = glue.Code.fromBucket(codeBucket, 'script'); + }); + + describe('Create new Python Shell Job with default parameters', () => { + + beforeEach(() => { + job = new glue.PythonShellJob(stack, 'ImportedJob', { role, script }); + }); + + test('Test default attributes', () => { + expect(job.jobArn).toEqual(stack.formatArn({ + service: 'glue', + resource: 'job', + resourceName: job.jobName, + })); + expect(job.grantPrincipal).toEqual(role); + }); + + test('Default Glue Version should be 3.0', () => { + Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { + GlueVersion: '3.0', + }); + }); + }); +}); \ No newline at end of file From 273d603e9320b34f240061ff500a6bfa1ef409a6 Mon Sep 17 00:00:00 2001 From: Chris Williams Date: Tue, 5 Mar 2024 09:04:43 +0000 Subject: [PATCH 04/51] Fixed broken path.join --- packages/@aws-cdk/aws-glue-alpha/test/integ.job-python-shell.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-python-shell.ts b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-python-shell.ts index c5ecc8eafb631..712280be6831e 100644 --- a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-python-shell.ts +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-python-shell.ts @@ -23,7 +23,7 @@ const app = new cdk.App(); const stack = new cdk.Stack(app, 'aws-glue-job-python-shell'); -const script = glue.Code.fromAsset(path.join(__dirname, 
'job-script/hello_world.py')); +const script = glue.Code.fromAsset(path.join(__dirname, 'job-script', 'hello_world.py')); const iam_role = new iam.Role(stack, 'IAMServiceRole', { assumedBy: new iam.ServicePrincipal('glue.amazonaws.com'), From f66e0e0f38a75d8b8fc22419450f17d4e0d840af Mon Sep 17 00:00:00 2001 From: Chris Williams Date: Mon, 11 Mar 2024 09:41:40 +0000 Subject: [PATCH 05/51] Updated Python Shell Jobs integration test output to match updated integration test. --- .../lib/jobs/python-shell-job.ts | 6 +- .../aws-glue-job-python-shell.assets.json | 6 +- .../aws-glue-job-python-shell.template.json | 130 ++---- ...efaultTestDeployAssert453D25B7.assets.json | 2 +- .../cdk.out | 2 +- .../integ.json | 2 +- .../manifest.json | 86 +++- .../tree.json | 434 +++++++----------- 8 files changed, 286 insertions(+), 382 deletions(-) diff --git a/packages/@aws-cdk/aws-glue-alpha/lib/jobs/python-shell-job.ts b/packages/@aws-cdk/aws-glue-alpha/lib/jobs/python-shell-job.ts index 9e70496613f9f..966454da77457 100644 --- a/packages/@aws-cdk/aws-glue-alpha/lib/jobs/python-shell-job.ts +++ b/packages/@aws-cdk/aws-glue-alpha/lib/jobs/python-shell-job.ts @@ -16,7 +16,7 @@ import { JobType, GlueVersion, PythonVersion, MaxCapacity } from '../constants'; /** * Properties for creating a Python Shell job */ -export interface PythonShellJobProperties extends JobProperties { +export interface PythonShellJobProps extends JobProperties { /** * Python Version * The version of Python to use to execute this job @@ -49,7 +49,7 @@ export class PythonShellJob extends Job { * @param id * @param props */ - constructor(scope: Construct, id: string, props: PythonShellJobProperties) { + constructor(scope: Construct, id: string, props: PythonShellJobProps) { super(scope, id, { physicalName: props.jobName }); // Set up role and permissions for principal @@ -99,7 +99,7 @@ export class PythonShellJob extends Job { * @param props * @returns An array of arguments for Glue to use on execution */ - private 
executableArguments(props: PythonShellJobProperties) { + private executableArguments(props: PythonShellJobProps) { const args: { [key: string]: string } = {}; //If no Python version set (default 3.9) or the version is set to 3.9 then set library-set argument diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-python-shell.js.snapshot/aws-glue-job-python-shell.assets.json b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-python-shell.js.snapshot/aws-glue-job-python-shell.assets.json index 17b109b19285f..0a415cd107153 100644 --- a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-python-shell.js.snapshot/aws-glue-job-python-shell.assets.json +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-python-shell.js.snapshot/aws-glue-job-python-shell.assets.json @@ -1,5 +1,5 @@ { - "version": "33.0.0", + "version": "36.0.0", "files": { "432033e3218068a915d2532fa9be7858a12b228a2ae6e5c10faccd9097b1e855": { "source": { @@ -14,7 +14,7 @@ } } }, - "13432a74ca6cfada399f4d2b33385964f66c49aeeb01c5f0cefec52560a4dffa": { + "aeda11f7bb7dfbd52c66176f2e7ae14f20571f1f22ab7988a59bc714daf278a0": { "source": { "path": "aws-glue-job-python-shell.template.json", "packaging": "file" @@ -22,7 +22,7 @@ "destinations": { "current_account-current_region": { "bucketName": "cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}", - "objectKey": "13432a74ca6cfada399f4d2b33385964f66c49aeeb01c5f0cefec52560a4dffa.json", + "objectKey": "aeda11f7bb7dfbd52c66176f2e7ae14f20571f1f22ab7988a59bc714daf278a0.json", "assumeRoleArn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-file-publishing-role-${AWS::AccountId}-${AWS::Region}" } } diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-python-shell.js.snapshot/aws-glue-job-python-shell.template.json b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-python-shell.js.snapshot/aws-glue-job-python-shell.template.json index dece180ae8219..9c752c984fef2 100644 --- 
a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-python-shell.js.snapshot/aws-glue-job-python-shell.template.json +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-python-shell.js.snapshot/aws-glue-job-python-shell.template.json @@ -1,6 +1,6 @@ { "Resources": { - "ShellJobServiceRoleCF97BC4B": { + "IAMServiceRole61C662C4": { "Type": "AWS::IAM::Role", "Properties": { "AssumeRolePolicyDocument": { @@ -31,7 +31,7 @@ ] } }, - "ShellJobServiceRoleDefaultPolicy7F22D315": { + "IAMServiceRoleDefaultPolicy379D1A0E": { "Type": "AWS::IAM::Policy", "Properties": { "PolicyDocument": { @@ -80,20 +80,20 @@ ], "Version": "2012-10-17" }, - "PolicyName": "ShellJobServiceRoleDefaultPolicy7F22D315", + "PolicyName": "IAMServiceRoleDefaultPolicy379D1A0E", "Roles": [ { - "Ref": "ShellJobServiceRoleCF97BC4B" + "Ref": "IAMServiceRole61C662C4" } ] } }, - "ShellJob42E81F95": { + "BasicShellJob39F2E7D12A": { "Type": "AWS::Glue::Job", "Properties": { "Command": { "Name": "pythonshell", - "PythonVersion": "3", + "PythonVersion": "3.9", "ScriptLocation": { "Fn::Join": [ "", @@ -108,113 +108,51 @@ } }, "DefaultArguments": { - "--job-language": "python", - "arg1": "value1", - "arg2": "value2" + "library-set": "analytics" }, - "GlueVersion": "1.0", + "GlueVersion": "3.0", "MaxCapacity": 0.0625, - "Name": "ShellJob", + "MaxRetries": 0, "Role": { "Fn::GetAtt": [ - "ShellJobServiceRoleCF97BC4B", + "IAMServiceRole61C662C4", "Arn" ] - }, - "Tags": { - "key": "value" } } }, - "ShellJob39ServiceRole2F6F3768": { - "Type": "AWS::IAM::Role", + "BasicShellJobC7D0761E": { + "Type": "AWS::Glue::Job", "Properties": { - "AssumeRolePolicyDocument": { - "Statement": [ - { - "Action": "sts:AssumeRole", - "Effect": "Allow", - "Principal": { - "Service": "glue.amazonaws.com" - } - } - ], - "Version": "2012-10-17" - }, - "ManagedPolicyArns": [ - { + "Command": { + "Name": "pythonshell", + "PythonVersion": "3", + "ScriptLocation": { "Fn::Join": [ "", [ - "arn:", + "s3://", { - "Ref": "AWS::Partition" + "Fn::Sub": 
"cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}" }, - ":iam::aws:policy/service-role/AWSGlueServiceRole" + "/432033e3218068a915d2532fa9be7858a12b228a2ae6e5c10faccd9097b1e855.py" ] ] } - ] - } - }, - "ShellJob39ServiceRoleDefaultPolicy38A33919": { - "Type": "AWS::IAM::Policy", - "Properties": { - "PolicyDocument": { - "Statement": [ - { - "Action": [ - "s3:GetBucket*", - "s3:GetObject*", - "s3:List*" - ], - "Effect": "Allow", - "Resource": [ - { - "Fn::Join": [ - "", - [ - "arn:", - { - "Ref": "AWS::Partition" - }, - ":s3:::", - { - "Fn::Sub": "cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}" - }, - "/*" - ] - ] - }, - { - "Fn::Join": [ - "", - [ - "arn:", - { - "Ref": "AWS::Partition" - }, - ":s3:::", - { - "Fn::Sub": "cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}" - } - ] - ] - } - ] - } - ], - "Version": "2012-10-17" }, - "PolicyName": "ShellJob39ServiceRoleDefaultPolicy38A33919", - "Roles": [ - { - "Ref": "ShellJob39ServiceRole2F6F3768" - } - ] + "DefaultArguments": {}, + "GlueVersion": "1.0", + "MaxCapacity": 0.0625, + "MaxRetries": 0, + "Role": { + "Fn::GetAtt": [ + "IAMServiceRole61C662C4", + "Arn" + ] + } } }, - "ShellJob390C141361": { + "DetailedShellJob39CB370B41": { "Type": "AWS::Glue::Job", "Properties": { "Command": { @@ -234,16 +172,18 @@ } }, "DefaultArguments": { - "--job-language": "python", + "library-set": "analytics", "arg1": "value1", "arg2": "value2" }, + "Description": "My detailed Python 3.9 Shell Job", "GlueVersion": "3.0", "MaxCapacity": 1, - "Name": "ShellJob39", + "MaxRetries": 0, + "Name": "My Python 3.9 Shell Job", "Role": { "Fn::GetAtt": [ - "ShellJob39ServiceRole2F6F3768", + "IAMServiceRole61C662C4", "Arn" ] }, diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-python-shell.js.snapshot/awsgluejobpythonshellintegtestDefaultTestDeployAssert453D25B7.assets.json b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-python-shell.js.snapshot/awsgluejobpythonshellintegtestDefaultTestDeployAssert453D25B7.assets.json 
index fcf891c433efb..fc44607a05dee 100644 --- a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-python-shell.js.snapshot/awsgluejobpythonshellintegtestDefaultTestDeployAssert453D25B7.assets.json +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-python-shell.js.snapshot/awsgluejobpythonshellintegtestDefaultTestDeployAssert453D25B7.assets.json @@ -1,5 +1,5 @@ { - "version": "33.0.0", + "version": "36.0.0", "files": { "21fbb51d7b23f6a6c262b46a9caee79d744a3ac019fd45422d988b96d44b2a22": { "source": { diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-python-shell.js.snapshot/cdk.out b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-python-shell.js.snapshot/cdk.out index 560dae10d018f..1f0068d32659a 100644 --- a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-python-shell.js.snapshot/cdk.out +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-python-shell.js.snapshot/cdk.out @@ -1 +1 @@ -{"version":"33.0.0"} \ No newline at end of file +{"version":"36.0.0"} \ No newline at end of file diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-python-shell.js.snapshot/integ.json b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-python-shell.js.snapshot/integ.json index 89660486d806d..30e0cedc0c82d 100644 --- a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-python-shell.js.snapshot/integ.json +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-python-shell.js.snapshot/integ.json @@ -1,5 +1,5 @@ { - "version": "33.0.0", + "version": "36.0.0", "testCases": { "aws-glue-job-python-shell-integ-test/DefaultTest": { "stacks": [ diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-python-shell.js.snapshot/manifest.json b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-python-shell.js.snapshot/manifest.json index 93cd15ece1b08..24a56a14662e5 100644 --- a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-python-shell.js.snapshot/manifest.json +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-python-shell.js.snapshot/manifest.json @@ -1,5 +1,5 @@ { - 
"version": "33.0.0", + "version": "36.0.0", "artifacts": { "aws-glue-job-python-shell.assets": { "type": "cdk:asset-manifest", @@ -14,10 +14,11 @@ "environment": "aws://unknown-account/unknown-region", "properties": { "templateFile": "aws-glue-job-python-shell.template.json", + "terminationProtection": false, "validateOnSynth": false, "assumeRoleArn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-deploy-role-${AWS::AccountId}-${AWS::Region}", "cloudFormationExecutionRoleArn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-cfn-exec-role-${AWS::AccountId}-${AWS::Region}", - "stackTemplateAssetObjectUrl": "s3://cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}/13432a74ca6cfada399f4d2b33385964f66c49aeeb01c5f0cefec52560a4dffa.json", + "stackTemplateAssetObjectUrl": "s3://cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}/aeda11f7bb7dfbd52c66176f2e7ae14f20571f1f22ab7988a59bc714daf278a0.json", "requiresBootstrapStackVersion": 6, "bootstrapStackVersionSsmParameter": "/cdk-bootstrap/hnb659fds/version", "additionalDependencies": [ @@ -33,40 +34,34 @@ "aws-glue-job-python-shell.assets" ], "metadata": { - "/aws-glue-job-python-shell/ShellJob/ServiceRole/Resource": [ + "/aws-glue-job-python-shell/IAMServiceRole/Resource": [ { "type": "aws:cdk:logicalId", - "data": "ShellJobServiceRoleCF97BC4B" + "data": "IAMServiceRole61C662C4" } ], - "/aws-glue-job-python-shell/ShellJob/ServiceRole/DefaultPolicy/Resource": [ + "/aws-glue-job-python-shell/IAMServiceRole/DefaultPolicy/Resource": [ { "type": "aws:cdk:logicalId", - "data": "ShellJobServiceRoleDefaultPolicy7F22D315" + "data": "IAMServiceRoleDefaultPolicy379D1A0E" } ], - "/aws-glue-job-python-shell/ShellJob/Resource": [ + "/aws-glue-job-python-shell/BasicShellJob39/Resource": [ { "type": "aws:cdk:logicalId", - "data": "ShellJob42E81F95" + "data": "BasicShellJob39F2E7D12A" } ], - "/aws-glue-job-python-shell/ShellJob39/ServiceRole/Resource": [ + 
"/aws-glue-job-python-shell/BasicShellJob/Resource": [ { "type": "aws:cdk:logicalId", - "data": "ShellJob39ServiceRole2F6F3768" + "data": "BasicShellJobC7D0761E" } ], - "/aws-glue-job-python-shell/ShellJob39/ServiceRole/DefaultPolicy/Resource": [ + "/aws-glue-job-python-shell/DetailedShellJob39/Resource": [ { "type": "aws:cdk:logicalId", - "data": "ShellJob39ServiceRoleDefaultPolicy38A33919" - } - ], - "/aws-glue-job-python-shell/ShellJob39/Resource": [ - { - "type": "aws:cdk:logicalId", - "data": "ShellJob390C141361" + "data": "DetailedShellJob39CB370B41" } ], "/aws-glue-job-python-shell/BootstrapVersion": [ @@ -80,6 +75,60 @@ "type": "aws:cdk:logicalId", "data": "CheckBootstrapVersion" } + ], + "ShellJobServiceRoleCF97BC4B": [ + { + "type": "aws:cdk:logicalId", + "data": "ShellJobServiceRoleCF97BC4B", + "trace": [ + "!!DESTRUCTIVE_CHANGES: WILL_DESTROY" + ] + } + ], + "ShellJobServiceRoleDefaultPolicy7F22D315": [ + { + "type": "aws:cdk:logicalId", + "data": "ShellJobServiceRoleDefaultPolicy7F22D315", + "trace": [ + "!!DESTRUCTIVE_CHANGES: WILL_DESTROY" + ] + } + ], + "ShellJob42E81F95": [ + { + "type": "aws:cdk:logicalId", + "data": "ShellJob42E81F95", + "trace": [ + "!!DESTRUCTIVE_CHANGES: WILL_DESTROY" + ] + } + ], + "ShellJob39ServiceRole2F6F3768": [ + { + "type": "aws:cdk:logicalId", + "data": "ShellJob39ServiceRole2F6F3768", + "trace": [ + "!!DESTRUCTIVE_CHANGES: WILL_DESTROY" + ] + } + ], + "ShellJob39ServiceRoleDefaultPolicy38A33919": [ + { + "type": "aws:cdk:logicalId", + "data": "ShellJob39ServiceRoleDefaultPolicy38A33919", + "trace": [ + "!!DESTRUCTIVE_CHANGES: WILL_DESTROY" + ] + } + ], + "ShellJob390C141361": [ + { + "type": "aws:cdk:logicalId", + "data": "ShellJob390C141361", + "trace": [ + "!!DESTRUCTIVE_CHANGES: WILL_DESTROY" + ] + } ] }, "displayName": "aws-glue-job-python-shell" @@ -97,6 +146,7 @@ "environment": "aws://unknown-account/unknown-region", "properties": { "templateFile": 
"awsgluejobpythonshellintegtestDefaultTestDeployAssert453D25B7.template.json", + "terminationProtection": false, "validateOnSynth": false, "assumeRoleArn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-deploy-role-${AWS::AccountId}-${AWS::Region}", "cloudFormationExecutionRoleArn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-cfn-exec-role-${AWS::AccountId}-${AWS::Region}", diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-python-shell.js.snapshot/tree.json b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-python-shell.js.snapshot/tree.json index 05905851160a8..8f39bfe79fe3c 100644 --- a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-python-shell.js.snapshot/tree.json +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-python-shell.js.snapshot/tree.json @@ -8,149 +8,149 @@ "id": "aws-glue-job-python-shell", "path": "aws-glue-job-python-shell", "children": { - "ShellJob": { - "id": "ShellJob", - "path": "aws-glue-job-python-shell/ShellJob", + "IAMServiceRole": { + "id": "IAMServiceRole", + "path": "aws-glue-job-python-shell/IAMServiceRole", "children": { - "ServiceRole": { - "id": "ServiceRole", - "path": "aws-glue-job-python-shell/ShellJob/ServiceRole", + "ImportIAMServiceRole": { + "id": "ImportIAMServiceRole", + "path": "aws-glue-job-python-shell/IAMServiceRole/ImportIAMServiceRole", + "constructInfo": { + "fqn": "aws-cdk-lib.Resource", + "version": "0.0.0" + } + }, + "Resource": { + "id": "Resource", + "path": "aws-glue-job-python-shell/IAMServiceRole/Resource", + "attributes": { + "aws:cdk:cloudformation:type": "AWS::IAM::Role", + "aws:cdk:cloudformation:props": { + "assumeRolePolicyDocument": { + "Statement": [ + { + "Action": "sts:AssumeRole", + "Effect": "Allow", + "Principal": { + "Service": "glue.amazonaws.com" + } + } + ], + "Version": "2012-10-17" + }, + "managedPolicyArns": [ + { + "Fn::Join": [ + "", + [ + "arn:", + { + "Ref": "AWS::Partition" + }, + ":iam::aws:policy/service-role/AWSGlueServiceRole" 
+ ] + ] + } + ] + } + }, + "constructInfo": { + "fqn": "aws-cdk-lib.aws_iam.CfnRole", + "version": "0.0.0" + } + }, + "DefaultPolicy": { + "id": "DefaultPolicy", + "path": "aws-glue-job-python-shell/IAMServiceRole/DefaultPolicy", "children": { - "ImportServiceRole": { - "id": "ImportServiceRole", - "path": "aws-glue-job-python-shell/ShellJob/ServiceRole/ImportServiceRole", - "constructInfo": { - "fqn": "aws-cdk-lib.Resource", - "version": "0.0.0" - } - }, "Resource": { "id": "Resource", - "path": "aws-glue-job-python-shell/ShellJob/ServiceRole/Resource", + "path": "aws-glue-job-python-shell/IAMServiceRole/DefaultPolicy/Resource", "attributes": { - "aws:cdk:cloudformation:type": "AWS::IAM::Role", + "aws:cdk:cloudformation:type": "AWS::IAM::Policy", "aws:cdk:cloudformation:props": { - "assumeRolePolicyDocument": { + "policyDocument": { "Statement": [ { - "Action": "sts:AssumeRole", + "Action": [ + "s3:GetBucket*", + "s3:GetObject*", + "s3:List*" + ], "Effect": "Allow", - "Principal": { - "Service": "glue.amazonaws.com" - } + "Resource": [ + { + "Fn::Join": [ + "", + [ + "arn:", + { + "Ref": "AWS::Partition" + }, + ":s3:::", + { + "Fn::Sub": "cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}" + }, + "/*" + ] + ] + }, + { + "Fn::Join": [ + "", + [ + "arn:", + { + "Ref": "AWS::Partition" + }, + ":s3:::", + { + "Fn::Sub": "cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}" + } + ] + ] + } + ] } ], "Version": "2012-10-17" }, - "managedPolicyArns": [ + "policyName": "IAMServiceRoleDefaultPolicy379D1A0E", + "roles": [ { - "Fn::Join": [ - "", - [ - "arn:", - { - "Ref": "AWS::Partition" - }, - ":iam::aws:policy/service-role/AWSGlueServiceRole" - ] - ] + "Ref": "IAMServiceRole61C662C4" } ] } }, "constructInfo": { - "fqn": "aws-cdk-lib.aws_iam.CfnRole", - "version": "0.0.0" - } - }, - "DefaultPolicy": { - "id": "DefaultPolicy", - "path": "aws-glue-job-python-shell/ShellJob/ServiceRole/DefaultPolicy", - "children": { - "Resource": { - "id": "Resource", - "path": 
"aws-glue-job-python-shell/ShellJob/ServiceRole/DefaultPolicy/Resource", - "attributes": { - "aws:cdk:cloudformation:type": "AWS::IAM::Policy", - "aws:cdk:cloudformation:props": { - "policyDocument": { - "Statement": [ - { - "Action": [ - "s3:GetBucket*", - "s3:GetObject*", - "s3:List*" - ], - "Effect": "Allow", - "Resource": [ - { - "Fn::Join": [ - "", - [ - "arn:", - { - "Ref": "AWS::Partition" - }, - ":s3:::", - { - "Fn::Sub": "cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}" - }, - "/*" - ] - ] - }, - { - "Fn::Join": [ - "", - [ - "arn:", - { - "Ref": "AWS::Partition" - }, - ":s3:::", - { - "Fn::Sub": "cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}" - } - ] - ] - } - ] - } - ], - "Version": "2012-10-17" - }, - "policyName": "ShellJobServiceRoleDefaultPolicy7F22D315", - "roles": [ - { - "Ref": "ShellJobServiceRoleCF97BC4B" - } - ] - } - }, - "constructInfo": { - "fqn": "aws-cdk-lib.aws_iam.CfnPolicy", - "version": "0.0.0" - } - } - }, - "constructInfo": { - "fqn": "aws-cdk-lib.aws_iam.Policy", + "fqn": "aws-cdk-lib.aws_iam.CfnPolicy", "version": "0.0.0" } } }, "constructInfo": { - "fqn": "aws-cdk-lib.aws_iam.Role", + "fqn": "aws-cdk-lib.aws_iam.Policy", "version": "0.0.0" } - }, - "Code8835353412338ec0bac0ee05542d1c16": { - "id": "Code8835353412338ec0bac0ee05542d1c16", - "path": "aws-glue-job-python-shell/ShellJob/Code8835353412338ec0bac0ee05542d1c16", + } + }, + "constructInfo": { + "fqn": "aws-cdk-lib.aws_iam.Role", + "version": "0.0.0" + } + }, + "BasicShellJob39": { + "id": "BasicShellJob39", + "path": "aws-glue-job-python-shell/BasicShellJob39", + "children": { + "Code2907ea7be4a583708cfffc21b3df1dfa": { + "id": "Code2907ea7be4a583708cfffc21b3df1dfa", + "path": "aws-glue-job-python-shell/BasicShellJob39/Code2907ea7be4a583708cfffc21b3df1dfa", "children": { "Stage": { "id": "Stage", - "path": "aws-glue-job-python-shell/ShellJob/Code8835353412338ec0bac0ee05542d1c16/Stage", + "path": 
"aws-glue-job-python-shell/BasicShellJob39/Code2907ea7be4a583708cfffc21b3df1dfa/Stage", "constructInfo": { "fqn": "aws-cdk-lib.AssetStaging", "version": "0.0.0" @@ -158,7 +158,7 @@ }, "AssetBucket": { "id": "AssetBucket", - "path": "aws-glue-job-python-shell/ShellJob/Code8835353412338ec0bac0ee05542d1c16/AssetBucket", + "path": "aws-glue-job-python-shell/BasicShellJob39/Code2907ea7be4a583708cfffc21b3df1dfa/AssetBucket", "constructInfo": { "fqn": "aws-cdk-lib.aws_s3.BucketBase", "version": "0.0.0" @@ -172,7 +172,7 @@ }, "Resource": { "id": "Resource", - "path": "aws-glue-job-python-shell/ShellJob/Resource", + "path": "aws-glue-job-python-shell/BasicShellJob39/Resource", "attributes": { "aws:cdk:cloudformation:type": "AWS::Glue::Job", "aws:cdk:cloudformation:props": { @@ -190,24 +190,19 @@ ] ] }, - "pythonVersion": "3" + "pythonVersion": "3.9" }, "defaultArguments": { - "--job-language": "python", - "arg1": "value1", - "arg2": "value2" + "library-set": "analytics" }, - "glueVersion": "1.0", + "glueVersion": "3.0", "maxCapacity": 0.0625, - "name": "ShellJob", + "maxRetries": 0, "role": { "Fn::GetAtt": [ - "ShellJobServiceRoleCF97BC4B", + "IAMServiceRole61C662C4", "Arn" ] - }, - "tags": { - "key": "value" } } }, @@ -218,149 +213,66 @@ } }, "constructInfo": { - "fqn": "@aws-cdk/aws-glue-alpha.Job", + "fqn": "@aws-cdk/aws-glue-alpha.PythonShellJob", "version": "0.0.0" } }, - "ShellJob39": { - "id": "ShellJob39", - "path": "aws-glue-job-python-shell/ShellJob39", + "BasicShellJob": { + "id": "BasicShellJob", + "path": "aws-glue-job-python-shell/BasicShellJob", "children": { - "ServiceRole": { - "id": "ServiceRole", - "path": "aws-glue-job-python-shell/ShellJob39/ServiceRole", - "children": { - "ImportServiceRole": { - "id": "ImportServiceRole", - "path": "aws-glue-job-python-shell/ShellJob39/ServiceRole/ImportServiceRole", - "constructInfo": { - "fqn": "aws-cdk-lib.Resource", - "version": "0.0.0" - } - }, - "Resource": { - "id": "Resource", - "path": 
"aws-glue-job-python-shell/ShellJob39/ServiceRole/Resource", - "attributes": { - "aws:cdk:cloudformation:type": "AWS::IAM::Role", - "aws:cdk:cloudformation:props": { - "assumeRolePolicyDocument": { - "Statement": [ + "Resource": { + "id": "Resource", + "path": "aws-glue-job-python-shell/BasicShellJob/Resource", + "attributes": { + "aws:cdk:cloudformation:type": "AWS::Glue::Job", + "aws:cdk:cloudformation:props": { + "command": { + "name": "pythonshell", + "scriptLocation": { + "Fn::Join": [ + "", + [ + "s3://", { - "Action": "sts:AssumeRole", - "Effect": "Allow", - "Principal": { - "Service": "glue.amazonaws.com" - } - } - ], - "Version": "2012-10-17" - }, - "managedPolicyArns": [ - { - "Fn::Join": [ - "", - [ - "arn:", - { - "Ref": "AWS::Partition" - }, - ":iam::aws:policy/service-role/AWSGlueServiceRole" - ] - ] - } - ] - } - }, - "constructInfo": { - "fqn": "aws-cdk-lib.aws_iam.CfnRole", - "version": "0.0.0" - } - }, - "DefaultPolicy": { - "id": "DefaultPolicy", - "path": "aws-glue-job-python-shell/ShellJob39/ServiceRole/DefaultPolicy", - "children": { - "Resource": { - "id": "Resource", - "path": "aws-glue-job-python-shell/ShellJob39/ServiceRole/DefaultPolicy/Resource", - "attributes": { - "aws:cdk:cloudformation:type": "AWS::IAM::Policy", - "aws:cdk:cloudformation:props": { - "policyDocument": { - "Statement": [ - { - "Action": [ - "s3:GetBucket*", - "s3:GetObject*", - "s3:List*" - ], - "Effect": "Allow", - "Resource": [ - { - "Fn::Join": [ - "", - [ - "arn:", - { - "Ref": "AWS::Partition" - }, - ":s3:::", - { - "Fn::Sub": "cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}" - }, - "/*" - ] - ] - }, - { - "Fn::Join": [ - "", - [ - "arn:", - { - "Ref": "AWS::Partition" - }, - ":s3:::", - { - "Fn::Sub": "cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}" - } - ] - ] - } - ] - } - ], - "Version": "2012-10-17" + "Fn::Sub": "cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}" }, - "policyName": "ShellJob39ServiceRoleDefaultPolicy38A33919", - "roles": [ - 
{ - "Ref": "ShellJob39ServiceRole2F6F3768" - } - ] - } - }, - "constructInfo": { - "fqn": "aws-cdk-lib.aws_iam.CfnPolicy", - "version": "0.0.0" - } - } + "/432033e3218068a915d2532fa9be7858a12b228a2ae6e5c10faccd9097b1e855.py" + ] + ] + }, + "pythonVersion": "3" }, - "constructInfo": { - "fqn": "aws-cdk-lib.aws_iam.Policy", - "version": "0.0.0" + "defaultArguments": {}, + "glueVersion": "1.0", + "maxCapacity": 0.0625, + "maxRetries": 0, + "role": { + "Fn::GetAtt": [ + "IAMServiceRole61C662C4", + "Arn" + ] } } }, "constructInfo": { - "fqn": "aws-cdk-lib.aws_iam.Role", + "fqn": "aws-cdk-lib.aws_glue.CfnJob", "version": "0.0.0" } - }, + } + }, + "constructInfo": { + "fqn": "@aws-cdk/aws-glue-alpha.PythonShellJob", + "version": "0.0.0" + } + }, + "DetailedShellJob39": { + "id": "DetailedShellJob39", + "path": "aws-glue-job-python-shell/DetailedShellJob39", + "children": { "Resource": { "id": "Resource", - "path": "aws-glue-job-python-shell/ShellJob39/Resource", + "path": "aws-glue-job-python-shell/DetailedShellJob39/Resource", "attributes": { "aws:cdk:cloudformation:type": "AWS::Glue::Job", "aws:cdk:cloudformation:props": { @@ -381,16 +293,18 @@ "pythonVersion": "3.9" }, "defaultArguments": { - "--job-language": "python", + "library-set": "analytics", "arg1": "value1", "arg2": "value2" }, + "description": "My detailed Python 3.9 Shell Job", "glueVersion": "3.0", "maxCapacity": 1, - "name": "ShellJob39", + "maxRetries": 0, + "name": "My Python 3.9 Shell Job", "role": { "Fn::GetAtt": [ - "ShellJob39ServiceRole2F6F3768", + "IAMServiceRole61C662C4", "Arn" ] }, @@ -406,7 +320,7 @@ } }, "constructInfo": { - "fqn": "@aws-cdk/aws-glue-alpha.Job", + "fqn": "@aws-cdk/aws-glue-alpha.PythonShellJob", "version": "0.0.0" } }, @@ -445,7 +359,7 @@ "path": "aws-glue-job-python-shell-integ-test/DefaultTest/Default", "constructInfo": { "fqn": "constructs.Construct", - "version": "10.2.69" + "version": "10.3.0" } }, "DeployAssert": { @@ -491,7 +405,7 @@ "path": "Tree", "constructInfo": { 
"fqn": "constructs.Construct", - "version": "10.2.69" + "version": "10.3.0" } } }, From e84444964b22cd67bb171f45d21cdf83fec2f623 Mon Sep 17 00:00:00 2001 From: Chris Williams Date: Mon, 11 Mar 2024 10:23:37 +0000 Subject: [PATCH 06/51] Updated Python Shell Job unit tests to validate default values --- ...{python-shell-job.ts => python-shell-job.test.ts} | 12 ++++++++++++ 1 file changed, 12 insertions(+) rename packages/@aws-cdk/aws-glue-alpha/test/{python-shell-job.ts => python-shell-job.test.ts} (78%) diff --git a/packages/@aws-cdk/aws-glue-alpha/test/python-shell-job.ts b/packages/@aws-cdk/aws-glue-alpha/test/python-shell-job.test.ts similarity index 78% rename from packages/@aws-cdk/aws-glue-alpha/test/python-shell-job.ts rename to packages/@aws-cdk/aws-glue-alpha/test/python-shell-job.test.ts index d8d5eaeaf40ba..b76fdc7cc8055 100644 --- a/packages/@aws-cdk/aws-glue-alpha/test/python-shell-job.ts +++ b/packages/@aws-cdk/aws-glue-alpha/test/python-shell-job.test.ts @@ -38,5 +38,17 @@ describe('Job', () => { GlueVersion: '3.0', }); }); + + test('Default Max Retries should be 0', () => { + Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { + MaxRetries: 0, + }); + }); + + test('Default Max Capacity should be 0.0625', () => { + Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { + MaxCapacity: 0.0625, + }); + }); }); }); \ No newline at end of file From 8b367fa2c272debf86ece0a26d4e314f6a10fbe4 Mon Sep 17 00:00:00 2001 From: "Janardhan (Janny) Molumuri" Date: Tue, 9 Apr 2024 13:19:26 +0000 Subject: [PATCH 07/51] PySpark Streaming job --- .../lib/jobs/pyspark-streaming-job.ts | 180 ++++++++++++++++++ 1 file changed, 180 insertions(+) create mode 100644 packages/@aws-cdk/aws-glue-alpha/lib/jobs/pyspark-streaming-job.ts diff --git a/packages/@aws-cdk/aws-glue-alpha/lib/jobs/pyspark-streaming-job.ts b/packages/@aws-cdk/aws-glue-alpha/lib/jobs/pyspark-streaming-job.ts new file mode 100644 index 0000000000000..70955bb2c53e6 --- /dev/null 
+++ b/packages/@aws-cdk/aws-glue-alpha/lib/jobs/pyspark-streaming-job.ts @@ -0,0 +1,180 @@ +/** + * Python Spark Streaming Jobs class + * + * A Streaming job is similar to an ETL job, except that it performs ETL on data streams + * using the Apache Spark Structured Streaming framework. + * These jobs will default to use Python 3.9. + * + * Similar to ETL jobs, streaming job supports Scala and Python languages. Similar to ETL, + * it supports G1 and G2 worker type and 2.0, 3.0 and 4.0 version. We’ll default to G2 worker + * and 4.0 version for streaming jobs which developers can override. + * We will enable —enable-metrics, —enable-spark-ui, —enable-continuous-cloudwatch-log. + * + * RFC : https://github.com/aws/aws-cdk-rfcs/blob/main/text/0497-glue-l2-construct.md + */ + +import { CfnJob } from 'aws-cdk-lib/aws-glue'; +import * as iam from 'aws-cdk-lib/aws-iam'; +import { Bucket } from 'aws-cdk-lib/aws-s3'; +import { Job, JobProperties } from './job'; +import { Construct } from 'constructs'; +import { JobType, GlueVersion, JobLanguage, PythonVersion, WorkerType } from '../constants'; +import { SparkUIProps, SparkUILoggingLocation, validateSparkUiPrefix, cleanSparkUiPrefixForGrant } from './spark-ui'; + +/** + * Properties for creating a Python Spark ETL job + */ +export interface PySparkStreamingJobProps extends JobProperties { + + /** + * Enables the Spark UI debugging and monitoring with the specified props. + * + * @default - Spark UI debugging and monitoring is disabled. 
+ * + * @see https://docs.aws.amazon.com/glue/latest/dg/monitor-spark-ui-jobs.html + * @see https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-etl-glue-arguments.html + */ + readonly sparkUI?: SparkUIProps; + + /** + * Extra Python Files S3 URL (optional) + * S3 URL where additional python dependencies are located + */ + readonly extraPythonFiles?: string[]; +} + +/** + * A Python Spark Streaming Glue Job + */ +export class pySparkStreamingJob extends Job { + + // Implement abstract Job attributes + public readonly jobArn: string; + public readonly jobName: string; + public readonly role: iam.IRole; + public readonly grantPrincipal: iam.IPrincipal; + + /** + * The Spark UI logs location if Spark UI monitoring and debugging is enabled. + * + * @see https://docs.aws.amazon.com/glue/latest/dg/monitor-spark-ui-jobs.html + * @see https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-etl-glue-arguments.html + */ + public readonly sparkUILoggingLocation?: SparkUILoggingLocation; + + /** + * pySparkStreamingJob constructor + * + * @param scope + * @param id + * @param props + */ + constructor(scope: Construct, id: string, props: PySparkStreamingJobProps) { + super(scope, id, { + physicalName: props.jobName, + }); + + // Set up role and permissions for principal + this.role = props.role, { + assumedBy: new iam.ServicePrincipal('glue.amazonaws.com'), + managedPolicies: [iam.ManagedPolicy.fromAwsManagedPolicyName('service-role/AWSGlueServiceRole')], + }; + this.grantPrincipal = this.role; + + // Enable SparkUI by default as a best practice + const sparkUIArgs = props.sparkUI?.bucket ? this.setupSparkUI(this.role, props.sparkUI) : undefined; + this.sparkUILoggingLocation = sparkUIArgs?.location; + + // Enable CloudWatch metrics and continuous logging by default as a best practice + const continuousLoggingArgs = props.continuousLogging?.enabled ? 
this.setupContinuousLogging(this.role, props.continuousLogging) : {}; + const profilingMetricsArgs = { '--enable-metrics': '' }; + + // Gather executable arguments + const executableArgs = this.executableArguments(props); + + // Conbine command line arguments into a single line item + const defaultArguments = { + ...executableArgs, + ...continuousLoggingArgs, + ...profilingMetricsArgs, + ...sparkUIArgs?.args, + ...this.checkNoReservedArgs(props.defaultArguments), + }; + + if ((!props.workerType && props.numberOrWorkers !== undefined) || (props.workerType && props.numberOrWorkers === undefined)) { + throw new Error('Both workerType and numberOrWorkers must be set'); + } + + const jobResource = new CfnJob(this, 'Resource', { + name: props.jobName, + description: props.description, + role: this.role.roleArn, + command: { + name: JobType.STREAMING, + scriptLocation: this.codeS3ObjectUrl(props.script), + pythonVersion: PythonVersion.THREE_NINE, + }, + glueVersion: props.glueVersion ? props.glueVersion : GlueVersion.V4_0, + workerType: props.workerType ? props.workerType : WorkerType.G_2X, + numberOfWorkers: props.numberOrWorkers, + maxRetries: props.maxRetries, + executionProperty: props.maxConcurrentRuns ? { maxConcurrentRuns: props.maxConcurrentRuns } : undefined, + timeout: props.timeout?.toMinutes(), + connections: props.connections ? 
{ connections: props.connections.map((connection) => connection.connectionName) } : undefined, + securityConfiguration: props.securityConfiguration?.securityConfigurationName, + tags: props.tags, + defaultArguments, + }); + + const resourceName = this.getResourceNameAttribute(jobResource.ref); + this.jobArn = this.buildJobArn(this, resourceName); + this.jobName = resourceName; + } + + /** + * Set the executable arguments with best practices enabled by default + * + * @param props + * @returns An array of arguments for Glue to use on execution + */ + private executableArguments(props: PySparkStreamingJobProps) { + const args: { [key: string]: string } = {}; + args['--job-language'] = JobLanguage.PYTHON; + + // TODO: Confirm with Glue service team what the mapping is from extra-x to job language, if any + if (props.extraPythonFiles && props.extraPythonFiles.length > 0) { + //args['--extra-py-files'] = props.extraPythonFiles.map(code => this.codeS3ObjectUrl(code)).join(','); + } + + // if (props.extraJars && props.extraJars?.length > 0) { + // args['--extra-jars'] = props.extraJars.map(code => this.codeS3ObjectUrl(code)).join(','); + // } + // if (props.extraFiles && props.extraFiles.length > 0) { + // args['--extra-files'] = props.extraFiles.map(code => this.codeS3ObjectUrl(code)).join(','); + // } + // if (props.extraJarsFirst) { + // args['--user-jars-first'] = 'true'; + // } + + return args; + } + + private setupSparkUI(role: iam.IRole, sparkUiProps: SparkUIProps) { + + validateSparkUiPrefix(sparkUiProps.prefix); + const bucket = sparkUiProps.bucket ?? 
new Bucket(this, 'SparkUIBucket'); + bucket.grantReadWrite(role, cleanSparkUiPrefixForGrant(sparkUiProps.prefix)); + const args = { + '--enable-spark-ui': 'true', + '--spark-event-logs-path': bucket.s3UrlForObject(sparkUiProps.prefix), + }; + + return { + location: { + prefix: sparkUiProps.prefix, + bucket, + }, + args, + }; + } +} \ No newline at end of file From d19f5cc9c3ea783eae99e1d5cda978fbb45d6611 Mon Sep 17 00:00:00 2001 From: "Janardhan (Janny) Molumuri" Date: Tue, 9 Apr 2024 13:47:02 +0000 Subject: [PATCH 08/51] PySpark Streaming job --- .../lib/jobs/pyspark-streaming-job.ts | 2 +- .../lib/jobs/scala-spark-streaming-job.ts | 14 ++++++++++++++ 2 files changed, 15 insertions(+), 1 deletion(-) create mode 100644 packages/@aws-cdk/aws-glue-alpha/lib/jobs/scala-spark-streaming-job.ts diff --git a/packages/@aws-cdk/aws-glue-alpha/lib/jobs/pyspark-streaming-job.ts b/packages/@aws-cdk/aws-glue-alpha/lib/jobs/pyspark-streaming-job.ts index 70955bb2c53e6..fc7824351099d 100644 --- a/packages/@aws-cdk/aws-glue-alpha/lib/jobs/pyspark-streaming-job.ts +++ b/packages/@aws-cdk/aws-glue-alpha/lib/jobs/pyspark-streaming-job.ts @@ -19,7 +19,7 @@ import { Bucket } from 'aws-cdk-lib/aws-s3'; import { Job, JobProperties } from './job'; import { Construct } from 'constructs'; import { JobType, GlueVersion, JobLanguage, PythonVersion, WorkerType } from '../constants'; -import { SparkUIProps, SparkUILoggingLocation, validateSparkUiPrefix, cleanSparkUiPrefixForGrant } from './spark-ui'; +import { SparkUIProps, SparkUILoggingLocation, validateSparkUiPrefix, cleanSparkUiPrefixForGrant } from './spark-ui-utils'; /** * Properties for creating a Python Spark ETL job diff --git a/packages/@aws-cdk/aws-glue-alpha/lib/jobs/scala-spark-streaming-job.ts b/packages/@aws-cdk/aws-glue-alpha/lib/jobs/scala-spark-streaming-job.ts new file mode 100644 index 0000000000000..c5e2ba9e1c42e --- /dev/null +++ b/packages/@aws-cdk/aws-glue-alpha/lib/jobs/scala-spark-streaming-job.ts @@ -0,0 +1,14 @@ +/** 
+ * Scala Streaming Jobs class + * + * A Streaming job is similar to an ETL job, except that it performs ETL on data streams + * using the Apache Spark Structured Streaming framework. + * These jobs will default to use Python 3.9. + * + * Similar to ETL jobs, streaming job supports Scala and Python languages. Similar to ETL, + * it supports G1 and G2 worker type and 2.0, 3.0 and 4.0 version. We’ll default to G2 worker + * and 4.0 version for streaming jobs which developers can override. + * We will enable —enable-metrics, —enable-spark-ui, —enable-continuous-cloudwatch-log. + * + * RFC: https://github.com/aws/aws-cdk-rfcs/blob/main/text/0497-glue-l2-construct.md + */ \ No newline at end of file From 080977820e5e61dd605e8adf60e1a45dc4b0468d Mon Sep 17 00:00:00 2001 From: "Janardhan (Janny) Molumuri" Date: Tue, 9 Apr 2024 15:21:50 +0000 Subject: [PATCH 09/51] Scala Spark Streaming Job class --- .../lib/jobs/scala-spark-streaming-job.ts | 171 +++++++++++++++++- 1 file changed, 170 insertions(+), 1 deletion(-) diff --git a/packages/@aws-cdk/aws-glue-alpha/lib/jobs/scala-spark-streaming-job.ts b/packages/@aws-cdk/aws-glue-alpha/lib/jobs/scala-spark-streaming-job.ts index c5e2ba9e1c42e..d664a1c457ed9 100644 --- a/packages/@aws-cdk/aws-glue-alpha/lib/jobs/scala-spark-streaming-job.ts +++ b/packages/@aws-cdk/aws-glue-alpha/lib/jobs/scala-spark-streaming-job.ts @@ -11,4 +11,173 @@ * We will enable —enable-metrics, —enable-spark-ui, —enable-continuous-cloudwatch-log. 
* * RFC: https://github.com/aws/aws-cdk-rfcs/blob/main/text/0497-glue-l2-construct.md - */ \ No newline at end of file + */ + +import { CfnJob } from 'aws-cdk-lib/aws-glue'; +import * as iam from 'aws-cdk-lib/aws-iam'; +import { Bucket } from 'aws-cdk-lib/aws-s3'; +import { Job, JobProperties } from './job'; +import { Construct } from 'constructs'; +import { JobType, GlueVersion, JobLanguage, PythonVersion, WorkerType } from '../constants'; +import { SparkUIProps, SparkUILoggingLocation, validateSparkUiPrefix, cleanSparkUiPrefixForGrant } from './spark-ui-utils'; + + +/** + * Properties for creating a Scala Spark ETL job + */ +export interface ScalaSparkStreamingJobProps extends JobProperties { + + /** + * Enables the Spark UI debugging and monitoring with the specified props. + * + * @default - Spark UI debugging and monitoring is disabled. + * + * @see https://docs.aws.amazon.com/glue/latest/dg/monitor-spark-ui-jobs.html + * @see https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-etl-glue-arguments.html + */ + readonly sparkUI?: SparkUIProps; + + /** + * Class name (required for Scala scripts) + * Package and class name for the entry point of Glue job execution for + * Java scripts + **/ + className: string; + + /** + * Extra Jars S3 URL (optional) + * S3 URL where additional jar dependencies are located + */ + readonly extraJars?: string[]; +} + +/** + * A Scala Spark Streaming Glue Job + */ +export class ScalaSparkStreamingJob extends Job { + + // Implement abstract Job attributes + public readonly jobArn: string; + public readonly jobName: string; + public readonly role: iam.IRole; + public readonly grantPrincipal: iam.IPrincipal; + + /** + * The Spark UI logs location if Spark UI monitoring and debugging is enabled. 
+ * + * @see https://docs.aws.amazon.com/glue/latest/dg/monitor-spark-ui-jobs.html + * @see https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-etl-glue-arguments.html + */ + public readonly sparkUILoggingLocation?: SparkUILoggingLocation; + + /** + * ScalaSparkStreamingJob constructor + * + * @param scope + * @param id + * @param props + */ + constructor(scope: Construct, id: string, props: ScalaSparkStreamingJobProps) { + super(scope, id, { + physicalName: props.jobName, + }); + + // Set up role and permissions for principal + this.role = props.role, { + assumedBy: new iam.ServicePrincipal('glue.amazonaws.com'), + managedPolicies: [iam.ManagedPolicy.fromAwsManagedPolicyName('service-role/AWSGlueServiceRole')], + }; + this.grantPrincipal = this.role; + + // Enable SparkUI by default as a best practice + const sparkUIArgs = props.sparkUI?.bucket ? this.setupSparkUI(this.role, props.sparkUI) : undefined; + this.sparkUILoggingLocation = sparkUIArgs?.location; + + // Enable CloudWatch metrics and continuous logging by default as a best practice + const continuousLoggingArgs = props.continuousLogging?.enabled ? 
this.setupContinuousLogging(this.role, props.continuousLogging) : {}; + const profilingMetricsArgs = { '--enable-metrics': '' }; + + // Gather executable arguments + const executableArgs = this.executableArguments(props); + + // Conbine command line arguments into a single line item + const defaultArguments = { + ...executableArgs, + ...continuousLoggingArgs, + ...profilingMetricsArgs, + ...sparkUIArgs?.args, + ...this.checkNoReservedArgs(props.defaultArguments), + }; + + if ((!props.workerType && props.numberOrWorkers !== undefined) || (props.workerType && props.numberOrWorkers === undefined)) { + throw new Error('Both workerType and numberOrWorkers must be set'); + } + + const jobResource = new CfnJob(this, 'Resource', { + name: props.jobName, + description: props.description, + role: this.role.roleArn, + command: { + name: JobType.STREAMING, + scriptLocation: this.codeS3ObjectUrl(props.script), + }, + glueVersion: props.glueVersion ? props.glueVersion : GlueVersion.V4_0, + workerType: props.workerType ? props.workerType : WorkerType.G_2X, + numberOfWorkers: props.numberOrWorkers, + maxRetries: props.maxRetries, + executionProperty: props.maxConcurrentRuns ? { maxConcurrentRuns: props.maxConcurrentRuns } : undefined, + timeout: props.timeout?.toMinutes(), + connections: props.connections ? 
{ connections: props.connections.map((connection) => connection.connectionName) } : undefined, + securityConfiguration: props.securityConfiguration?.securityConfigurationName, + tags: props.tags, + defaultArguments, + }); + + const resourceName = this.getResourceNameAttribute(jobResource.ref); + this.jobArn = this.buildJobArn(this, resourceName); + this.jobName = resourceName; + } + + /** + * Set the executable arguments with best practices enabled by default + * + * @param props + * @returns An array of arguments for Glue to use on execution + */ + private executableArguments(props: ScalaSparkStreamingJobProps) { + const args: { [key: string]: string } = {}; + args['--job-language'] = JobLanguage.PYTHON; + + // TODO: Confirm with Glue service team what the mapping is from extra-x to job language, if any + if (props.extraJars && props.extraJars?.length > 0) { + // args['--extra-jars'] = props.extraJars.map(code => this.codeS3ObjectUrl(code)).join(','); + } + // if (props.extraFiles && props.extraFiles.length > 0) { + // args['--extra-files'] = props.extraFiles.map(code => this.codeS3ObjectUrl(code)).join(','); + // } + // if (props.extraJarsFirst) { + // args['--user-jars-first'] = 'true'; + // } + + return args; + } + + private setupSparkUI(role: iam.IRole, sparkUiProps: SparkUIProps) { + + validateSparkUiPrefix(sparkUiProps.prefix); + const bucket = sparkUiProps.bucket ?? 
new Bucket(this, 'SparkUIBucket'); + bucket.grantReadWrite(role, cleanSparkUiPrefixForGrant(sparkUiProps.prefix)); + const args = { + '--enable-spark-ui': 'true', + '--spark-event-logs-path': bucket.s3UrlForObject(sparkUiProps.prefix), + }; + + return { + location: { + prefix: sparkUiProps.prefix, + bucket, + }, + args, + }; + } +} From 6118d80862cbcbd6dca7a83fc3971a37022353dc Mon Sep 17 00:00:00 2001 From: Prashanna B Date: Fri, 12 Apr 2024 08:09:57 +0000 Subject: [PATCH 10/51] PySpark and Scala Flex ETL jobs L2 constructs --- .../@aws-cdk/aws-glue-alpha/lib/constants.ts | 7 + packages/@aws-cdk/aws-glue-alpha/lib/index.ts | 2 + .../lib/jobs/pysparkflex-etl-job.ts | 180 +++++++++ .../lib/jobs/scala-spark-flex-etl-job.ts | 203 ++++++++++ ...9be7858a12b228a2ae6e5c10faccd9097b1e855.py | 1 + .../aws-glue-job-pysparkflex-etl.assets.json | 32 ++ ...aws-glue-job-pysparkflex-etl.template.json | 204 ++++++++++ ...efaultTestDeployAssert3F3EC951.assets.json | 19 + ...aultTestDeployAssert3F3EC951.template.json | 36 ++ .../cdk.out | 1 + .../integ.json | 12 + .../manifest.json | 131 ++++++ .../tree.json | 373 +++++++++++++++++ .../test/integ.job-pysparkflex-etl.ts | 66 +++ ...3f8703573eb6b69528c5d52190d72579c91602.jar | Bin 0 -> 782 bytes ...ws-glue-job-scalasparkflex-etl.assets.json | 32 ++ ...-glue-job-scalasparkflex-etl.template.json | 206 ++++++++++ ...efaultTestDeployAssert8009E6FC.assets.json | 19 + ...aultTestDeployAssert8009E6FC.template.json | 36 ++ .../cdk.out | 1 + .../integ.json | 12 + .../manifest.json | 131 ++++++ .../tree.json | 375 ++++++++++++++++++ .../test/integ.job-scalasparkflex-etl.ts | 63 +++ .../test/job-jar/helloworld.jar | Bin 0 -> 782 bytes .../test/pysparkflex-etl-jobs.test.ts | 54 +++ .../test/scalasparkflex-etl-jobs.test.ts | 57 +++ 27 files changed, 2253 insertions(+) create mode 100644 packages/@aws-cdk/aws-glue-alpha/lib/jobs/pysparkflex-etl-job.ts create mode 100644 packages/@aws-cdk/aws-glue-alpha/lib/jobs/scala-spark-flex-etl-job.ts create 
mode 100644 packages/@aws-cdk/aws-glue-alpha/test/integ.job-pysparkflex-etl.js.snapshot/asset.432033e3218068a915d2532fa9be7858a12b228a2ae6e5c10faccd9097b1e855.py create mode 100644 packages/@aws-cdk/aws-glue-alpha/test/integ.job-pysparkflex-etl.js.snapshot/aws-glue-job-pysparkflex-etl.assets.json create mode 100644 packages/@aws-cdk/aws-glue-alpha/test/integ.job-pysparkflex-etl.js.snapshot/aws-glue-job-pysparkflex-etl.template.json create mode 100644 packages/@aws-cdk/aws-glue-alpha/test/integ.job-pysparkflex-etl.js.snapshot/awsgluejobpysparkflexetlintegtestDefaultTestDeployAssert3F3EC951.assets.json create mode 100644 packages/@aws-cdk/aws-glue-alpha/test/integ.job-pysparkflex-etl.js.snapshot/awsgluejobpysparkflexetlintegtestDefaultTestDeployAssert3F3EC951.template.json create mode 100644 packages/@aws-cdk/aws-glue-alpha/test/integ.job-pysparkflex-etl.js.snapshot/cdk.out create mode 100644 packages/@aws-cdk/aws-glue-alpha/test/integ.job-pysparkflex-etl.js.snapshot/integ.json create mode 100644 packages/@aws-cdk/aws-glue-alpha/test/integ.job-pysparkflex-etl.js.snapshot/manifest.json create mode 100644 packages/@aws-cdk/aws-glue-alpha/test/integ.job-pysparkflex-etl.js.snapshot/tree.json create mode 100644 packages/@aws-cdk/aws-glue-alpha/test/integ.job-pysparkflex-etl.ts create mode 100644 packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalasparkflex-etl.js.snapshot/asset.e305655b966b957f91fcec580e3f8703573eb6b69528c5d52190d72579c91602.jar create mode 100644 packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalasparkflex-etl.js.snapshot/aws-glue-job-scalasparkflex-etl.assets.json create mode 100644 packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalasparkflex-etl.js.snapshot/aws-glue-job-scalasparkflex-etl.template.json create mode 100644 packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalasparkflex-etl.js.snapshot/awsgluejobscalasparkflexetlintegtestDefaultTestDeployAssert8009E6FC.assets.json create mode 100644 
packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalasparkflex-etl.js.snapshot/awsgluejobscalasparkflexetlintegtestDefaultTestDeployAssert8009E6FC.template.json create mode 100644 packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalasparkflex-etl.js.snapshot/cdk.out create mode 100644 packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalasparkflex-etl.js.snapshot/integ.json create mode 100644 packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalasparkflex-etl.js.snapshot/manifest.json create mode 100644 packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalasparkflex-etl.js.snapshot/tree.json create mode 100644 packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalasparkflex-etl.ts create mode 100644 packages/@aws-cdk/aws-glue-alpha/test/job-jar/helloworld.jar create mode 100644 packages/@aws-cdk/aws-glue-alpha/test/pysparkflex-etl-jobs.test.ts create mode 100644 packages/@aws-cdk/aws-glue-alpha/test/scalasparkflex-etl-jobs.test.ts diff --git a/packages/@aws-cdk/aws-glue-alpha/lib/constants.ts b/packages/@aws-cdk/aws-glue-alpha/lib/constants.ts index 7b1cfd7896fdf..5cae06b9a8006 100644 --- a/packages/@aws-cdk/aws-glue-alpha/lib/constants.ts +++ b/packages/@aws-cdk/aws-glue-alpha/lib/constants.ts @@ -51,6 +51,12 @@ export enum WorkerType { Z_2X = 'Z.2X', } +/** + * The number of workers of a defined workerType that are allocated when a job runs. + * + * @see https://docs.aws.amazon.com/glue/latest/dg/aws-glue-api-jobs-job.html + */ + /** * Job states emitted by Glue to CloudWatch Events. 
* @@ -196,6 +202,7 @@ export enum PythonVersion { * Python 3.9 (the exact version depends on GlueVersion and JobCommand used) */ THREE_NINE = '3.9', + } /** diff --git a/packages/@aws-cdk/aws-glue-alpha/lib/index.ts b/packages/@aws-cdk/aws-glue-alpha/lib/index.ts index 5c5d13f9b5c76..c63140567e53a 100644 --- a/packages/@aws-cdk/aws-glue-alpha/lib/index.ts +++ b/packages/@aws-cdk/aws-glue-alpha/lib/index.ts @@ -16,9 +16,11 @@ export * from './constants'; export * from './jobs/job'; // export * from './jobs/flex-job'; export * from './jobs/pyspark-etl-job'; +export * from './jobs/pysparkflex-etl-job'; // export * from './jobs/python-shell-job'; // export * from './jobs/ray-job'; // export * from './jobs/scala-spark-etl-job'; +export * from './jobs/scala-spark-flex-etl-job'; export * from './jobs/spark-ui-utils'; // export * from './jobs/spark-etl-job'; //export * from './jobs/streaming-job'; diff --git a/packages/@aws-cdk/aws-glue-alpha/lib/jobs/pysparkflex-etl-job.ts b/packages/@aws-cdk/aws-glue-alpha/lib/jobs/pysparkflex-etl-job.ts new file mode 100644 index 0000000000000..296d1947524e1 --- /dev/null +++ b/packages/@aws-cdk/aws-glue-alpha/lib/jobs/pysparkflex-etl-job.ts @@ -0,0 +1,180 @@ +import * as iam from 'aws-cdk-lib/aws-iam'; +import { Bucket } from 'aws-cdk-lib/aws-s3'; +import { CfnJob } from 'aws-cdk-lib/aws-glue'; +import { Job, JobProperties } from './job'; +import { Construct } from 'constructs'; +import { JobType, GlueVersion, JobLanguage, PythonVersion, WorkerType, ExecutionClass } from '../constants'; +import { SparkUIProps, SparkUILoggingLocation, validateSparkUiPrefix, cleanSparkUiPrefixForGrant } from './spark-ui-utils'; + +/** + * Flex Jobs class + * + * Flex jobs supports Python and Scala language. + * The flexible execution class is appropriate for non-urgent jobs such as + * pre-production jobs, testing, and one-time data loads. 
+ * Flexible job runs are supported for jobs using AWS Glue version 3.0 or later and G.1X or + * G.2X worker types but will default to the latest version of Glue (currently Glue 3.0.) + * + * Similar to ETL, we’ll enable these features: —enable-metrics, —enable-spark-ui, + * —enable-continuous-cloudwatch-log + * + */ + +export interface PySparkFlexEtlJobProps extends JobProperties { + + /** + * Enables the Spark UI debugging and monitoring with the specified props. + * + * @default - Spark UI debugging and monitoring is disabled. + * + * @see https://docs.aws.amazon.com/glue/latest/dg/monitor-spark-ui-jobs.html + * @see https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-etl-glue-arguments.html + */ + readonly sparkUI?: SparkUIProps; + + /** + * Extra Python Files S3 URL (optional) + * S3 URL where additional python dependencies are located + * @default - no extra files + */ + readonly extraPythonFiles?: string[]; + +} + +/** + * A Python Spark ETL Glue Job + */ +export class PySparkFlexEtlJob extends Job { + + // Implement abstract Job attributes + public readonly jobArn: string; + public readonly jobName: string; + public readonly role: iam.IRole; + public readonly grantPrincipal: iam.IPrincipal; + + /** + * The Spark UI logs location if Spark UI monitoring and debugging is enabled. 
+ * + * @see https://docs.aws.amazon.com/glue/latest/dg/monitor-spark-ui-jobs.html + * @see https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-etl-glue-arguments.html + */ + public readonly sparkUILoggingLocation?: SparkUILoggingLocation; + + /** + * PySparkFlexEtlJob constructor + * + * @param scope + * @param id + * @param props + */ + constructor(scope: Construct, id: string, props: PySparkFlexEtlJobProps) { + super(scope, id, { + physicalName: props.jobName, + }); + + // Set up role and permissions for principal + this.role = props.role, { + assumedBy: new iam.ServicePrincipal('glue.amazonaws.com'), + managedPolicies: [iam.ManagedPolicy.fromAwsManagedPolicyName('service-role/AWSGlueServiceRole')], + }; + this.grantPrincipal = this.role; + + // Enable SparkUI by default as a best practice + const sparkUIArgs = props.sparkUI?.bucket ? this.setupSparkUI(this.role, props.sparkUI) : undefined; + this.sparkUILoggingLocation = sparkUIArgs?.location; + + // Enable CloudWatch metrics and continuous logging by default as a best practice + const continuousLoggingArgs = props.continuousLogging?.enabled ? 
this.setupContinuousLogging(this.role, props.continuousLogging) : {}; + const profilingMetricsArgs = { '--enable-metrics': '' }; + + // Gather executable arguments + const execuatbleArgs = this.executableArguments(props); + + // Combine command line arguments into a single line item + const defaultArguments = { + ...execuatbleArgs, + ...continuousLoggingArgs, + ...profilingMetricsArgs, + ...sparkUIArgs?.args, + ...this.checkNoReservedArgs(props.defaultArguments), + }; + + /*if ((!props.workerType && props.numberOrWorkers !== undefined) || (props.workerType && props.numberOrWorkers === undefined)) { + throw new Error('Both workerType and numberOrWorkers must be set'); + } */ + + const jobResource = new CfnJob(this, 'Resource', { + name: props.jobName, + description: props.description, + role: this.role.roleArn, + command: { + name: JobType.ETL, + scriptLocation: this.codeS3ObjectUrl(props.script), + pythonVersion: PythonVersion.THREE, + }, + glueVersion: props.glueVersion ? props.glueVersion : GlueVersion.V3_0, + workerType: props.workerType ? props.workerType : WorkerType.G_2X, + numberOfWorkers: props.numberOrWorkers ? props.numberOrWorkers : 10, + maxRetries: props.maxRetries, + executionProperty: props.maxConcurrentRuns ? { maxConcurrentRuns: props.maxConcurrentRuns } : undefined, + //notificationProperty: props.notifyDelayAfter ? { notifyDelayAfter: props.notifyDelayAfter.toMinutes() } : undefined, + timeout: props.timeout?.toMinutes(), + connections: props.connections ?
{ connections: props.connections.map((connection) => connection.connectionName) } : undefined, + securityConfiguration: props.securityConfiguration?.securityConfigurationName, + tags: props.tags, + executionClass: ExecutionClass.FLEX, + defaultArguments, + }); + + const resourceName = this.getResourceNameAttribute(jobResource.ref); + this.jobArn = this.buildJobArn(this, resourceName); + this.jobName = resourceName; + } + + /** + * Set the executable arguments with best practices enabled by default + * + * @param props + * @returns An array of arguments for Glue to use on execution + */ + private executableArguments(props: PySparkFlexEtlJobProps) { + const args: { [key: string]: string } = {}; + args['--job-language'] = JobLanguage.PYTHON; + + // TODO: Confirm with Glue service team what the mapping is from extra-x to job language, if any + if (props.extraPythonFiles && props.extraPythonFiles.length > 0) { + //args['--extra-py-files'] = props.extraPythonFiles.map(code => this.codeS3ObjectUrl(code)).join(','); + } + + // if (props.extraJars && props.extraJars?.length > 0) { + // args['--extra-jars'] = props.extraJars.map(code => this.codeS3ObjectUrl(code)).join(','); + // } + // if (props.extraFiles && props.extraFiles.length > 0) { + // args['--extra-files'] = props.extraFiles.map(code => this.codeS3ObjectUrl(code)).join(','); + // } + // if (props.extraJarsFirst) { + // args['--user-jars-first'] = 'true'; + // } + + return args; + } + + private setupSparkUI(role: iam.IRole, sparkUiProps: SparkUIProps) { + + validateSparkUiPrefix(sparkUiProps.prefix); + const bucket = sparkUiProps.bucket ?? 
new Bucket(this, 'SparkUIBucket'); + bucket.grantReadWrite(role, cleanSparkUiPrefixForGrant(sparkUiProps.prefix)); + const args = { + '--enable-spark-ui': 'true', + '--spark-event-logs-path': bucket.s3UrlForObject(sparkUiProps.prefix), + }; + + return { + location: { + prefix: sparkUiProps.prefix, + bucket, + }, + args, + }; + } +} \ No newline at end of file diff --git a/packages/@aws-cdk/aws-glue-alpha/lib/jobs/scala-spark-flex-etl-job.ts b/packages/@aws-cdk/aws-glue-alpha/lib/jobs/scala-spark-flex-etl-job.ts new file mode 100644 index 0000000000000..dc46dec4aeaae --- /dev/null +++ b/packages/@aws-cdk/aws-glue-alpha/lib/jobs/scala-spark-flex-etl-job.ts @@ -0,0 +1,203 @@ +/** + * Spark ETL Jobs class + * ETL jobs support pySpark and Scala languages, for which there are separate + * but similar constructors. ETL jobs default to the G2 worker type, but you + * can override this default with other supported worker type values + * (G1, G2, G4 and G8). ETL jobs default to Glue version 4.0, which you can + * override to 3.0. The following ETL features are enabled by default: + * --enable-metrics, --enable-spark-ui, --enable-continuous-cloudwatch-log. + * You can find more details about version, worker type and other features + * in Glue's public documentation. + */ + +import * as iam from 'aws-cdk-lib/aws-iam'; +import { Bucket } from 'aws-cdk-lib/aws-s3'; +import { CfnJob } from 'aws-cdk-lib/aws-glue'; +import { Job, JobProperties } from './job'; +import { Construct } from 'constructs'; +import { JobType, GlueVersion, JobLanguage, PythonVersion, WorkerType, ExecutionClass } from '../constants'; +import { SparkUIProps, SparkUILoggingLocation, validateSparkUiPrefix, cleanSparkUiPrefixForGrant } from './spark-ui-utils'; + +/** + * Flex Jobs class + * + * Flex jobs support the Python and Scala languages. + * The flexible execution class is appropriate for non-urgent jobs such as + * pre-production jobs, testing, and one-time data loads.
+ * Flexible job runs are supported for jobs using AWS Glue version 3.0 or later and G.1X or + * G.2X worker types but will default to the latest version of Glue (currently Glue 3.0). + * + * Similar to ETL, we’ll enable these features: --enable-metrics, --enable-spark-ui, + * --enable-continuous-cloudwatch-log + * + */ + +export interface ScalaSparkFlexEtlJobProps extends JobProperties { + + /** + * Enables the Spark UI debugging and monitoring with the specified props. + * + * @default - Spark UI debugging and monitoring is disabled. + * + * @see https://docs.aws.amazon.com/glue/latest/dg/monitor-spark-ui-jobs.html + * @see https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-etl-glue-arguments.html + */ + readonly sparkUI?: SparkUIProps; + + /** + * Extra Python Files S3 URL (optional) + * S3 URL where additional python dependencies are located + * @default - no extra files + */ + readonly extraPythonFiles?: string[]; + + /** + * Scala class passed to the ETL job as the `--class` default argument + * @default - none; the constructor throws if className is not set + */ + readonly className?: string; + +} + +/** + * A Scala Spark Flex ETL Glue Job + */ +export class ScalaSparkFlexEtlJob extends Job { + + // Implement abstract Job attributes + public readonly jobArn: string; + public readonly jobName: string; + public readonly role: iam.IRole; + public readonly grantPrincipal: iam.IPrincipal; + + /** + * The Spark UI logs location if Spark UI monitoring and debugging is enabled.
+ * + * @see https://docs.aws.amazon.com/glue/latest/dg/monitor-spark-ui-jobs.html + * @see https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-etl-glue-arguments.html + */ + public readonly sparkUILoggingLocation?: SparkUILoggingLocation; + + /** + * ScalaSparkFlexEtlJob constructor + * + * @param scope + * @param id + * @param props + */ + constructor(scope: Construct, id: string, props: ScalaSparkFlexEtlJobProps) { + super(scope, id, { + physicalName: props.jobName, + }); + + // Set up role and permissions for principal + this.role = props.role, { + assumedBy: new iam.ServicePrincipal('glue.amazonaws.com'), + managedPolicies: [iam.ManagedPolicy.fromAwsManagedPolicyName('service-role/AWSGlueServiceRole')], + }; + this.grantPrincipal = this.role; + + // Enable SparkUI by default as a best practice + const sparkUIArgs = props.sparkUI?.bucket ? this.setupSparkUI(this.role, props.sparkUI) : undefined; + this.sparkUILoggingLocation = sparkUIArgs?.location; + + // Enable CloudWatch metrics and continuous logging by default as a best practice + const continuousLoggingArgs = props.continuousLogging?.enabled ?
this.setupContinuousLogging(this.role, props.continuousLogging) : {}; + const profilingMetricsArgs = { '--enable-metrics': '' }; + + // Gather executable arguments + const execuatbleArgs = this.executableArguments(props); + + if (props.className === undefined) { + throw new Error('className must be set for Scala ETL Jobs'); + } + + // Combine command line arguments into a single line item + const defaultArguments = { + ...execuatbleArgs, + ...continuousLoggingArgs, + ...profilingMetricsArgs, + ...sparkUIArgs?.args, + ...this.checkNoReservedArgs(props.defaultArguments), + }; + + /*if ((!props.workerType && props.numberOrWorkers !== undefined) || (props.workerType && props.numberOrWorkers === undefined)) { + throw new Error('Both workerType and numberOrWorkers must be set'); + } */ + + const jobResource = new CfnJob(this, 'Resource', { + name: props.jobName, + description: props.description, + role: this.role.roleArn, + command: { + name: JobType.ETL, + scriptLocation: this.codeS3ObjectUrl(props.script), + pythonVersion: PythonVersion.THREE, + }, + glueVersion: props.glueVersion ? props.glueVersion : GlueVersion.V3_0, + workerType: props.workerType ? props.workerType : WorkerType.G_2X, + numberOfWorkers: props.numberOrWorkers ? props.numberOrWorkers : 10, + maxRetries: props.maxRetries, + executionProperty: props.maxConcurrentRuns ? { maxConcurrentRuns: props.maxConcurrentRuns } : undefined, + //notificationProperty: props.notifyDelayAfter ? { notifyDelayAfter: props.notifyDelayAfter.toMinutes() } : undefined, + timeout: props.timeout?.toMinutes(), + connections: props.connections ?
{ connections: props.connections.map((connection) => connection.connectionName) } : undefined, + securityConfiguration: props.securityConfiguration?.securityConfigurationName, + tags: props.tags, + executionClass: ExecutionClass.FLEX, + defaultArguments, + }); + + const resourceName = this.getResourceNameAttribute(jobResource.ref); + this.jobArn = this.buildJobArn(this, resourceName); + this.jobName = resourceName; + } + + /** + * Set the executable arguments with best practices enabled by default + * + * @param props + * @returns An array of arguments for Glue to use on execution + */ + private executableArguments(props: ScalaSparkFlexEtlJobProps) { + const args: { [key: string]: string } = {}; + args['--job-language'] = JobLanguage.SCALA; + args['--class'] = props.className!; + + // TODO: Confirm with Glue service team what the mapping is from extra-x to job language, if any + if (props.extraPythonFiles && props.extraPythonFiles.length > 0) { + //args['--extra-py-files'] = props.extraPythonFiles.map(code => this.codeS3ObjectUrl(code)).join(','); + } + + // if (props.extraJars && props.extraJars?.length > 0) { + // args['--extra-jars'] = props.extraJars.map(code => this.codeS3ObjectUrl(code)).join(','); + // } + // if (props.extraFiles && props.extraFiles.length > 0) { + // args['--extra-files'] = props.extraFiles.map(code => this.codeS3ObjectUrl(code)).join(','); + // } + // if (props.extraJarsFirst) { + // args['--user-jars-first'] = 'true'; + // } + + return args; + } + + private setupSparkUI(role: iam.IRole, sparkUiProps: SparkUIProps) { + + validateSparkUiPrefix(sparkUiProps.prefix); + const bucket = sparkUiProps.bucket ?? 
new Bucket(this, 'SparkUIBucket'); + bucket.grantReadWrite(role, cleanSparkUiPrefixForGrant(sparkUiProps.prefix)); + const args = { + '--enable-spark-ui': 'true', + '--spark-event-logs-path': bucket.s3UrlForObject(sparkUiProps.prefix), + }; + + return { + location: { + prefix: sparkUiProps.prefix, + bucket, + }, + args, + }; + } +} \ No newline at end of file diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pysparkflex-etl.js.snapshot/asset.432033e3218068a915d2532fa9be7858a12b228a2ae6e5c10faccd9097b1e855.py b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pysparkflex-etl.js.snapshot/asset.432033e3218068a915d2532fa9be7858a12b228a2ae6e5c10faccd9097b1e855.py new file mode 100644 index 0000000000000..e75154b7c390f --- /dev/null +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pysparkflex-etl.js.snapshot/asset.432033e3218068a915d2532fa9be7858a12b228a2ae6e5c10faccd9097b1e855.py @@ -0,0 +1 @@ +print("hello world") \ No newline at end of file diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pysparkflex-etl.js.snapshot/aws-glue-job-pysparkflex-etl.assets.json b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pysparkflex-etl.js.snapshot/aws-glue-job-pysparkflex-etl.assets.json new file mode 100644 index 0000000000000..b5ad08acc9ab4 --- /dev/null +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pysparkflex-etl.js.snapshot/aws-glue-job-pysparkflex-etl.assets.json @@ -0,0 +1,32 @@ +{ + "version": "36.0.0", + "files": { + "432033e3218068a915d2532fa9be7858a12b228a2ae6e5c10faccd9097b1e855": { + "source": { + "path": "asset.432033e3218068a915d2532fa9be7858a12b228a2ae6e5c10faccd9097b1e855.py", + "packaging": "file" + }, + "destinations": { + "current_account-current_region": { + "bucketName": "cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}", + "objectKey": "432033e3218068a915d2532fa9be7858a12b228a2ae6e5c10faccd9097b1e855.py", + "assumeRoleArn": 
"arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-file-publishing-role-${AWS::AccountId}-${AWS::Region}" + } + } + }, + "6f1761f37e0e58957866339a668ae140999f5800a1285a839297c3593438c1ea": { + "source": { + "path": "aws-glue-job-pysparkflex-etl.template.json", + "packaging": "file" + }, + "destinations": { + "current_account-current_region": { + "bucketName": "cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}", + "objectKey": "6f1761f37e0e58957866339a668ae140999f5800a1285a839297c3593438c1ea.json", + "assumeRoleArn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-file-publishing-role-${AWS::AccountId}-${AWS::Region}" + } + } + } + }, + "dockerImages": {} +} \ No newline at end of file diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pysparkflex-etl.js.snapshot/aws-glue-job-pysparkflex-etl.template.json b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pysparkflex-etl.js.snapshot/aws-glue-job-pysparkflex-etl.template.json new file mode 100644 index 0000000000000..af52f7c3eca39 --- /dev/null +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pysparkflex-etl.js.snapshot/aws-glue-job-pysparkflex-etl.template.json @@ -0,0 +1,204 @@ +{ + "Resources": { + "IAMServiceRole61C662C4": { + "Type": "AWS::IAM::Role", + "Properties": { + "AssumeRolePolicyDocument": { + "Statement": [ + { + "Action": "sts:AssumeRole", + "Effect": "Allow", + "Principal": { + "Service": "glue.amazonaws.com" + } + } + ], + "Version": "2012-10-17" + }, + "ManagedPolicyArns": [ + { + "Fn::Join": [ + "", + [ + "arn:", + { + "Ref": "AWS::Partition" + }, + ":iam::aws:policy/service-role/AWSGlueServiceRole" + ] + ] + } + ] + } + }, + "IAMServiceRoleDefaultPolicy379D1A0E": { + "Type": "AWS::IAM::Policy", + "Properties": { + "PolicyDocument": { + "Statement": [ + { + "Action": [ + "s3:GetBucket*", + "s3:GetObject*", + "s3:List*" + ], + "Effect": "Allow", + "Resource": [ + { + "Fn::Join": [ + "", + [ + "arn:", + { + "Ref": "AWS::Partition" + }, + ":s3:::", + { 
+ "Fn::Sub": "cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}" + }, + "/*" + ] + ] + }, + { + "Fn::Join": [ + "", + [ + "arn:", + { + "Ref": "AWS::Partition" + }, + ":s3:::", + { + "Fn::Sub": "cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}" + } + ] + ] + } + ] + } + ], + "Version": "2012-10-17" + }, + "PolicyName": "IAMServiceRoleDefaultPolicy379D1A0E", + "Roles": [ + { + "Ref": "IAMServiceRole61C662C4" + } + ] + } + }, + "BasicPySparkFlexEtlJobC50DC250": { + "Type": "AWS::Glue::Job", + "Properties": { + "Command": { + "Name": "glueetl", + "PythonVersion": "3", + "ScriptLocation": { + "Fn::Join": [ + "", + [ + "s3://", + { + "Fn::Sub": "cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}" + }, + "/432033e3218068a915d2532fa9be7858a12b228a2ae6e5c10faccd9097b1e855.py" + ] + ] + } + }, + "DefaultArguments": { + "--job-language": "python", + "--enable-metrics": "" + }, + "ExecutionClass": "FLEX", + "GlueVersion": "3.0", + "NumberOfWorkers": 10, + "Role": { + "Fn::GetAtt": [ + "IAMServiceRole61C662C4", + "Arn" + ] + }, + "WorkerType": "G.2X" + } + }, + "OverridePySparkFlexEtlJob8EE4CFA1": { + "Type": "AWS::Glue::Job", + "Properties": { + "Command": { + "Name": "glueetl", + "PythonVersion": "3", + "ScriptLocation": { + "Fn::Join": [ + "", + [ + "s3://", + { + "Fn::Sub": "cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}" + }, + "/432033e3218068a915d2532fa9be7858a12b228a2ae6e5c10faccd9097b1e855.py" + ] + ] + } + }, + "DefaultArguments": { + "--job-language": "python", + "--enable-metrics": "", + "arg1": "value1", + "arg2": "value2" + }, + "Description": "Optional Override PySpark Flex Etl Job", + "ExecutionClass": "FLEX", + "GlueVersion": "3.0", + "Name": "Optional Override PySpark Flex Etl Job", + "NumberOfWorkers": 20, + "Role": { + "Fn::GetAtt": [ + "IAMServiceRole61C662C4", + "Arn" + ] + }, + "Tags": { + "key": "value" + }, + "Timeout": 15, + "WorkerType": "G.1X" + } + } + }, + "Parameters": { + "BootstrapVersion": { + "Type": 
"AWS::SSM::Parameter::Value", + "Default": "/cdk-bootstrap/hnb659fds/version", + "Description": "Version of the CDK Bootstrap resources in this environment, automatically retrieved from SSM Parameter Store. [cdk:skip]" + } + }, + "Rules": { + "CheckBootstrapVersion": { + "Assertions": [ + { + "Assert": { + "Fn::Not": [ + { + "Fn::Contains": [ + [ + "1", + "2", + "3", + "4", + "5" + ], + { + "Ref": "BootstrapVersion" + } + ] + } + ] + }, + "AssertDescription": "CDK bootstrap stack version 6 required. Please run 'cdk bootstrap' with a recent version of the CDK CLI." + } + ] + } + } +} \ No newline at end of file diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pysparkflex-etl.js.snapshot/awsgluejobpysparkflexetlintegtestDefaultTestDeployAssert3F3EC951.assets.json b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pysparkflex-etl.js.snapshot/awsgluejobpysparkflexetlintegtestDefaultTestDeployAssert3F3EC951.assets.json new file mode 100644 index 0000000000000..d77fab393274a --- /dev/null +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pysparkflex-etl.js.snapshot/awsgluejobpysparkflexetlintegtestDefaultTestDeployAssert3F3EC951.assets.json @@ -0,0 +1,19 @@ +{ + "version": "36.0.0", + "files": { + "21fbb51d7b23f6a6c262b46a9caee79d744a3ac019fd45422d988b96d44b2a22": { + "source": { + "path": "awsgluejobpysparkflexetlintegtestDefaultTestDeployAssert3F3EC951.template.json", + "packaging": "file" + }, + "destinations": { + "current_account-current_region": { + "bucketName": "cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}", + "objectKey": "21fbb51d7b23f6a6c262b46a9caee79d744a3ac019fd45422d988b96d44b2a22.json", + "assumeRoleArn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-file-publishing-role-${AWS::AccountId}-${AWS::Region}" + } + } + } + }, + "dockerImages": {} +} \ No newline at end of file diff --git 
a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pysparkflex-etl.js.snapshot/awsgluejobpysparkflexetlintegtestDefaultTestDeployAssert3F3EC951.template.json b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pysparkflex-etl.js.snapshot/awsgluejobpysparkflexetlintegtestDefaultTestDeployAssert3F3EC951.template.json new file mode 100644 index 0000000000000..ad9d0fb73d1dd --- /dev/null +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pysparkflex-etl.js.snapshot/awsgluejobpysparkflexetlintegtestDefaultTestDeployAssert3F3EC951.template.json @@ -0,0 +1,36 @@ +{ + "Parameters": { + "BootstrapVersion": { + "Type": "AWS::SSM::Parameter::Value", + "Default": "/cdk-bootstrap/hnb659fds/version", + "Description": "Version of the CDK Bootstrap resources in this environment, automatically retrieved from SSM Parameter Store. [cdk:skip]" + } + }, + "Rules": { + "CheckBootstrapVersion": { + "Assertions": [ + { + "Assert": { + "Fn::Not": [ + { + "Fn::Contains": [ + [ + "1", + "2", + "3", + "4", + "5" + ], + { + "Ref": "BootstrapVersion" + } + ] + } + ] + }, + "AssertDescription": "CDK bootstrap stack version 6 required. Please run 'cdk bootstrap' with a recent version of the CDK CLI." 
+ } + ] + } + } +} \ No newline at end of file diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pysparkflex-etl.js.snapshot/cdk.out b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pysparkflex-etl.js.snapshot/cdk.out new file mode 100644 index 0000000000000..1f0068d32659a --- /dev/null +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pysparkflex-etl.js.snapshot/cdk.out @@ -0,0 +1 @@ +{"version":"36.0.0"} \ No newline at end of file diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pysparkflex-etl.js.snapshot/integ.json b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pysparkflex-etl.js.snapshot/integ.json new file mode 100644 index 0000000000000..b837700f2ba0b --- /dev/null +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pysparkflex-etl.js.snapshot/integ.json @@ -0,0 +1,12 @@ +{ + "version": "36.0.0", + "testCases": { + "aws-glue-job-pysparkflex-etl-integ-test/DefaultTest": { + "stacks": [ + "aws-glue-job-pysparkflex-etl" + ], + "assertionStack": "aws-glue-job-pysparkflex-etl-integ-test/DefaultTest/DeployAssert", + "assertionStackName": "awsgluejobpysparkflexetlintegtestDefaultTestDeployAssert3F3EC951" + } + } +} \ No newline at end of file diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pysparkflex-etl.js.snapshot/manifest.json b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pysparkflex-etl.js.snapshot/manifest.json new file mode 100644 index 0000000000000..56ea621a7e015 --- /dev/null +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pysparkflex-etl.js.snapshot/manifest.json @@ -0,0 +1,131 @@ +{ + "version": "36.0.0", + "artifacts": { + "aws-glue-job-pysparkflex-etl.assets": { + "type": "cdk:asset-manifest", + "properties": { + "file": "aws-glue-job-pysparkflex-etl.assets.json", + "requiresBootstrapStackVersion": 6, + "bootstrapStackVersionSsmParameter": "/cdk-bootstrap/hnb659fds/version" + } + }, + "aws-glue-job-pysparkflex-etl": { + "type": "aws:cloudformation:stack", + "environment": 
"aws://unknown-account/unknown-region", + "properties": { + "templateFile": "aws-glue-job-pysparkflex-etl.template.json", + "terminationProtection": false, + "validateOnSynth": false, + "assumeRoleArn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-deploy-role-${AWS::AccountId}-${AWS::Region}", + "cloudFormationExecutionRoleArn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-cfn-exec-role-${AWS::AccountId}-${AWS::Region}", + "stackTemplateAssetObjectUrl": "s3://cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}/6f1761f37e0e58957866339a668ae140999f5800a1285a839297c3593438c1ea.json", + "requiresBootstrapStackVersion": 6, + "bootstrapStackVersionSsmParameter": "/cdk-bootstrap/hnb659fds/version", + "additionalDependencies": [ + "aws-glue-job-pysparkflex-etl.assets" + ], + "lookupRole": { + "arn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-lookup-role-${AWS::AccountId}-${AWS::Region}", + "requiresBootstrapStackVersion": 8, + "bootstrapStackVersionSsmParameter": "/cdk-bootstrap/hnb659fds/version" + } + }, + "dependencies": [ + "aws-glue-job-pysparkflex-etl.assets" + ], + "metadata": { + "/aws-glue-job-pysparkflex-etl/IAMServiceRole/Resource": [ + { + "type": "aws:cdk:logicalId", + "data": "IAMServiceRole61C662C4" + } + ], + "/aws-glue-job-pysparkflex-etl/IAMServiceRole/DefaultPolicy/Resource": [ + { + "type": "aws:cdk:logicalId", + "data": "IAMServiceRoleDefaultPolicy379D1A0E" + } + ], + "/aws-glue-job-pysparkflex-etl/BasicPySparkFlexEtlJob/Resource": [ + { + "type": "aws:cdk:logicalId", + "data": "BasicPySparkFlexEtlJobC50DC250" + } + ], + "/aws-glue-job-pysparkflex-etl/OverridePySparkFlexEtlJob/Resource": [ + { + "type": "aws:cdk:logicalId", + "data": "OverridePySparkFlexEtlJob8EE4CFA1" + } + ], + "/aws-glue-job-pysparkflex-etl/BootstrapVersion": [ + { + "type": "aws:cdk:logicalId", + "data": "BootstrapVersion" + } + ], + "/aws-glue-job-pysparkflex-etl/CheckBootstrapVersion": [ + { + "type": 
"aws:cdk:logicalId", + "data": "CheckBootstrapVersion" + } + ] + }, + "displayName": "aws-glue-job-pysparkflex-etl" + }, + "awsgluejobpysparkflexetlintegtestDefaultTestDeployAssert3F3EC951.assets": { + "type": "cdk:asset-manifest", + "properties": { + "file": "awsgluejobpysparkflexetlintegtestDefaultTestDeployAssert3F3EC951.assets.json", + "requiresBootstrapStackVersion": 6, + "bootstrapStackVersionSsmParameter": "/cdk-bootstrap/hnb659fds/version" + } + }, + "awsgluejobpysparkflexetlintegtestDefaultTestDeployAssert3F3EC951": { + "type": "aws:cloudformation:stack", + "environment": "aws://unknown-account/unknown-region", + "properties": { + "templateFile": "awsgluejobpysparkflexetlintegtestDefaultTestDeployAssert3F3EC951.template.json", + "terminationProtection": false, + "validateOnSynth": false, + "assumeRoleArn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-deploy-role-${AWS::AccountId}-${AWS::Region}", + "cloudFormationExecutionRoleArn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-cfn-exec-role-${AWS::AccountId}-${AWS::Region}", + "stackTemplateAssetObjectUrl": "s3://cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}/21fbb51d7b23f6a6c262b46a9caee79d744a3ac019fd45422d988b96d44b2a22.json", + "requiresBootstrapStackVersion": 6, + "bootstrapStackVersionSsmParameter": "/cdk-bootstrap/hnb659fds/version", + "additionalDependencies": [ + "awsgluejobpysparkflexetlintegtestDefaultTestDeployAssert3F3EC951.assets" + ], + "lookupRole": { + "arn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-lookup-role-${AWS::AccountId}-${AWS::Region}", + "requiresBootstrapStackVersion": 8, + "bootstrapStackVersionSsmParameter": "/cdk-bootstrap/hnb659fds/version" + } + }, + "dependencies": [ + "awsgluejobpysparkflexetlintegtestDefaultTestDeployAssert3F3EC951.assets" + ], + "metadata": { + "/aws-glue-job-pysparkflex-etl-integ-test/DefaultTest/DeployAssert/BootstrapVersion": [ + { + "type": "aws:cdk:logicalId", + "data": 
"BootstrapVersion" + } + ], + "/aws-glue-job-pysparkflex-etl-integ-test/DefaultTest/DeployAssert/CheckBootstrapVersion": [ + { + "type": "aws:cdk:logicalId", + "data": "CheckBootstrapVersion" + } + ] + }, + "displayName": "aws-glue-job-pysparkflex-etl-integ-test/DefaultTest/DeployAssert" + }, + "Tree": { + "type": "cdk:tree", + "properties": { + "file": "tree.json" + } + } + } +} \ No newline at end of file diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pysparkflex-etl.js.snapshot/tree.json b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pysparkflex-etl.js.snapshot/tree.json new file mode 100644 index 0000000000000..c28d10218218d --- /dev/null +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pysparkflex-etl.js.snapshot/tree.json @@ -0,0 +1,373 @@ +{ + "version": "tree-0.1", + "tree": { + "id": "App", + "path": "", + "children": { + "aws-glue-job-pysparkflex-etl": { + "id": "aws-glue-job-pysparkflex-etl", + "path": "aws-glue-job-pysparkflex-etl", + "children": { + "IAMServiceRole": { + "id": "IAMServiceRole", + "path": "aws-glue-job-pysparkflex-etl/IAMServiceRole", + "children": { + "ImportIAMServiceRole": { + "id": "ImportIAMServiceRole", + "path": "aws-glue-job-pysparkflex-etl/IAMServiceRole/ImportIAMServiceRole", + "constructInfo": { + "fqn": "aws-cdk-lib.Resource", + "version": "0.0.0" + } + }, + "Resource": { + "id": "Resource", + "path": "aws-glue-job-pysparkflex-etl/IAMServiceRole/Resource", + "attributes": { + "aws:cdk:cloudformation:type": "AWS::IAM::Role", + "aws:cdk:cloudformation:props": { + "assumeRolePolicyDocument": { + "Statement": [ + { + "Action": "sts:AssumeRole", + "Effect": "Allow", + "Principal": { + "Service": "glue.amazonaws.com" + } + } + ], + "Version": "2012-10-17" + }, + "managedPolicyArns": [ + { + "Fn::Join": [ + "", + [ + "arn:", + { + "Ref": "AWS::Partition" + }, + ":iam::aws:policy/service-role/AWSGlueServiceRole" + ] + ] + } + ] + } + }, + "constructInfo": { + "fqn": "aws-cdk-lib.aws_iam.CfnRole", + "version": 
"0.0.0" + } + }, + "DefaultPolicy": { + "id": "DefaultPolicy", + "path": "aws-glue-job-pysparkflex-etl/IAMServiceRole/DefaultPolicy", + "children": { + "Resource": { + "id": "Resource", + "path": "aws-glue-job-pysparkflex-etl/IAMServiceRole/DefaultPolicy/Resource", + "attributes": { + "aws:cdk:cloudformation:type": "AWS::IAM::Policy", + "aws:cdk:cloudformation:props": { + "policyDocument": { + "Statement": [ + { + "Action": [ + "s3:GetBucket*", + "s3:GetObject*", + "s3:List*" + ], + "Effect": "Allow", + "Resource": [ + { + "Fn::Join": [ + "", + [ + "arn:", + { + "Ref": "AWS::Partition" + }, + ":s3:::", + { + "Fn::Sub": "cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}" + }, + "/*" + ] + ] + }, + { + "Fn::Join": [ + "", + [ + "arn:", + { + "Ref": "AWS::Partition" + }, + ":s3:::", + { + "Fn::Sub": "cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}" + } + ] + ] + } + ] + } + ], + "Version": "2012-10-17" + }, + "policyName": "IAMServiceRoleDefaultPolicy379D1A0E", + "roles": [ + { + "Ref": "IAMServiceRole61C662C4" + } + ] + } + }, + "constructInfo": { + "fqn": "aws-cdk-lib.aws_iam.CfnPolicy", + "version": "0.0.0" + } + } + }, + "constructInfo": { + "fqn": "aws-cdk-lib.aws_iam.Policy", + "version": "0.0.0" + } + } + }, + "constructInfo": { + "fqn": "aws-cdk-lib.aws_iam.Role", + "version": "0.0.0" + } + }, + "BasicPySparkFlexEtlJob": { + "id": "BasicPySparkFlexEtlJob", + "path": "aws-glue-job-pysparkflex-etl/BasicPySparkFlexEtlJob", + "children": { + "Code2907ea7be4a583708cfffc21b3df1dfa": { + "id": "Code2907ea7be4a583708cfffc21b3df1dfa", + "path": "aws-glue-job-pysparkflex-etl/BasicPySparkFlexEtlJob/Code2907ea7be4a583708cfffc21b3df1dfa", + "children": { + "Stage": { + "id": "Stage", + "path": "aws-glue-job-pysparkflex-etl/BasicPySparkFlexEtlJob/Code2907ea7be4a583708cfffc21b3df1dfa/Stage", + "constructInfo": { + "fqn": "aws-cdk-lib.AssetStaging", + "version": "0.0.0" + } + }, + "AssetBucket": { + "id": "AssetBucket", + "path": 
"aws-glue-job-pysparkflex-etl/BasicPySparkFlexEtlJob/Code2907ea7be4a583708cfffc21b3df1dfa/AssetBucket", + "constructInfo": { + "fqn": "aws-cdk-lib.aws_s3.BucketBase", + "version": "0.0.0" + } + } + }, + "constructInfo": { + "fqn": "aws-cdk-lib.aws_s3_assets.Asset", + "version": "0.0.0" + } + }, + "Resource": { + "id": "Resource", + "path": "aws-glue-job-pysparkflex-etl/BasicPySparkFlexEtlJob/Resource", + "attributes": { + "aws:cdk:cloudformation:type": "AWS::Glue::Job", + "aws:cdk:cloudformation:props": { + "command": { + "name": "glueetl", + "scriptLocation": { + "Fn::Join": [ + "", + [ + "s3://", + { + "Fn::Sub": "cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}" + }, + "/432033e3218068a915d2532fa9be7858a12b228a2ae6e5c10faccd9097b1e855.py" + ] + ] + }, + "pythonVersion": "3" + }, + "defaultArguments": { + "--job-language": "python", + "--enable-metrics": "" + }, + "executionClass": "FLEX", + "glueVersion": "3.0", + "numberOfWorkers": 10, + "role": { + "Fn::GetAtt": [ + "IAMServiceRole61C662C4", + "Arn" + ] + }, + "workerType": "G.2X" + } + }, + "constructInfo": { + "fqn": "aws-cdk-lib.aws_glue.CfnJob", + "version": "0.0.0" + } + } + }, + "constructInfo": { + "fqn": "@aws-cdk/aws-glue-alpha.PySparkFlexEtlJob", + "version": "0.0.0" + } + }, + "OverridePySparkFlexEtlJob": { + "id": "OverridePySparkFlexEtlJob", + "path": "aws-glue-job-pysparkflex-etl/OverridePySparkFlexEtlJob", + "children": { + "Resource": { + "id": "Resource", + "path": "aws-glue-job-pysparkflex-etl/OverridePySparkFlexEtlJob/Resource", + "attributes": { + "aws:cdk:cloudformation:type": "AWS::Glue::Job", + "aws:cdk:cloudformation:props": { + "command": { + "name": "glueetl", + "scriptLocation": { + "Fn::Join": [ + "", + [ + "s3://", + { + "Fn::Sub": "cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}" + }, + "/432033e3218068a915d2532fa9be7858a12b228a2ae6e5c10faccd9097b1e855.py" + ] + ] + }, + "pythonVersion": "3" + }, + "defaultArguments": { + "--job-language": "python", + 
"--enable-metrics": "", + "arg1": "value1", + "arg2": "value2" + }, + "description": "Optional Override PySpark Flex Etl Job", + "executionClass": "FLEX", + "glueVersion": "3.0", + "name": "Optional Override PySpark Flex Etl Job", + "numberOfWorkers": 20, + "role": { + "Fn::GetAtt": [ + "IAMServiceRole61C662C4", + "Arn" + ] + }, + "tags": { + "key": "value" + }, + "timeout": 15, + "workerType": "G.1X" + } + }, + "constructInfo": { + "fqn": "aws-cdk-lib.aws_glue.CfnJob", + "version": "0.0.0" + } + } + }, + "constructInfo": { + "fqn": "@aws-cdk/aws-glue-alpha.PySparkFlexEtlJob", + "version": "0.0.0" + } + }, + "BootstrapVersion": { + "id": "BootstrapVersion", + "path": "aws-glue-job-pysparkflex-etl/BootstrapVersion", + "constructInfo": { + "fqn": "aws-cdk-lib.CfnParameter", + "version": "0.0.0" + } + }, + "CheckBootstrapVersion": { + "id": "CheckBootstrapVersion", + "path": "aws-glue-job-pysparkflex-etl/CheckBootstrapVersion", + "constructInfo": { + "fqn": "aws-cdk-lib.CfnRule", + "version": "0.0.0" + } + } + }, + "constructInfo": { + "fqn": "aws-cdk-lib.Stack", + "version": "0.0.0" + } + }, + "aws-glue-job-pysparkflex-etl-integ-test": { + "id": "aws-glue-job-pysparkflex-etl-integ-test", + "path": "aws-glue-job-pysparkflex-etl-integ-test", + "children": { + "DefaultTest": { + "id": "DefaultTest", + "path": "aws-glue-job-pysparkflex-etl-integ-test/DefaultTest", + "children": { + "Default": { + "id": "Default", + "path": "aws-glue-job-pysparkflex-etl-integ-test/DefaultTest/Default", + "constructInfo": { + "fqn": "constructs.Construct", + "version": "10.3.0" + } + }, + "DeployAssert": { + "id": "DeployAssert", + "path": "aws-glue-job-pysparkflex-etl-integ-test/DefaultTest/DeployAssert", + "children": { + "BootstrapVersion": { + "id": "BootstrapVersion", + "path": "aws-glue-job-pysparkflex-etl-integ-test/DefaultTest/DeployAssert/BootstrapVersion", + "constructInfo": { + "fqn": "aws-cdk-lib.CfnParameter", + "version": "0.0.0" + } + }, + "CheckBootstrapVersion": { + "id": 
"CheckBootstrapVersion", + "path": "aws-glue-job-pysparkflex-etl-integ-test/DefaultTest/DeployAssert/CheckBootstrapVersion", + "constructInfo": { + "fqn": "aws-cdk-lib.CfnRule", + "version": "0.0.0" + } + } + }, + "constructInfo": { + "fqn": "aws-cdk-lib.Stack", + "version": "0.0.0" + } + } + }, + "constructInfo": { + "fqn": "@aws-cdk/integ-tests-alpha.IntegTestCase", + "version": "0.0.0" + } + } + }, + "constructInfo": { + "fqn": "@aws-cdk/integ-tests-alpha.IntegTest", + "version": "0.0.0" + } + }, + "Tree": { + "id": "Tree", + "path": "Tree", + "constructInfo": { + "fqn": "constructs.Construct", + "version": "10.3.0" + } + } + }, + "constructInfo": { + "fqn": "aws-cdk-lib.App", + "version": "0.0.0" + } + } +} \ No newline at end of file diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pysparkflex-etl.ts b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pysparkflex-etl.ts new file mode 100644 index 0000000000000..ce3145dc0ff2b --- /dev/null +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pysparkflex-etl.ts @@ -0,0 +1,66 @@ +import * as integ from '@aws-cdk/integ-tests-alpha'; +import * as path from 'path'; +import * as cdk from 'aws-cdk-lib'; +import * as glue from '../lib'; +import * as iam from 'aws-cdk-lib/aws-iam'; + +/** + * To verify the ability to run jobs created in this test + * + * Run the job using + * `aws glue start-job-run --region us-east-1 --job-name ` + * This will return a runId + * + * Get the status of the job run using + * `aws glue get-job-run --region us-east-1 --job-name --run-id ` + * + * For example, to test the ShellJob + * - Run: `aws glue start-job-run --region us-east-1 --job-name ShellJob` + * - Get Status: `aws glue get-job-run --region us-east-1 --job-name ShellJob --run-id ` + * - Check output: `aws logs get-log-events --region us-east-1 --log-group-name "/aws-glue/python-jobs/output" --log-stream-name ">` which should show "hello world" + */ + +const app = new cdk.App(); +const stack = new cdk.Stack(app, 
'aws-glue-job-pysparkflex-etl'); + +const script = glue.Code.fromAsset(path.join(__dirname, 'job-script', 'hello_world.py')); + +const iam_role = new iam.Role(stack, 'IAMServiceRole', { + assumedBy: new iam.ServicePrincipal('glue.amazonaws.com'), + managedPolicies: [iam.ManagedPolicy.fromAwsManagedPolicyName('service-role/AWSGlueServiceRole')], +}); + +new glue.PySparkFlexEtlJob(stack, 'BasicPySparkFlexEtlJob', { + script: script, + role: iam_role, +}); + +/*new glue.PySparkFlexEtlJob(stack, 'BasicPySparkFlexEtlJobv3', { + script: script, + role: iam_role, + glueVersion: glue.GlueVersion.V3_0, +}); */ + +new glue.PySparkFlexEtlJob(stack, 'OverridePySparkFlexEtlJob', { + script: script, + role: iam_role, + description: 'Optional Override PySpark Flex Etl Job', + glueVersion: glue.GlueVersion.V3_0, + numberOrWorkers: 20, + workerType: glue.WorkerType.G_1X, + timeout: cdk.Duration.minutes(15), + jobName: 'Optional Override PySpark Flex Etl Job', + defaultArguments: { + arg1: 'value1', + arg2: 'value2', + }, + tags: { + key: 'value', + }, +}); + +new integ.IntegTest(app, 'aws-glue-job-pysparkflex-etl-integ-test', { + testCases: [stack], +}); + +app.synth(); diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalasparkflex-etl.js.snapshot/asset.e305655b966b957f91fcec580e3f8703573eb6b69528c5d52190d72579c91602.jar b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalasparkflex-etl.js.snapshot/asset.e305655b966b957f91fcec580e3f8703573eb6b69528c5d52190d72579c91602.jar new file mode 100644 index 0000000000000000000000000000000000000000..41a6aa95d5aff514ba19b9a9b4c8bfff3ec123a0 GIT binary patch literal 782 zcmWIWW@Zs#;Nak3*vZlp!GHuf8CV#6T|*poJ^kGD|D9rBU}gyLX6FE@V1gMK3ugvA8%lz}Ne*gFx-=CBBp0qNAhjxvK;gu@`>~ z`>52>zR1ayf1cUPplvyuRMi&Qd-HS2{bBs4QMj$k@x`HcXZBSZe@?U8f6tylX5ssV zZOgjF!d_)mzeu?1c-^soVVO?w%=rcp2mkRl*RDF=w9s zc%joEIcMfi%~`9Zb)>khS6q5_X61#YiU%G{x~_iuL>T|Q1M^NUWW5kQyICwy>O=j? 
z>M2", + "Default": "/cdk-bootstrap/hnb659fds/version", + "Description": "Version of the CDK Bootstrap resources in this environment, automatically retrieved from SSM Parameter Store. [cdk:skip]" + } + }, + "Rules": { + "CheckBootstrapVersion": { + "Assertions": [ + { + "Assert": { + "Fn::Not": [ + { + "Fn::Contains": [ + [ + "1", + "2", + "3", + "4", + "5" + ], + { + "Ref": "BootstrapVersion" + } + ] + } + ] + }, + "AssertDescription": "CDK bootstrap stack version 6 required. Please run 'cdk bootstrap' with a recent version of the CDK CLI." + } + ] + } + } +} \ No newline at end of file diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalasparkflex-etl.js.snapshot/awsgluejobscalasparkflexetlintegtestDefaultTestDeployAssert8009E6FC.assets.json b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalasparkflex-etl.js.snapshot/awsgluejobscalasparkflexetlintegtestDefaultTestDeployAssert8009E6FC.assets.json new file mode 100644 index 0000000000000..22bd76fefdc70 --- /dev/null +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalasparkflex-etl.js.snapshot/awsgluejobscalasparkflexetlintegtestDefaultTestDeployAssert8009E6FC.assets.json @@ -0,0 +1,19 @@ +{ + "version": "36.0.0", + "files": { + "21fbb51d7b23f6a6c262b46a9caee79d744a3ac019fd45422d988b96d44b2a22": { + "source": { + "path": "awsgluejobscalasparkflexetlintegtestDefaultTestDeployAssert8009E6FC.template.json", + "packaging": "file" + }, + "destinations": { + "current_account-current_region": { + "bucketName": "cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}", + "objectKey": "21fbb51d7b23f6a6c262b46a9caee79d744a3ac019fd45422d988b96d44b2a22.json", + "assumeRoleArn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-file-publishing-role-${AWS::AccountId}-${AWS::Region}" + } + } + } + }, + "dockerImages": {} +} \ No newline at end of file diff --git 
a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalasparkflex-etl.js.snapshot/awsgluejobscalasparkflexetlintegtestDefaultTestDeployAssert8009E6FC.template.json b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalasparkflex-etl.js.snapshot/awsgluejobscalasparkflexetlintegtestDefaultTestDeployAssert8009E6FC.template.json new file mode 100644 index 0000000000000..ad9d0fb73d1dd --- /dev/null +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalasparkflex-etl.js.snapshot/awsgluejobscalasparkflexetlintegtestDefaultTestDeployAssert8009E6FC.template.json @@ -0,0 +1,36 @@ +{ + "Parameters": { + "BootstrapVersion": { + "Type": "AWS::SSM::Parameter::Value", + "Default": "/cdk-bootstrap/hnb659fds/version", + "Description": "Version of the CDK Bootstrap resources in this environment, automatically retrieved from SSM Parameter Store. [cdk:skip]" + } + }, + "Rules": { + "CheckBootstrapVersion": { + "Assertions": [ + { + "Assert": { + "Fn::Not": [ + { + "Fn::Contains": [ + [ + "1", + "2", + "3", + "4", + "5" + ], + { + "Ref": "BootstrapVersion" + } + ] + } + ] + }, + "AssertDescription": "CDK bootstrap stack version 6 required. Please run 'cdk bootstrap' with a recent version of the CDK CLI." 
+ } + ] + } + } +} \ No newline at end of file diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalasparkflex-etl.js.snapshot/cdk.out b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalasparkflex-etl.js.snapshot/cdk.out new file mode 100644 index 0000000000000..1f0068d32659a --- /dev/null +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalasparkflex-etl.js.snapshot/cdk.out @@ -0,0 +1 @@ +{"version":"36.0.0"} \ No newline at end of file diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalasparkflex-etl.js.snapshot/integ.json b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalasparkflex-etl.js.snapshot/integ.json new file mode 100644 index 0000000000000..694662c13ef3a --- /dev/null +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalasparkflex-etl.js.snapshot/integ.json @@ -0,0 +1,12 @@ +{ + "version": "36.0.0", + "testCases": { + "aws-glue-job-scalasparkflex-etl-integ-test/DefaultTest": { + "stacks": [ + "aws-glue-job-scalasparkflex-etl" + ], + "assertionStack": "aws-glue-job-scalasparkflex-etl-integ-test/DefaultTest/DeployAssert", + "assertionStackName": "awsgluejobscalasparkflexetlintegtestDefaultTestDeployAssert8009E6FC" + } + } +} \ No newline at end of file diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalasparkflex-etl.js.snapshot/manifest.json b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalasparkflex-etl.js.snapshot/manifest.json new file mode 100644 index 0000000000000..62e439eab23c2 --- /dev/null +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalasparkflex-etl.js.snapshot/manifest.json @@ -0,0 +1,131 @@ +{ + "version": "36.0.0", + "artifacts": { + "aws-glue-job-scalasparkflex-etl.assets": { + "type": "cdk:asset-manifest", + "properties": { + "file": "aws-glue-job-scalasparkflex-etl.assets.json", + "requiresBootstrapStackVersion": 6, + "bootstrapStackVersionSsmParameter": "/cdk-bootstrap/hnb659fds/version" + } + }, + "aws-glue-job-scalasparkflex-etl": { + "type": 
"aws:cloudformation:stack", + "environment": "aws://unknown-account/unknown-region", + "properties": { + "templateFile": "aws-glue-job-scalasparkflex-etl.template.json", + "terminationProtection": false, + "validateOnSynth": false, + "assumeRoleArn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-deploy-role-${AWS::AccountId}-${AWS::Region}", + "cloudFormationExecutionRoleArn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-cfn-exec-role-${AWS::AccountId}-${AWS::Region}", + "stackTemplateAssetObjectUrl": "s3://cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}/5a817debdb277ddb49716d89986520ce01e14c36661ccf39e5457466dccbf687.json", + "requiresBootstrapStackVersion": 6, + "bootstrapStackVersionSsmParameter": "/cdk-bootstrap/hnb659fds/version", + "additionalDependencies": [ + "aws-glue-job-scalasparkflex-etl.assets" + ], + "lookupRole": { + "arn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-lookup-role-${AWS::AccountId}-${AWS::Region}", + "requiresBootstrapStackVersion": 8, + "bootstrapStackVersionSsmParameter": "/cdk-bootstrap/hnb659fds/version" + } + }, + "dependencies": [ + "aws-glue-job-scalasparkflex-etl.assets" + ], + "metadata": { + "/aws-glue-job-scalasparkflex-etl/IAMServiceRole/Resource": [ + { + "type": "aws:cdk:logicalId", + "data": "IAMServiceRole61C662C4" + } + ], + "/aws-glue-job-scalasparkflex-etl/IAMServiceRole/DefaultPolicy/Resource": [ + { + "type": "aws:cdk:logicalId", + "data": "IAMServiceRoleDefaultPolicy379D1A0E" + } + ], + "/aws-glue-job-scalasparkflex-etl/BasicScalaSparkFlexEtlJob/Resource": [ + { + "type": "aws:cdk:logicalId", + "data": "BasicScalaSparkFlexEtlJobF8FD9EFB" + } + ], + "/aws-glue-job-scalasparkflex-etl/OverrideScalaSparkFlexEtlJob/Resource": [ + { + "type": "aws:cdk:logicalId", + "data": "OverrideScalaSparkFlexEtlJob843D93B4" + } + ], + "/aws-glue-job-scalasparkflex-etl/BootstrapVersion": [ + { + "type": "aws:cdk:logicalId", + "data": "BootstrapVersion" + } + ], + 
"/aws-glue-job-scalasparkflex-etl/CheckBootstrapVersion": [ + { + "type": "aws:cdk:logicalId", + "data": "CheckBootstrapVersion" + } + ] + }, + "displayName": "aws-glue-job-scalasparkflex-etl" + }, + "awsgluejobscalasparkflexetlintegtestDefaultTestDeployAssert8009E6FC.assets": { + "type": "cdk:asset-manifest", + "properties": { + "file": "awsgluejobscalasparkflexetlintegtestDefaultTestDeployAssert8009E6FC.assets.json", + "requiresBootstrapStackVersion": 6, + "bootstrapStackVersionSsmParameter": "/cdk-bootstrap/hnb659fds/version" + } + }, + "awsgluejobscalasparkflexetlintegtestDefaultTestDeployAssert8009E6FC": { + "type": "aws:cloudformation:stack", + "environment": "aws://unknown-account/unknown-region", + "properties": { + "templateFile": "awsgluejobscalasparkflexetlintegtestDefaultTestDeployAssert8009E6FC.template.json", + "terminationProtection": false, + "validateOnSynth": false, + "assumeRoleArn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-deploy-role-${AWS::AccountId}-${AWS::Region}", + "cloudFormationExecutionRoleArn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-cfn-exec-role-${AWS::AccountId}-${AWS::Region}", + "stackTemplateAssetObjectUrl": "s3://cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}/21fbb51d7b23f6a6c262b46a9caee79d744a3ac019fd45422d988b96d44b2a22.json", + "requiresBootstrapStackVersion": 6, + "bootstrapStackVersionSsmParameter": "/cdk-bootstrap/hnb659fds/version", + "additionalDependencies": [ + "awsgluejobscalasparkflexetlintegtestDefaultTestDeployAssert8009E6FC.assets" + ], + "lookupRole": { + "arn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-lookup-role-${AWS::AccountId}-${AWS::Region}", + "requiresBootstrapStackVersion": 8, + "bootstrapStackVersionSsmParameter": "/cdk-bootstrap/hnb659fds/version" + } + }, + "dependencies": [ + "awsgluejobscalasparkflexetlintegtestDefaultTestDeployAssert8009E6FC.assets" + ], + "metadata": { + 
"/aws-glue-job-scalasparkflex-etl-integ-test/DefaultTest/DeployAssert/BootstrapVersion": [ + { + "type": "aws:cdk:logicalId", + "data": "BootstrapVersion" + } + ], + "/aws-glue-job-scalasparkflex-etl-integ-test/DefaultTest/DeployAssert/CheckBootstrapVersion": [ + { + "type": "aws:cdk:logicalId", + "data": "CheckBootstrapVersion" + } + ] + }, + "displayName": "aws-glue-job-scalasparkflex-etl-integ-test/DefaultTest/DeployAssert" + }, + "Tree": { + "type": "cdk:tree", + "properties": { + "file": "tree.json" + } + } + } +} \ No newline at end of file diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalasparkflex-etl.js.snapshot/tree.json b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalasparkflex-etl.js.snapshot/tree.json new file mode 100644 index 0000000000000..194df8aef60b3 --- /dev/null +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalasparkflex-etl.js.snapshot/tree.json @@ -0,0 +1,375 @@ +{ + "version": "tree-0.1", + "tree": { + "id": "App", + "path": "", + "children": { + "aws-glue-job-scalasparkflex-etl": { + "id": "aws-glue-job-scalasparkflex-etl", + "path": "aws-glue-job-scalasparkflex-etl", + "children": { + "IAMServiceRole": { + "id": "IAMServiceRole", + "path": "aws-glue-job-scalasparkflex-etl/IAMServiceRole", + "children": { + "ImportIAMServiceRole": { + "id": "ImportIAMServiceRole", + "path": "aws-glue-job-scalasparkflex-etl/IAMServiceRole/ImportIAMServiceRole", + "constructInfo": { + "fqn": "aws-cdk-lib.Resource", + "version": "0.0.0" + } + }, + "Resource": { + "id": "Resource", + "path": "aws-glue-job-scalasparkflex-etl/IAMServiceRole/Resource", + "attributes": { + "aws:cdk:cloudformation:type": "AWS::IAM::Role", + "aws:cdk:cloudformation:props": { + "assumeRolePolicyDocument": { + "Statement": [ + { + "Action": "sts:AssumeRole", + "Effect": "Allow", + "Principal": { + "Service": "glue.amazonaws.com" + } + } + ], + "Version": "2012-10-17" + }, + "managedPolicyArns": [ + { + "Fn::Join": [ + "", + [ + "arn:", + { + "Ref": 
"AWS::Partition" + }, + ":iam::aws:policy/service-role/AWSGlueServiceRole" + ] + ] + } + ] + } + }, + "constructInfo": { + "fqn": "aws-cdk-lib.aws_iam.CfnRole", + "version": "0.0.0" + } + }, + "DefaultPolicy": { + "id": "DefaultPolicy", + "path": "aws-glue-job-scalasparkflex-etl/IAMServiceRole/DefaultPolicy", + "children": { + "Resource": { + "id": "Resource", + "path": "aws-glue-job-scalasparkflex-etl/IAMServiceRole/DefaultPolicy/Resource", + "attributes": { + "aws:cdk:cloudformation:type": "AWS::IAM::Policy", + "aws:cdk:cloudformation:props": { + "policyDocument": { + "Statement": [ + { + "Action": [ + "s3:GetBucket*", + "s3:GetObject*", + "s3:List*" + ], + "Effect": "Allow", + "Resource": [ + { + "Fn::Join": [ + "", + [ + "arn:", + { + "Ref": "AWS::Partition" + }, + ":s3:::", + { + "Fn::Sub": "cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}" + }, + "/*" + ] + ] + }, + { + "Fn::Join": [ + "", + [ + "arn:", + { + "Ref": "AWS::Partition" + }, + ":s3:::", + { + "Fn::Sub": "cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}" + } + ] + ] + } + ] + } + ], + "Version": "2012-10-17" + }, + "policyName": "IAMServiceRoleDefaultPolicy379D1A0E", + "roles": [ + { + "Ref": "IAMServiceRole61C662C4" + } + ] + } + }, + "constructInfo": { + "fqn": "aws-cdk-lib.aws_iam.CfnPolicy", + "version": "0.0.0" + } + } + }, + "constructInfo": { + "fqn": "aws-cdk-lib.aws_iam.Policy", + "version": "0.0.0" + } + } + }, + "constructInfo": { + "fqn": "aws-cdk-lib.aws_iam.Role", + "version": "0.0.0" + } + }, + "BasicScalaSparkFlexEtlJob": { + "id": "BasicScalaSparkFlexEtlJob", + "path": "aws-glue-job-scalasparkflex-etl/BasicScalaSparkFlexEtlJob", + "children": { + "Codeb58a68516710fd95a65c427a7e567405": { + "id": "Codeb58a68516710fd95a65c427a7e567405", + "path": "aws-glue-job-scalasparkflex-etl/BasicScalaSparkFlexEtlJob/Codeb58a68516710fd95a65c427a7e567405", + "children": { + "Stage": { + "id": "Stage", + "path": 
"aws-glue-job-scalasparkflex-etl/BasicScalaSparkFlexEtlJob/Codeb58a68516710fd95a65c427a7e567405/Stage", + "constructInfo": { + "fqn": "aws-cdk-lib.AssetStaging", + "version": "0.0.0" + } + }, + "AssetBucket": { + "id": "AssetBucket", + "path": "aws-glue-job-scalasparkflex-etl/BasicScalaSparkFlexEtlJob/Codeb58a68516710fd95a65c427a7e567405/AssetBucket", + "constructInfo": { + "fqn": "aws-cdk-lib.aws_s3.BucketBase", + "version": "0.0.0" + } + } + }, + "constructInfo": { + "fqn": "aws-cdk-lib.aws_s3_assets.Asset", + "version": "0.0.0" + } + }, + "Resource": { + "id": "Resource", + "path": "aws-glue-job-scalasparkflex-etl/BasicScalaSparkFlexEtlJob/Resource", + "attributes": { + "aws:cdk:cloudformation:type": "AWS::Glue::Job", + "aws:cdk:cloudformation:props": { + "command": { + "name": "glueetl", + "scriptLocation": { + "Fn::Join": [ + "", + [ + "s3://", + { + "Fn::Sub": "cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}" + }, + "/e305655b966b957f91fcec580e3f8703573eb6b69528c5d52190d72579c91602.jar" + ] + ] + }, + "pythonVersion": "3" + }, + "defaultArguments": { + "--job-language": "scala", + "--class": "com.example.HelloWorld", + "--enable-metrics": "" + }, + "executionClass": "FLEX", + "glueVersion": "3.0", + "numberOfWorkers": 10, + "role": { + "Fn::GetAtt": [ + "IAMServiceRole61C662C4", + "Arn" + ] + }, + "workerType": "G.2X" + } + }, + "constructInfo": { + "fqn": "aws-cdk-lib.aws_glue.CfnJob", + "version": "0.0.0" + } + } + }, + "constructInfo": { + "fqn": "@aws-cdk/aws-glue-alpha.ScalaSparkFlexEtlJob", + "version": "0.0.0" + } + }, + "OverrideScalaSparkFlexEtlJob": { + "id": "OverrideScalaSparkFlexEtlJob", + "path": "aws-glue-job-scalasparkflex-etl/OverrideScalaSparkFlexEtlJob", + "children": { + "Resource": { + "id": "Resource", + "path": "aws-glue-job-scalasparkflex-etl/OverrideScalaSparkFlexEtlJob/Resource", + "attributes": { + "aws:cdk:cloudformation:type": "AWS::Glue::Job", + "aws:cdk:cloudformation:props": { + "command": { + "name": "glueetl", + 
"scriptLocation": { + "Fn::Join": [ + "", + [ + "s3://", + { + "Fn::Sub": "cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}" + }, + "/e305655b966b957f91fcec580e3f8703573eb6b69528c5d52190d72579c91602.jar" + ] + ] + }, + "pythonVersion": "3" + }, + "defaultArguments": { + "--job-language": "scala", + "--class": "com.example.HelloWorld", + "--enable-metrics": "", + "arg1": "value1", + "arg2": "value2" + }, + "description": "Optional Override ScalaSpark Flex Etl Job", + "executionClass": "FLEX", + "glueVersion": "3.0", + "name": "Optional Override ScalaSpark Flex Etl Job", + "numberOfWorkers": 20, + "role": { + "Fn::GetAtt": [ + "IAMServiceRole61C662C4", + "Arn" + ] + }, + "tags": { + "key": "value" + }, + "timeout": 15, + "workerType": "G.1X" + } + }, + "constructInfo": { + "fqn": "aws-cdk-lib.aws_glue.CfnJob", + "version": "0.0.0" + } + } + }, + "constructInfo": { + "fqn": "@aws-cdk/aws-glue-alpha.ScalaSparkFlexEtlJob", + "version": "0.0.0" + } + }, + "BootstrapVersion": { + "id": "BootstrapVersion", + "path": "aws-glue-job-scalasparkflex-etl/BootstrapVersion", + "constructInfo": { + "fqn": "aws-cdk-lib.CfnParameter", + "version": "0.0.0" + } + }, + "CheckBootstrapVersion": { + "id": "CheckBootstrapVersion", + "path": "aws-glue-job-scalasparkflex-etl/CheckBootstrapVersion", + "constructInfo": { + "fqn": "aws-cdk-lib.CfnRule", + "version": "0.0.0" + } + } + }, + "constructInfo": { + "fqn": "aws-cdk-lib.Stack", + "version": "0.0.0" + } + }, + "aws-glue-job-scalasparkflex-etl-integ-test": { + "id": "aws-glue-job-scalasparkflex-etl-integ-test", + "path": "aws-glue-job-scalasparkflex-etl-integ-test", + "children": { + "DefaultTest": { + "id": "DefaultTest", + "path": "aws-glue-job-scalasparkflex-etl-integ-test/DefaultTest", + "children": { + "Default": { + "id": "Default", + "path": "aws-glue-job-scalasparkflex-etl-integ-test/DefaultTest/Default", + "constructInfo": { + "fqn": "constructs.Construct", + "version": "10.3.0" + } + }, + "DeployAssert": { + "id": 
"DeployAssert", + "path": "aws-glue-job-scalasparkflex-etl-integ-test/DefaultTest/DeployAssert", + "children": { + "BootstrapVersion": { + "id": "BootstrapVersion", + "path": "aws-glue-job-scalasparkflex-etl-integ-test/DefaultTest/DeployAssert/BootstrapVersion", + "constructInfo": { + "fqn": "aws-cdk-lib.CfnParameter", + "version": "0.0.0" + } + }, + "CheckBootstrapVersion": { + "id": "CheckBootstrapVersion", + "path": "aws-glue-job-scalasparkflex-etl-integ-test/DefaultTest/DeployAssert/CheckBootstrapVersion", + "constructInfo": { + "fqn": "aws-cdk-lib.CfnRule", + "version": "0.0.0" + } + } + }, + "constructInfo": { + "fqn": "aws-cdk-lib.Stack", + "version": "0.0.0" + } + } + }, + "constructInfo": { + "fqn": "@aws-cdk/integ-tests-alpha.IntegTestCase", + "version": "0.0.0" + } + } + }, + "constructInfo": { + "fqn": "@aws-cdk/integ-tests-alpha.IntegTest", + "version": "0.0.0" + } + }, + "Tree": { + "id": "Tree", + "path": "Tree", + "constructInfo": { + "fqn": "constructs.Construct", + "version": "10.3.0" + } + } + }, + "constructInfo": { + "fqn": "aws-cdk-lib.App", + "version": "0.0.0" + } + } +} \ No newline at end of file diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalasparkflex-etl.ts b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalasparkflex-etl.ts new file mode 100644 index 0000000000000..0a69d3f5a517d --- /dev/null +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalasparkflex-etl.ts @@ -0,0 +1,63 @@ +import * as integ from '@aws-cdk/integ-tests-alpha'; +import * as path from 'path'; +import * as cdk from 'aws-cdk-lib'; +import * as glue from '../lib'; +import * as iam from 'aws-cdk-lib/aws-iam'; + +/** + * To verify the ability to run jobs created in this test + * + * Run the job using + * `aws glue start-job-run --region us-east-1 --job-name ` + * This will return a runId + * + * Get the status of the job run using + * `aws glue get-job-run --region us-east-1 --job-name --run-id ` + * + * For example, to test the ShellJob + * - 
Run: `aws glue start-job-run --region us-east-1 --job-name ShellJob` + * - Get Status: `aws glue get-job-run --region us-east-1 --job-name ShellJob --run-id ` + * - Check output: `aws logs get-log-events --region us-east-1 --log-group-name "/aws-glue/python-jobs/output" --log-stream-name ">` which should show "hello world" + */ + +const app = new cdk.App(); +const stack = new cdk.Stack(app, 'aws-glue-job-scalasparkflex-etl'); + +const jar_file = glue.Code.fromAsset(path.join(__dirname, 'job-jar', 'helloworld.jar')); +const job_class ='com.example.HelloWorld'; + +const iam_role = new iam.Role(stack, 'IAMServiceRole', { + assumedBy: new iam.ServicePrincipal('glue.amazonaws.com'), + managedPolicies: [iam.ManagedPolicy.fromAwsManagedPolicyName('service-role/AWSGlueServiceRole')], +}); + +new glue.ScalaSparkFlexEtlJob(stack, 'BasicScalaSparkFlexEtlJob', { + script: jar_file, + role: iam_role, + className: job_class, +}); + +new glue.ScalaSparkFlexEtlJob(stack, 'OverrideScalaSparkFlexEtlJob', { + script: jar_file, + className: job_class, + role: iam_role, + description: 'Optional Override ScalaSpark Flex Etl Job', + glueVersion: glue.GlueVersion.V3_0, + numberOrWorkers: 20, + workerType: glue.WorkerType.G_1X, + timeout: cdk.Duration.minutes(15), + jobName: 'Optional Override ScalaSpark Flex Etl Job', + defaultArguments: { + arg1: 'value1', + arg2: 'value2', + }, + tags: { + key: 'value', + }, +}); + +new integ.IntegTest(app, 'aws-glue-job-scalasparkflex-etl-integ-test', { + testCases: [stack], +}); + +app.synth(); diff --git a/packages/@aws-cdk/aws-glue-alpha/test/job-jar/helloworld.jar b/packages/@aws-cdk/aws-glue-alpha/test/job-jar/helloworld.jar new file mode 100644 index 0000000000000000000000000000000000000000..41a6aa95d5aff514ba19b9a9b4c8bfff3ec123a0 GIT binary patch literal 782 zcmWIWW@Zs#;Nak3*vZlp!GHuf8CV#6T|*poJ^kGD|D9rBU}gyLX6FE@V1gMK3ugvA8%lz}Ne*gFx-=CBBp0qNAhjxvK;gu@`>~ z`>52>zR1ayf1cUPplvyuRMi&Qd-HS2{bBs4QMj$k@x`HcXZBSZe@?U8f6tylX5ssV 
zZOgjF!d_)mzeu?1c-^soVVO?w%=rcp2mkRl*RDF=w9s zc%joEIcMfi%~`9Zb)>khS6q5_X61#YiU%G{x~_iuL>T|Q1M^NUWW5kQyICwy>O=j? z>M2 { + let stack: cdk.Stack; + let role: iam.IRole; + let script: glue.Code; + let codeBucket: s3.IBucket; + let job: glue.IJob; + + beforeEach(() => { + stack = new cdk.Stack(); + role = iam.Role.fromRoleArn(stack, 'Role', 'arn:aws:iam::123456789012:role/TestRole'); + codeBucket = s3.Bucket.fromBucketName(stack, 'CodeBucket', 'bucketname'); + script = glue.Code.fromBucket(codeBucket, 'script'); + }); + + describe('Create new PySpark ETL Flex Job with default parameters', () => { + + beforeEach(() => { + job = new glue.PySparkFlexEtlJob(stack, 'ImportedJob', { role, script }); + }); + + test('Test default attributes', () => { + expect(job.jobArn).toEqual(stack.formatArn({ + service: 'glue', + resource: 'job', + resourceName: job.jobName, + })); + expect(job.grantPrincipal).toEqual(role); + }); + + test('Default Glue Version should be 3.0', () => { + Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { + GlueVersion: '3.0', + }); + }); + + test('Default WorkerType should be G.2X', () => { + Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { + WorkerType: 'G.2X', + }); + }); + + test('ExecutionClass should be Flex', () => { + Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { + ExecutionClass: 'FLEX', + }); + }); + }); +}); \ No newline at end of file diff --git a/packages/@aws-cdk/aws-glue-alpha/test/scalasparkflex-etl-jobs.test.ts b/packages/@aws-cdk/aws-glue-alpha/test/scalasparkflex-etl-jobs.test.ts new file mode 100644 index 0000000000000..f9e85e9b72e4b --- /dev/null +++ b/packages/@aws-cdk/aws-glue-alpha/test/scalasparkflex-etl-jobs.test.ts @@ -0,0 +1,57 @@ +import * as cdk from 'aws-cdk-lib'; +import * as glue from '../lib'; +import * as iam from 'aws-cdk-lib/aws-iam'; +import * as s3 from 'aws-cdk-lib/aws-s3'; +import { Template } from 'aws-cdk-lib/assertions'; + +describe('Job', () => { + let 
stack: cdk.Stack; + let role: iam.IRole; + let script: glue.Code; + let codeBucket: s3.IBucket; + let job: glue.IJob; + let className: string; + + beforeEach(() => { + stack = new cdk.Stack(); + role = iam.Role.fromRoleArn(stack, 'Role', 'arn:aws:iam::123456789012:role/TestRole'); + codeBucket = s3.Bucket.fromBucketName(stack, 'CodeBucket', 'bucketname'); + script = glue.Code.fromBucket(codeBucket, 'script'); + className = 'com.example.HelloWorld'; + }); + + describe('Create new Scala Spark ETL Flex Job with default parameters', () => { + + beforeEach(() => { + job = new glue.ScalaSparkFlexEtlJob(stack, 'ImportedJob', { role, script, className }); + }); + + test('Test default attributes', () => { + expect(job.jobArn).toEqual(stack.formatArn({ + service: 'glue', + resource: 'job', + resourceName: job.jobName, + })); + expect(job.grantPrincipal).toEqual(role); + }); + + test('Default Glue Version should be 3.0', () => { + Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { + GlueVersion: '3.0', + }); + }); + + test('Default WorkerType should be G.2X', () => { + Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { + WorkerType: 'G.2X', + }); + }); + + test('ExecutionClass should be Flex', () => { + Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { + ExecutionClass: 'FLEX', + }); + }); + + }); +}); \ No newline at end of file From 6ed7021bd126281c467827d11148ee3b930da670 Mon Sep 17 00:00:00 2001 From: Prashanna B Date: Fri, 12 Apr 2024 08:24:23 +0000 Subject: [PATCH 11/51] Modifications to comments --- .../aws-glue-alpha/test/integ.job-pysparkflex-etl.ts | 6 +++--- .../aws-glue-alpha/test/integ.job-scalasparkflex-etl.ts | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pysparkflex-etl.ts b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pysparkflex-etl.ts index ce3145dc0ff2b..09e662c579791 100644 --- 
a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pysparkflex-etl.ts +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pysparkflex-etl.ts @@ -14,9 +14,9 @@ import * as iam from 'aws-cdk-lib/aws-iam'; * Get the status of the job run using * `aws glue get-job-run --region us-east-1 --job-name --run-id ` * - * For example, to test the ShellJob - * - Run: `aws glue start-job-run --region us-east-1 --job-name ShellJob` - * - Get Status: `aws glue get-job-run --region us-east-1 --job-name ShellJob --run-id ` + * For example, to test the ETLJob + * - Run: `aws glue start-job-run --region us-east-1 --job-name ETLJob` + * - Get Status: `aws glue get-job-run --region us-east-1 --job-name ETLJob --run-id ` * - Check output: `aws logs get-log-events --region us-east-1 --log-group-name "/aws-glue/python-jobs/output" --log-stream-name ">` which should show "hello world" */ diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalasparkflex-etl.ts b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalasparkflex-etl.ts index 0a69d3f5a517d..58c1bf7a36348 100644 --- a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalasparkflex-etl.ts +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalasparkflex-etl.ts @@ -14,9 +14,9 @@ import * as iam from 'aws-cdk-lib/aws-iam'; * Get the status of the job run using * `aws glue get-job-run --region us-east-1 --job-name --run-id ` * - * For example, to test the ShellJob - * - Run: `aws glue start-job-run --region us-east-1 --job-name ShellJob` - * - Get Status: `aws glue get-job-run --region us-east-1 --job-name ShellJob --run-id ` + * For example, to test the ETLJob + * - Run: `aws glue start-job-run --region us-east-1 --job-name ETLJob` + * - Get Status: `aws glue get-job-run --region us-east-1 --job-name ETLJob --run-id ` * - Check output: `aws logs get-log-events --region us-east-1 --log-group-name "/aws-glue/python-jobs/output" --log-stream-name ">` which should show "hello world" */ From 
087f41160c8e0af92de6ab1270407efeb3e61ab0 Mon Sep 17 00:00:00 2001 From: "Janardhan (Janny) Molumuri" Date: Mon, 15 Apr 2024 17:39:50 +0000 Subject: [PATCH 12/51] Scala Spark Streaming Job class --- .../aws-glue-alpha/lib/jobs/scala-spark-streaming-job.ts | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/packages/@aws-cdk/aws-glue-alpha/lib/jobs/scala-spark-streaming-job.ts b/packages/@aws-cdk/aws-glue-alpha/lib/jobs/scala-spark-streaming-job.ts index d664a1c457ed9..00a1291d6aaf4 100644 --- a/packages/@aws-cdk/aws-glue-alpha/lib/jobs/scala-spark-streaming-job.ts +++ b/packages/@aws-cdk/aws-glue-alpha/lib/jobs/scala-spark-streaming-job.ts @@ -100,6 +100,11 @@ export class ScalaSparkStreamingJob extends Job { // Gather executable arguments const executableArgs = this.executableArguments(props); + // Mandatory className argument + if (props.className === undefined) { + throw new Error('className must be set for Scala ETL Jobs'); + } + // Conbine command line arguments into a single line item const defaultArguments = { ...executableArgs, @@ -146,7 +151,8 @@ export class ScalaSparkStreamingJob extends Job { */ private executableArguments(props: ScalaSparkStreamingJobProps) { const args: { [key: string]: string } = {}; - args['--job-language'] = JobLanguage.PYTHON; + args['--job-language'] = JobLanguage.SCALA; + args['--class'] = props.className!; // TODO: Confirm with Glue service team what the mapping is from extra-x to job language, if any if (props.extraJars && props.extraJars?.length > 0) { From 23a76a982b7545ed315cdbcdd0fcad45c688f0d1 Mon Sep 17 00:00:00 2001 From: "Janardhan (Janny) Molumuri" Date: Mon, 15 Apr 2024 18:18:44 +0000 Subject: [PATCH 13/51] Streaming Jobs - integration tests --- packages/@aws-cdk/aws-glue-alpha/lib/index.ts | 2 + .../test/integ.job-pyspark-streaming.ts | 60 ++++ .../test/integ.job-scalaspark-streaming.ts | 63 ++++ .../test/job-jar/helloworld.jar | Bin 0 -> 782 bytes .../cli-lib-alpha/THIRD_PARTY_LICENSES | 269 
++++++++++++------ 5 files changed, 313 insertions(+), 81 deletions(-) create mode 100644 packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-streaming.ts create mode 100644 packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-streaming.ts create mode 100644 packages/@aws-cdk/aws-glue-alpha/test/job-jar/helloworld.jar diff --git a/packages/@aws-cdk/aws-glue-alpha/lib/index.ts b/packages/@aws-cdk/aws-glue-alpha/lib/index.ts index 5c5d13f9b5c76..b67995af632ab 100644 --- a/packages/@aws-cdk/aws-glue-alpha/lib/index.ts +++ b/packages/@aws-cdk/aws-glue-alpha/lib/index.ts @@ -19,6 +19,8 @@ export * from './jobs/pyspark-etl-job'; // export * from './jobs/python-shell-job'; // export * from './jobs/ray-job'; // export * from './jobs/scala-spark-etl-job'; +export * from './jobs/pyspark-streaming-job'; +export * from './jobs/scala-spark-streaming-job'; export * from './jobs/spark-ui-utils'; // export * from './jobs/spark-etl-job'; //export * from './jobs/streaming-job'; diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-streaming.ts b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-streaming.ts new file mode 100644 index 0000000000000..55ee3f59a7882 --- /dev/null +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-streaming.ts @@ -0,0 +1,60 @@ +import * as integ from '@aws-cdk/integ-tests-alpha'; +import * as path from 'path'; +import * as cdk from 'aws-cdk-lib'; +import * as glue from '../lib'; +import * as iam from 'aws-cdk-lib/aws-iam'; + +/** + * To verify the ability to run jobs created in this test + * + * Run the job using + * `aws glue start-job-run --region us-east-1 --job-name ` + * This will return a runId + * + * Get the status of the job run using + * `aws glue get-job-run --region us-east-1 --job-name --run-id ` + * + * For example, to test the ETLJob + * - Run: `aws glue start-job-run --region us-east-1 --job-name ETLJob` + * - Get Status: `aws glue get-job-run --region us-east-1 --job-name ETLJob --run-id ` + * - 
Check output: `aws logs get-log-events --region us-east-1 --log-group-name "/aws-glue/python-jobs/output" --log-stream-name ">` which should show "hello world" + */ + +const app = new cdk.App(); +const stack = new cdk.Stack(app, 'aws-glue-job-pyspark-streaming'); + +const script = glue.Code.fromAsset(path.join(__dirname, 'job-script', 'hello_world.py')); + +const iam_role = new iam.Role(stack, 'IAMServiceRole', { + assumedBy: new iam.ServicePrincipal('glue.amazonaws.com'), + managedPolicies: [iam.ManagedPolicy.fromAwsManagedPolicyName('service-role/AWSGlueServiceRole')], +}); + +new glue.pySparkStreamingJob(stack, 'BasicPySparkStreamingJob', { + script: script, + role: iam_role, +}); + +new glue.pySparkStreamingJob(stack, 'OverridePySparkStreamingJob', { + script: script, + role: iam_role, + description: 'Optional Override PySpark Streaming Job', + glueVersion: glue.GlueVersion.V3_0, + numberOrWorkers: 20, + workerType: glue.WorkerType.G_2X, + timeout: cdk.Duration.minutes(15), + jobName: 'Optional Override PySpark Streaming Job', + defaultArguments: { + arg1: 'value1', + arg2: 'value2', + }, + tags: { + key: 'value', + }, +}); + +new integ.IntegTest(app, 'aws-glue-job-pyspark-streaming-integ-test', { + testCases: [stack], +}); + +app.synth(); \ No newline at end of file diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-streaming.ts b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-streaming.ts new file mode 100644 index 0000000000000..277426faa0832 --- /dev/null +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-streaming.ts @@ -0,0 +1,63 @@ +import * as integ from '@aws-cdk/integ-tests-alpha'; +import * as path from 'path'; +import * as cdk from 'aws-cdk-lib'; +import * as glue from '../lib'; +import * as iam from 'aws-cdk-lib/aws-iam'; + +/** + * To verify the ability to run jobs created in this test + * + * Run the job using + * `aws glue start-job-run --region us-east-1 --job-name ` + * This will return a runId + 
* + * Get the status of the job run using + * `aws glue get-job-run --region us-east-1 --job-name --run-id ` + * + * For example, to test the ETLJob + * - Run: `aws glue start-job-run --region us-east-1 --job-name ETLJob` + * - Get Status: `aws glue get-job-run --region us-east-1 --job-name ETLJob --run-id ` + * - Check output: `aws logs get-log-events --region us-east-1 --log-group-name "/aws-glue/python-jobs/output" --log-stream-name ">` which should show "hello world" + */ + +const app = new cdk.App(); +const stack = new cdk.Stack(app, 'aws-glue-job-scalaspark-streaming'); + +const jar_file = glue.Code.fromAsset(path.join(__dirname, 'job-jar', 'helloworld.jar')); +const job_class ='com.example.HelloWorld'; + +const iam_role = new iam.Role(stack, 'IAMServiceRole', { + assumedBy: new iam.ServicePrincipal('glue.amazonaws.com'), + managedPolicies: [iam.ManagedPolicy.fromAwsManagedPolicyName('service-role/AWSGlueServiceRole')], +}); + +new glue.ScalaSparkStreamingJob(stack, 'BasicScalaSparkStreamingJob', { + script: jar_file, + role: iam_role, + className: job_class, +}); + +new glue.ScalaSparkStreamingJob(stack, 'OverrideScalaSparkStreamingJob', { + script: jar_file, + className: job_class, + role: iam_role, + description: 'Optional Override ScalaSpark Streaming Job', + glueVersion: glue.GlueVersion.V3_0, + numberOrWorkers: 20, + workerType: glue.WorkerType.G_2X, + timeout: cdk.Duration.minutes(15), + jobName: 'Optional Override ScalaSpark Streaming Job', + defaultArguments: { + arg1: 'value1', + arg2: 'value2', + }, + tags: { + key: 'value', + }, +}); + +new integ.IntegTest(app, 'aws-glue-job-scalaspark-streaming-integ-test', { + testCases: [stack], +}); + +app.synth(); diff --git a/packages/@aws-cdk/aws-glue-alpha/test/job-jar/helloworld.jar b/packages/@aws-cdk/aws-glue-alpha/test/job-jar/helloworld.jar new file mode 100644 index 0000000000000000000000000000000000000000..41a6aa95d5aff514ba19b9a9b4c8bfff3ec123a0 GIT binary patch literal 782 
zcmWIWW@Zs#;Nak3*vZlp!GHuf8CV#6T|*poJ^kGD|D9rBU}gyLX6FE@V1gMK3ugvA8%lz}Ne*gFx-=CBBp0qNAhjxvK;gu@`>~ z`>52>zR1ayf1cUPplvyuRMi&Qd-HS2{bBs4QMj$k@x`HcXZBSZe@?U8f6tylX5ssV zZOgjF!d_)mzeu?1c-^soVVO?w%=rcp2mkRl*RDF=w9s zc%joEIcMfi%~`9Zb)>khS6q5_X61#YiU%G{x~_iuL>T|Q1M^NUWW5kQyICwy>O=j? z>M2 @@ -1143,38 +1143,37 @@ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLI ---------------- -** diff@5.1.0 - https://www.npmjs.com/package/diff/v/5.1.0 | BSD-3-Clause -Software License Agreement (BSD License) +** diff@5.2.0 - https://www.npmjs.com/package/diff/v/5.2.0 | BSD-3-Clause +BSD 3-Clause License Copyright (c) 2009-2015, Kevin Decker - All rights reserved. -Redistribution and use of this software in source and binary forms, with or without modification, -are permitted provided that the following conditions are met: +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: -* Redistributions of source code must retain the above - copyright notice, this list of conditions and the - following disclaimer. +1. Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. -* Redistributions in binary form must reproduce the above - copyright notice, this list of conditions and the - following disclaimer in the documentation and/or other - materials provided with the distribution. +2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. -* Neither the name of Kevin Decker nor the names of its - contributors may be used to endorse or promote products - derived from this software without specific prior - written permission. +3. 
Neither the name of the copyright holder nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR -IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND -FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER -IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT -OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ---------------- @@ -1228,7 +1227,7 @@ THE SOFTWARE. 
---------------- -** escalade@3.1.1 - https://www.npmjs.com/package/escalade/v/3.1.1 | MIT +** escalade@3.1.2 - https://www.npmjs.com/package/escalade/v/3.1.2 | MIT MIT License Copyright (c) Luke Edwards (lukeed.com) @@ -1420,7 +1419,7 @@ THE SOFTWARE. ---------------- -** fs-extra@8.1.0 - https://www.npmjs.com/package/fs-extra/v/8.1.0 | MIT +** fs-extra@11.2.0 - https://www.npmjs.com/package/fs-extra/v/11.2.0 | MIT (The MIT License) Copyright (c) 2011-2017 JP Richardson @@ -1512,7 +1511,7 @@ the licensed code: ---------------- -** get-uri@6.0.2 - https://www.npmjs.com/package/get-uri/v/6.0.2 | MIT +** get-uri@6.0.3 - https://www.npmjs.com/package/get-uri/v/6.0.3 | MIT (The MIT License) Copyright (c) 2014 Nathan Rajlich @@ -1618,13 +1617,10 @@ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLI ---------------- -** http-proxy-agent@7.0.0 - https://www.npmjs.com/package/http-proxy-agent/v/7.0.0 | MIT -License -------- - +** http-proxy-agent@7.0.2 - https://www.npmjs.com/package/http-proxy-agent/v/7.0.2 | MIT (The MIT License) -Copyright (c) 2013 Nathan Rajlich <nathan@tootallnate.net> +Copyright (c) 2013 Nathan Rajlich Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the @@ -1648,7 +1644,29 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
---------------- -** https-proxy-agent@7.0.2 - https://www.npmjs.com/package/https-proxy-agent/v/7.0.2 | MIT +** https-proxy-agent@7.0.4 - https://www.npmjs.com/package/https-proxy-agent/v/7.0.4 | MIT +(The MIT License) + +Copyright (c) 2013 Nathan Rajlich + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +'Software'), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice shall be +included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. ---------------- @@ -1693,11 +1711,27 @@ PERFORMANCE OF THIS SOFTWARE. 
---------------- -** ip@1.1.8 - https://www.npmjs.com/package/ip/v/1.1.8 | MIT +** ip-address@9.0.5 - https://www.npmjs.com/package/ip-address/v/9.0.5 | MIT +Copyright (C) 2011 by Beau Gunderson ----------------- +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. -** ip@2.0.0 - https://www.npmjs.com/package/ip/v/2.0.0 | MIT ---------------- @@ -1829,22 +1863,47 @@ limitations under the License. 
---------------- -** jsonfile@4.0.0 - https://www.npmjs.com/package/jsonfile/v/4.0.0 | MIT -(The MIT License) +** jsbn@1.1.0 - https://www.npmjs.com/package/jsbn/v/1.1.0 | MIT +Licensing +--------- -Copyright (c) 2012-2015, JP Richardson +This software is covered under the following copyright: -Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files -(the 'Software'), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, - merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is - furnished to do so, subject to the following conditions: +/* + * Copyright (c) 2003-2005 Tom Wu + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS-IS" AND WITHOUT WARRANTY OF ANY KIND, + * EXPRESS, IMPLIED OR OTHERWISE, INCLUDING WITHOUT LIMITATION, ANY + * WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. + * + * IN NO EVENT SHALL TOM WU BE LIABLE FOR ANY SPECIAL, INCIDENTAL, + * INDIRECT OR CONSEQUENTIAL DAMAGES OF ANY KIND, OR ANY DAMAGES WHATSOEVER + * RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER OR NOT ADVISED OF + * THE POSSIBILITY OF DAMAGE, AND ON ANY THEORY OF LIABILITY, ARISING OUT + * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ * + * In addition, the following condition applies: + * + * All redistributions must retain an intact copy of this copyright notice + * and disclaimer. + */ -The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. +Address all questions regarding this license to: -THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE -WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS -OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + Tom Wu + tjw@cs.Stanford.EDU ---------------- @@ -2419,7 +2478,29 @@ IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. ---------------- -** pac-resolver@7.0.0 - https://www.npmjs.com/package/pac-resolver/v/7.0.0 | MIT +** pac-resolver@7.0.1 - https://www.npmjs.com/package/pac-resolver/v/7.0.1 | MIT +(The MIT License) + +Copyright (c) 2013 Nathan Rajlich + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +'Software'), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice shall be +included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
+IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. ---------------- @@ -2505,7 +2586,29 @@ THE SOFTWARE. ---------------- -** proxy-agent@6.3.1 - https://www.npmjs.com/package/proxy-agent/v/6.3.1 | MIT +** proxy-agent@6.4.0 - https://www.npmjs.com/package/proxy-agent/v/6.4.0 | MIT +(The MIT License) + +Copyright (c) 2013 Nathan Rajlich + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +'Software'), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice shall be +included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. ---------------- @@ -3014,7 +3117,7 @@ Copyright (c) 2010-2022 Mathias Bynens ---------------- -** semver@7.5.4 - https://www.npmjs.com/package/semver/v/7.5.4 | ISC +** semver@7.6.0 - https://www.npmjs.com/package/semver/v/7.6.0 | ISC The ISC License Copyright (c) Isaac Z. Schlueter and Contributors @@ -3078,7 +3181,7 @@ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
---------------- -** socks@2.7.1 - https://www.npmjs.com/package/socks/v/2.7.1 | MIT +** socks@2.7.3 - https://www.npmjs.com/package/socks/v/2.7.3 | MIT The MIT License (MIT) Copyright (c) 2013 Josh Glazebrook @@ -3138,6 +3241,35 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +---------------- + +** sprintf-js@1.1.3 - https://www.npmjs.com/package/sprintf-js/v/1.1.3 | BSD-3-Clause +Copyright (c) 2007-present, Alexandru Mărășteanu +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: +* Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. +* Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. +* Neither the name of this software nor the names of its contributors may be + used to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR +ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ + ---------------- ** string_decoder@1.1.1 - https://www.npmjs.com/package/string_decoder/v/1.1.1 | MIT @@ -3382,31 +3514,6 @@ LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. ----------------- - -** universalify@0.1.2 - https://www.npmjs.com/package/universalify/v/0.1.2 | MIT -(The MIT License) - -Copyright (c) 2017, Ryan Zimmerman - -Permission is hereby granted, free of charge, to any person obtaining a copy of -this software and associated documentation files (the 'Software'), to deal in -the Software without restriction, including without limitation the rights to -use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of -the Software, and to permit persons to whom the Software is furnished to do so, -subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS -FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR -COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER -IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN -CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- - ---------------- ** universalify@2.0.1 - https://www.npmjs.com/package/universalify/v/2.0.1 | MIT From ac4b45788e1aa9853bf29d1b6f5d5a82b6a1a2d1 Mon Sep 17 00:00:00 2001 From: "Janardhan (Janny) Molumuri" Date: Mon, 15 Apr 2024 18:38:44 +0000 Subject: [PATCH 14/51] Streaming Jobs --- .../aws-glue-alpha/lib/jobs/pyspark-streaming-job.ts | 3 ++- .../aws-glue-alpha/lib/jobs/scala-spark-streaming-job.ts | 6 +++--- .../aws-glue-alpha/test/integ.job-pyspark-streaming.ts | 4 ++-- 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/packages/@aws-cdk/aws-glue-alpha/lib/jobs/pyspark-streaming-job.ts b/packages/@aws-cdk/aws-glue-alpha/lib/jobs/pyspark-streaming-job.ts index fc7824351099d..78082b6cbb726 100644 --- a/packages/@aws-cdk/aws-glue-alpha/lib/jobs/pyspark-streaming-job.ts +++ b/packages/@aws-cdk/aws-glue-alpha/lib/jobs/pyspark-streaming-job.ts @@ -39,6 +39,7 @@ export interface PySparkStreamingJobProps extends JobProperties { /** * Extra Python Files S3 URL (optional) * S3 URL where additional python dependencies are located + * @default - no extra files */ readonly extraPythonFiles?: string[]; } @@ -46,7 +47,7 @@ export interface PySparkStreamingJobProps extends JobProperties { /** * A Python Spark Streaming Glue Job */ -export class pySparkStreamingJob extends Job { +export class PySparkStreamingJob extends Job { // Implement abstract Job attributes public readonly jobArn: string; diff --git a/packages/@aws-cdk/aws-glue-alpha/lib/jobs/scala-spark-streaming-job.ts b/packages/@aws-cdk/aws-glue-alpha/lib/jobs/scala-spark-streaming-job.ts index 00a1291d6aaf4..92d4516a61c11 100644 --- a/packages/@aws-cdk/aws-glue-alpha/lib/jobs/scala-spark-streaming-job.ts +++ b/packages/@aws-cdk/aws-glue-alpha/lib/jobs/scala-spark-streaming-job.ts @@ -18,10 +18,9 @@ import * as iam from 'aws-cdk-lib/aws-iam'; import { Bucket } from 'aws-cdk-lib/aws-s3'; import { Job, JobProperties } from './job'; import { Construct } from 'constructs'; -import { JobType, GlueVersion, 
JobLanguage, PythonVersion, WorkerType } from '../constants'; +import { JobType, GlueVersion, JobLanguage, WorkerType } from '../constants'; import { SparkUIProps, SparkUILoggingLocation, validateSparkUiPrefix, cleanSparkUiPrefixForGrant } from './spark-ui-utils'; - /** * Properties for creating a Scala Spark ETL job */ @@ -42,11 +41,12 @@ export interface ScalaSparkStreamingJobProps extends JobProperties { * Package and class name for the entry point of Glue job execution for * Java scripts **/ - className: string; + readonly className: string; /** * Extra Jars S3 URL (optional) * S3 URL where additional jar dependencies are located + * @default - no extra jar files */ readonly extraJars?: string[]; } diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-streaming.ts b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-streaming.ts index 55ee3f59a7882..9a640578e2768 100644 --- a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-streaming.ts +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-streaming.ts @@ -30,12 +30,12 @@ const iam_role = new iam.Role(stack, 'IAMServiceRole', { managedPolicies: [iam.ManagedPolicy.fromAwsManagedPolicyName('service-role/AWSGlueServiceRole')], }); -new glue.pySparkStreamingJob(stack, 'BasicPySparkStreamingJob', { +new glue.PySparkStreamingJob(stack, 'BasicPySparkStreamingJob', { script: script, role: iam_role, }); -new glue.pySparkStreamingJob(stack, 'OverridePySparkStreamingJob', { +new glue.PySparkStreamingJob(stack, 'OverridePySparkStreamingJob', { script: script, role: iam_role, description: 'Optional Override PySpark Streaming Job', From 9d936dcda38af9f188812338c5967e67ebfd5cb0 Mon Sep 17 00:00:00 2001 From: "Janardhan (Janny) Molumuri" Date: Mon, 15 Apr 2024 20:26:00 +0000 Subject: [PATCH 15/51] Streaming jobs CDK L2 --- .../lib/jobs/pyspark-streaming-job.ts | 2 +- .../lib/jobs/scala-spark-streaming-job.ts | 2 +- .../test/pyspark-streaming-jobs.test.ts | 54 ++++++++++++++++++ 
.../test/scalaspark-streaming-jobs.test.ts | 56 +++++++++++++++++++ 4 files changed, 112 insertions(+), 2 deletions(-) create mode 100644 packages/@aws-cdk/aws-glue-alpha/test/pyspark-streaming-jobs.test.ts create mode 100644 packages/@aws-cdk/aws-glue-alpha/test/scalaspark-streaming-jobs.test.ts diff --git a/packages/@aws-cdk/aws-glue-alpha/lib/jobs/pyspark-streaming-job.ts b/packages/@aws-cdk/aws-glue-alpha/lib/jobs/pyspark-streaming-job.ts index 78082b6cbb726..9130a0ef1295e 100644 --- a/packages/@aws-cdk/aws-glue-alpha/lib/jobs/pyspark-streaming-job.ts +++ b/packages/@aws-cdk/aws-glue-alpha/lib/jobs/pyspark-streaming-job.ts @@ -117,7 +117,7 @@ export class PySparkStreamingJob extends Job { }, glueVersion: props.glueVersion ? props.glueVersion : GlueVersion.V4_0, workerType: props.workerType ? props.workerType : WorkerType.G_2X, - numberOfWorkers: props.numberOrWorkers, + numberOfWorkers: props.numberOrWorkers ? props.numberOrWorkers : 10, maxRetries: props.maxRetries, executionProperty: props.maxConcurrentRuns ? { maxConcurrentRuns: props.maxConcurrentRuns } : undefined, timeout: props.timeout?.toMinutes(), diff --git a/packages/@aws-cdk/aws-glue-alpha/lib/jobs/scala-spark-streaming-job.ts b/packages/@aws-cdk/aws-glue-alpha/lib/jobs/scala-spark-streaming-job.ts index 92d4516a61c11..487c8f146ebd6 100644 --- a/packages/@aws-cdk/aws-glue-alpha/lib/jobs/scala-spark-streaming-job.ts +++ b/packages/@aws-cdk/aws-glue-alpha/lib/jobs/scala-spark-streaming-job.ts @@ -128,7 +128,7 @@ export class ScalaSparkStreamingJob extends Job { }, glueVersion: props.glueVersion ? props.glueVersion : GlueVersion.V4_0, workerType: props.workerType ? props.workerType : WorkerType.G_2X, - numberOfWorkers: props.numberOrWorkers, + numberOfWorkers: props.numberOrWorkers ? props.numberOrWorkers : 10, maxRetries: props.maxRetries, executionProperty: props.maxConcurrentRuns ? 
{ maxConcurrentRuns: props.maxConcurrentRuns } : undefined, timeout: props.timeout?.toMinutes(), diff --git a/packages/@aws-cdk/aws-glue-alpha/test/pyspark-streaming-jobs.test.ts b/packages/@aws-cdk/aws-glue-alpha/test/pyspark-streaming-jobs.test.ts new file mode 100644 index 0000000000000..a6ca7f2e9f472 --- /dev/null +++ b/packages/@aws-cdk/aws-glue-alpha/test/pyspark-streaming-jobs.test.ts @@ -0,0 +1,54 @@ +import * as cdk from 'aws-cdk-lib'; +import * as glue from '../lib'; +import * as iam from 'aws-cdk-lib/aws-iam'; +import * as s3 from 'aws-cdk-lib/aws-s3'; +import { Template } from 'aws-cdk-lib/assertions'; + +describe('Job', () => { + let stack: cdk.Stack; + let role: iam.IRole; + let script: glue.Code; + let codeBucket: s3.IBucket; + let job: glue.IJob; + + beforeEach(() => { + stack = new cdk.Stack(); + role = iam.Role.fromRoleArn(stack, 'Role', 'arn:aws:iam::123456789012:role/TestRole'); + codeBucket = s3.Bucket.fromBucketName(stack, 'CodeBucket', 'bucketname'); + script = glue.Code.fromBucket(codeBucket, 'script'); + }); + + describe('Create new PySpark Streaming Job with default parameters', () => { + + beforeEach(() => { + job = new glue.PySparkStreamingJob(stack, 'ImportedJob', { role, script }); + }); + + test('Test default attributes', () => { + expect(job.jobArn).toEqual(stack.formatArn({ + service: 'glue', + resource: 'job', + resourceName: job.jobName, + })); + expect(job.grantPrincipal).toEqual(role); + }); + + test('Default Glue Version should be 4.0', () => { + Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { + GlueVersion: '4.0', + }); + }); + + test('Default numberOfWorkers should be 10', () => { + Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { + NumberOfWorkers: 10, + }); + }); + + test('Default WorkerType should be G.2X', () => { + Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { + WorkerType: 'G.2X', + }); + }); + }); +}); diff --git 
a/packages/@aws-cdk/aws-glue-alpha/test/scalaspark-streaming-jobs.test.ts b/packages/@aws-cdk/aws-glue-alpha/test/scalaspark-streaming-jobs.test.ts new file mode 100644 index 0000000000000..853e28dfa4224 --- /dev/null +++ b/packages/@aws-cdk/aws-glue-alpha/test/scalaspark-streaming-jobs.test.ts @@ -0,0 +1,56 @@ +import * as cdk from 'aws-cdk-lib'; +import * as glue from '../lib'; +import * as iam from 'aws-cdk-lib/aws-iam'; +import * as s3 from 'aws-cdk-lib/aws-s3'; +import { Template } from 'aws-cdk-lib/assertions'; + +describe('Job', () => { + let stack: cdk.Stack; + let role: iam.IRole; + let script: glue.Code; + let codeBucket: s3.IBucket; + let job: glue.IJob; + let className: string; + + beforeEach(() => { + stack = new cdk.Stack(); + role = iam.Role.fromRoleArn(stack, 'Role', 'arn:aws:iam::123456789012:role/TestRole'); + codeBucket = s3.Bucket.fromBucketName(stack, 'CodeBucket', 'bucketname'); + script = glue.Code.fromBucket(codeBucket, 'script'); + className = 'com.example.HelloWorld'; + }); + + describe('Create new Scala Spark Streaming Job with default parameters', () => { + + beforeEach(() => { + job = new glue.ScalaSparkStreamingJob(stack, 'ImportedJob', { role, script, className }); + }); + + test('Test default attributes', () => { + expect(job.jobArn).toEqual(stack.formatArn({ + service: 'glue', + resource: 'job', + resourceName: job.jobName, + })); + expect(job.grantPrincipal).toEqual(role); + }); + + test('Default Glue Version should be 4.0', () => { + Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { + GlueVersion: '4.0', + }); + }); + + test('Default numberOfWorkers should be 10', () => { + Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { + NumberOfWorkers: 10, + }); + }); + + test('Default WorkerType should be G.2X', () => { + Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { + WorkerType: 'G.2X', + }); + }); + }); +}); From b28dc557a9d95bb2e1a37f5406453bc82e3854f8 Mon Sep 17 00:00:00 2001 
From: "Janardhan (Janny) Molumuri" Date: Mon, 15 Apr 2024 20:51:26 +0000 Subject: [PATCH 16/51] Python Streaming Jobs - Integration test updates --- .../lib/jobs/pyspark-streaming-job.ts | 2 +- ...9be7858a12b228a2ae6e5c10faccd9097b1e855.py | 1 + ...aws-glue-job-pyspark-streaming.assets.json | 32 ++ ...s-glue-job-pyspark-streaming.template.json | 202 ++++++++++ ...efaultTestDeployAssert242E520E.assets.json | 19 + ...aultTestDeployAssert242E520E.template.json | 36 ++ .../cdk.out | 1 + .../integ.json | 12 + .../manifest.json | 131 +++++++ .../tree.json | 371 ++++++++++++++++++ 10 files changed, 806 insertions(+), 1 deletion(-) create mode 100644 packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-streaming.js.snapshot/asset.432033e3218068a915d2532fa9be7858a12b228a2ae6e5c10faccd9097b1e855.py create mode 100644 packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-streaming.js.snapshot/aws-glue-job-pyspark-streaming.assets.json create mode 100644 packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-streaming.js.snapshot/aws-glue-job-pyspark-streaming.template.json create mode 100644 packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-streaming.js.snapshot/awsgluejobpysparkstreamingintegtestDefaultTestDeployAssert242E520E.assets.json create mode 100644 packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-streaming.js.snapshot/awsgluejobpysparkstreamingintegtestDefaultTestDeployAssert242E520E.template.json create mode 100644 packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-streaming.js.snapshot/cdk.out create mode 100644 packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-streaming.js.snapshot/integ.json create mode 100644 packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-streaming.js.snapshot/manifest.json create mode 100644 packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-streaming.js.snapshot/tree.json diff --git a/packages/@aws-cdk/aws-glue-alpha/lib/jobs/pyspark-streaming-job.ts 
b/packages/@aws-cdk/aws-glue-alpha/lib/jobs/pyspark-streaming-job.ts index 9130a0ef1295e..fd4174071959c 100644 --- a/packages/@aws-cdk/aws-glue-alpha/lib/jobs/pyspark-streaming-job.ts +++ b/packages/@aws-cdk/aws-glue-alpha/lib/jobs/pyspark-streaming-job.ts @@ -113,7 +113,7 @@ export class PySparkStreamingJob extends Job { command: { name: JobType.STREAMING, scriptLocation: this.codeS3ObjectUrl(props.script), - pythonVersion: PythonVersion.THREE_NINE, + pythonVersion: PythonVersion.THREE, }, glueVersion: props.glueVersion ? props.glueVersion : GlueVersion.V4_0, workerType: props.workerType ? props.workerType : WorkerType.G_2X, diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-streaming.js.snapshot/asset.432033e3218068a915d2532fa9be7858a12b228a2ae6e5c10faccd9097b1e855.py b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-streaming.js.snapshot/asset.432033e3218068a915d2532fa9be7858a12b228a2ae6e5c10faccd9097b1e855.py new file mode 100644 index 0000000000000..e75154b7c390f --- /dev/null +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-streaming.js.snapshot/asset.432033e3218068a915d2532fa9be7858a12b228a2ae6e5c10faccd9097b1e855.py @@ -0,0 +1 @@ +print("hello world") \ No newline at end of file diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-streaming.js.snapshot/aws-glue-job-pyspark-streaming.assets.json b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-streaming.js.snapshot/aws-glue-job-pyspark-streaming.assets.json new file mode 100644 index 0000000000000..3cef0dba9ddd4 --- /dev/null +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-streaming.js.snapshot/aws-glue-job-pyspark-streaming.assets.json @@ -0,0 +1,32 @@ +{ + "version": "36.0.0", + "files": { + "432033e3218068a915d2532fa9be7858a12b228a2ae6e5c10faccd9097b1e855": { + "source": { + "path": "asset.432033e3218068a915d2532fa9be7858a12b228a2ae6e5c10faccd9097b1e855.py", + "packaging": "file" + }, + "destinations": { + 
"current_account-current_region": { + "bucketName": "cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}", + "objectKey": "432033e3218068a915d2532fa9be7858a12b228a2ae6e5c10faccd9097b1e855.py", + "assumeRoleArn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-file-publishing-role-${AWS::AccountId}-${AWS::Region}" + } + } + }, + "32dbc2000ae8315bdff7bd4dd248e2d28c945a6879dfe6be766b33f41734f2a3": { + "source": { + "path": "aws-glue-job-pyspark-streaming.template.json", + "packaging": "file" + }, + "destinations": { + "current_account-current_region": { + "bucketName": "cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}", + "objectKey": "32dbc2000ae8315bdff7bd4dd248e2d28c945a6879dfe6be766b33f41734f2a3.json", + "assumeRoleArn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-file-publishing-role-${AWS::AccountId}-${AWS::Region}" + } + } + } + }, + "dockerImages": {} +} \ No newline at end of file diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-streaming.js.snapshot/aws-glue-job-pyspark-streaming.template.json b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-streaming.js.snapshot/aws-glue-job-pyspark-streaming.template.json new file mode 100644 index 0000000000000..f85276688bcdd --- /dev/null +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-streaming.js.snapshot/aws-glue-job-pyspark-streaming.template.json @@ -0,0 +1,202 @@ +{ + "Resources": { + "IAMServiceRole61C662C4": { + "Type": "AWS::IAM::Role", + "Properties": { + "AssumeRolePolicyDocument": { + "Statement": [ + { + "Action": "sts:AssumeRole", + "Effect": "Allow", + "Principal": { + "Service": "glue.amazonaws.com" + } + } + ], + "Version": "2012-10-17" + }, + "ManagedPolicyArns": [ + { + "Fn::Join": [ + "", + [ + "arn:", + { + "Ref": "AWS::Partition" + }, + ":iam::aws:policy/service-role/AWSGlueServiceRole" + ] + ] + } + ] + } + }, + "IAMServiceRoleDefaultPolicy379D1A0E": { + "Type": "AWS::IAM::Policy", + "Properties": { + 
"PolicyDocument": { + "Statement": [ + { + "Action": [ + "s3:GetBucket*", + "s3:GetObject*", + "s3:List*" + ], + "Effect": "Allow", + "Resource": [ + { + "Fn::Join": [ + "", + [ + "arn:", + { + "Ref": "AWS::Partition" + }, + ":s3:::", + { + "Fn::Sub": "cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}" + }, + "/*" + ] + ] + }, + { + "Fn::Join": [ + "", + [ + "arn:", + { + "Ref": "AWS::Partition" + }, + ":s3:::", + { + "Fn::Sub": "cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}" + } + ] + ] + } + ] + } + ], + "Version": "2012-10-17" + }, + "PolicyName": "IAMServiceRoleDefaultPolicy379D1A0E", + "Roles": [ + { + "Ref": "IAMServiceRole61C662C4" + } + ] + } + }, + "BasicPySparkStreamingJobAFD3B477": { + "Type": "AWS::Glue::Job", + "Properties": { + "Command": { + "Name": "gluestreaming", + "PythonVersion": "3", + "ScriptLocation": { + "Fn::Join": [ + "", + [ + "s3://", + { + "Fn::Sub": "cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}" + }, + "/432033e3218068a915d2532fa9be7858a12b228a2ae6e5c10faccd9097b1e855.py" + ] + ] + } + }, + "DefaultArguments": { + "--job-language": "python", + "--enable-metrics": "" + }, + "GlueVersion": "4.0", + "NumberOfWorkers": 10, + "Role": { + "Fn::GetAtt": [ + "IAMServiceRole61C662C4", + "Arn" + ] + }, + "WorkerType": "G.2X" + } + }, + "OverridePySparkStreamingJob58DE176A": { + "Type": "AWS::Glue::Job", + "Properties": { + "Command": { + "Name": "gluestreaming", + "PythonVersion": "3", + "ScriptLocation": { + "Fn::Join": [ + "", + [ + "s3://", + { + "Fn::Sub": "cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}" + }, + "/432033e3218068a915d2532fa9be7858a12b228a2ae6e5c10faccd9097b1e855.py" + ] + ] + } + }, + "DefaultArguments": { + "--job-language": "python", + "--enable-metrics": "", + "arg1": "value1", + "arg2": "value2" + }, + "Description": "Optional Override PySpark Streaming Job", + "GlueVersion": "3.0", + "Name": "Optional Override PySpark Streaming Job", + "NumberOfWorkers": 20, + "Role": { + "Fn::GetAtt": [ + 
"IAMServiceRole61C662C4", + "Arn" + ] + }, + "Tags": { + "key": "value" + }, + "Timeout": 15, + "WorkerType": "G.2X" + } + } + }, + "Parameters": { + "BootstrapVersion": { + "Type": "AWS::SSM::Parameter::Value", + "Default": "/cdk-bootstrap/hnb659fds/version", + "Description": "Version of the CDK Bootstrap resources in this environment, automatically retrieved from SSM Parameter Store. [cdk:skip]" + } + }, + "Rules": { + "CheckBootstrapVersion": { + "Assertions": [ + { + "Assert": { + "Fn::Not": [ + { + "Fn::Contains": [ + [ + "1", + "2", + "3", + "4", + "5" + ], + { + "Ref": "BootstrapVersion" + } + ] + } + ] + }, + "AssertDescription": "CDK bootstrap stack version 6 required. Please run 'cdk bootstrap' with a recent version of the CDK CLI." + } + ] + } + } +} \ No newline at end of file diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-streaming.js.snapshot/awsgluejobpysparkstreamingintegtestDefaultTestDeployAssert242E520E.assets.json b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-streaming.js.snapshot/awsgluejobpysparkstreamingintegtestDefaultTestDeployAssert242E520E.assets.json new file mode 100644 index 0000000000000..476e000da5f03 --- /dev/null +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-streaming.js.snapshot/awsgluejobpysparkstreamingintegtestDefaultTestDeployAssert242E520E.assets.json @@ -0,0 +1,19 @@ +{ + "version": "36.0.0", + "files": { + "21fbb51d7b23f6a6c262b46a9caee79d744a3ac019fd45422d988b96d44b2a22": { + "source": { + "path": "awsgluejobpysparkstreamingintegtestDefaultTestDeployAssert242E520E.template.json", + "packaging": "file" + }, + "destinations": { + "current_account-current_region": { + "bucketName": "cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}", + "objectKey": "21fbb51d7b23f6a6c262b46a9caee79d744a3ac019fd45422d988b96d44b2a22.json", + "assumeRoleArn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-file-publishing-role-${AWS::AccountId}-${AWS::Region}" + } + } + } + }, 
+ "dockerImages": {} +} \ No newline at end of file diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-streaming.js.snapshot/awsgluejobpysparkstreamingintegtestDefaultTestDeployAssert242E520E.template.json b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-streaming.js.snapshot/awsgluejobpysparkstreamingintegtestDefaultTestDeployAssert242E520E.template.json new file mode 100644 index 0000000000000..ad9d0fb73d1dd --- /dev/null +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-streaming.js.snapshot/awsgluejobpysparkstreamingintegtestDefaultTestDeployAssert242E520E.template.json @@ -0,0 +1,36 @@ +{ + "Parameters": { + "BootstrapVersion": { + "Type": "AWS::SSM::Parameter::Value", + "Default": "/cdk-bootstrap/hnb659fds/version", + "Description": "Version of the CDK Bootstrap resources in this environment, automatically retrieved from SSM Parameter Store. [cdk:skip]" + } + }, + "Rules": { + "CheckBootstrapVersion": { + "Assertions": [ + { + "Assert": { + "Fn::Not": [ + { + "Fn::Contains": [ + [ + "1", + "2", + "3", + "4", + "5" + ], + { + "Ref": "BootstrapVersion" + } + ] + } + ] + }, + "AssertDescription": "CDK bootstrap stack version 6 required. Please run 'cdk bootstrap' with a recent version of the CDK CLI." 
+ } + ] + } + } +} \ No newline at end of file diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-streaming.js.snapshot/cdk.out b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-streaming.js.snapshot/cdk.out new file mode 100644 index 0000000000000..1f0068d32659a --- /dev/null +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-streaming.js.snapshot/cdk.out @@ -0,0 +1 @@ +{"version":"36.0.0"} \ No newline at end of file diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-streaming.js.snapshot/integ.json b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-streaming.js.snapshot/integ.json new file mode 100644 index 0000000000000..e6bee2f2422a3 --- /dev/null +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-streaming.js.snapshot/integ.json @@ -0,0 +1,12 @@ +{ + "version": "36.0.0", + "testCases": { + "aws-glue-job-pyspark-streaming-integ-test/DefaultTest": { + "stacks": [ + "aws-glue-job-pyspark-streaming" + ], + "assertionStack": "aws-glue-job-pyspark-streaming-integ-test/DefaultTest/DeployAssert", + "assertionStackName": "awsgluejobpysparkstreamingintegtestDefaultTestDeployAssert242E520E" + } + } +} \ No newline at end of file diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-streaming.js.snapshot/manifest.json b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-streaming.js.snapshot/manifest.json new file mode 100644 index 0000000000000..77681a4e3bb1d --- /dev/null +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-streaming.js.snapshot/manifest.json @@ -0,0 +1,131 @@ +{ + "version": "36.0.0", + "artifacts": { + "aws-glue-job-pyspark-streaming.assets": { + "type": "cdk:asset-manifest", + "properties": { + "file": "aws-glue-job-pyspark-streaming.assets.json", + "requiresBootstrapStackVersion": 6, + "bootstrapStackVersionSsmParameter": "/cdk-bootstrap/hnb659fds/version" + } + }, + "aws-glue-job-pyspark-streaming": { + "type": "aws:cloudformation:stack", + 
"environment": "aws://unknown-account/unknown-region", + "properties": { + "templateFile": "aws-glue-job-pyspark-streaming.template.json", + "terminationProtection": false, + "validateOnSynth": false, + "assumeRoleArn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-deploy-role-${AWS::AccountId}-${AWS::Region}", + "cloudFormationExecutionRoleArn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-cfn-exec-role-${AWS::AccountId}-${AWS::Region}", + "stackTemplateAssetObjectUrl": "s3://cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}/32dbc2000ae8315bdff7bd4dd248e2d28c945a6879dfe6be766b33f41734f2a3.json", + "requiresBootstrapStackVersion": 6, + "bootstrapStackVersionSsmParameter": "/cdk-bootstrap/hnb659fds/version", + "additionalDependencies": [ + "aws-glue-job-pyspark-streaming.assets" + ], + "lookupRole": { + "arn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-lookup-role-${AWS::AccountId}-${AWS::Region}", + "requiresBootstrapStackVersion": 8, + "bootstrapStackVersionSsmParameter": "/cdk-bootstrap/hnb659fds/version" + } + }, + "dependencies": [ + "aws-glue-job-pyspark-streaming.assets" + ], + "metadata": { + "/aws-glue-job-pyspark-streaming/IAMServiceRole/Resource": [ + { + "type": "aws:cdk:logicalId", + "data": "IAMServiceRole61C662C4" + } + ], + "/aws-glue-job-pyspark-streaming/IAMServiceRole/DefaultPolicy/Resource": [ + { + "type": "aws:cdk:logicalId", + "data": "IAMServiceRoleDefaultPolicy379D1A0E" + } + ], + "/aws-glue-job-pyspark-streaming/BasicPySparkStreamingJob/Resource": [ + { + "type": "aws:cdk:logicalId", + "data": "BasicPySparkStreamingJobAFD3B477" + } + ], + "/aws-glue-job-pyspark-streaming/OverridePySparkStreamingJob/Resource": [ + { + "type": "aws:cdk:logicalId", + "data": "OverridePySparkStreamingJob58DE176A" + } + ], + "/aws-glue-job-pyspark-streaming/BootstrapVersion": [ + { + "type": "aws:cdk:logicalId", + "data": "BootstrapVersion" + } + ], + 
"/aws-glue-job-pyspark-streaming/CheckBootstrapVersion": [ + { + "type": "aws:cdk:logicalId", + "data": "CheckBootstrapVersion" + } + ] + }, + "displayName": "aws-glue-job-pyspark-streaming" + }, + "awsgluejobpysparkstreamingintegtestDefaultTestDeployAssert242E520E.assets": { + "type": "cdk:asset-manifest", + "properties": { + "file": "awsgluejobpysparkstreamingintegtestDefaultTestDeployAssert242E520E.assets.json", + "requiresBootstrapStackVersion": 6, + "bootstrapStackVersionSsmParameter": "/cdk-bootstrap/hnb659fds/version" + } + }, + "awsgluejobpysparkstreamingintegtestDefaultTestDeployAssert242E520E": { + "type": "aws:cloudformation:stack", + "environment": "aws://unknown-account/unknown-region", + "properties": { + "templateFile": "awsgluejobpysparkstreamingintegtestDefaultTestDeployAssert242E520E.template.json", + "terminationProtection": false, + "validateOnSynth": false, + "assumeRoleArn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-deploy-role-${AWS::AccountId}-${AWS::Region}", + "cloudFormationExecutionRoleArn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-cfn-exec-role-${AWS::AccountId}-${AWS::Region}", + "stackTemplateAssetObjectUrl": "s3://cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}/21fbb51d7b23f6a6c262b46a9caee79d744a3ac019fd45422d988b96d44b2a22.json", + "requiresBootstrapStackVersion": 6, + "bootstrapStackVersionSsmParameter": "/cdk-bootstrap/hnb659fds/version", + "additionalDependencies": [ + "awsgluejobpysparkstreamingintegtestDefaultTestDeployAssert242E520E.assets" + ], + "lookupRole": { + "arn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-lookup-role-${AWS::AccountId}-${AWS::Region}", + "requiresBootstrapStackVersion": 8, + "bootstrapStackVersionSsmParameter": "/cdk-bootstrap/hnb659fds/version" + } + }, + "dependencies": [ + "awsgluejobpysparkstreamingintegtestDefaultTestDeployAssert242E520E.assets" + ], + "metadata": { + 
"/aws-glue-job-pyspark-streaming-integ-test/DefaultTest/DeployAssert/BootstrapVersion": [ + { + "type": "aws:cdk:logicalId", + "data": "BootstrapVersion" + } + ], + "/aws-glue-job-pyspark-streaming-integ-test/DefaultTest/DeployAssert/CheckBootstrapVersion": [ + { + "type": "aws:cdk:logicalId", + "data": "CheckBootstrapVersion" + } + ] + }, + "displayName": "aws-glue-job-pyspark-streaming-integ-test/DefaultTest/DeployAssert" + }, + "Tree": { + "type": "cdk:tree", + "properties": { + "file": "tree.json" + } + } + } +} \ No newline at end of file diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-streaming.js.snapshot/tree.json b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-streaming.js.snapshot/tree.json new file mode 100644 index 0000000000000..8bcdcde9aee03 --- /dev/null +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-streaming.js.snapshot/tree.json @@ -0,0 +1,371 @@ +{ + "version": "tree-0.1", + "tree": { + "id": "App", + "path": "", + "children": { + "aws-glue-job-pyspark-streaming": { + "id": "aws-glue-job-pyspark-streaming", + "path": "aws-glue-job-pyspark-streaming", + "children": { + "IAMServiceRole": { + "id": "IAMServiceRole", + "path": "aws-glue-job-pyspark-streaming/IAMServiceRole", + "children": { + "ImportIAMServiceRole": { + "id": "ImportIAMServiceRole", + "path": "aws-glue-job-pyspark-streaming/IAMServiceRole/ImportIAMServiceRole", + "constructInfo": { + "fqn": "aws-cdk-lib.Resource", + "version": "0.0.0" + } + }, + "Resource": { + "id": "Resource", + "path": "aws-glue-job-pyspark-streaming/IAMServiceRole/Resource", + "attributes": { + "aws:cdk:cloudformation:type": "AWS::IAM::Role", + "aws:cdk:cloudformation:props": { + "assumeRolePolicyDocument": { + "Statement": [ + { + "Action": "sts:AssumeRole", + "Effect": "Allow", + "Principal": { + "Service": "glue.amazonaws.com" + } + } + ], + "Version": "2012-10-17" + }, + "managedPolicyArns": [ + { + "Fn::Join": [ + "", + [ + "arn:", + { + "Ref": "AWS::Partition" + 
}, + ":iam::aws:policy/service-role/AWSGlueServiceRole" + ] + ] + } + ] + } + }, + "constructInfo": { + "fqn": "aws-cdk-lib.aws_iam.CfnRole", + "version": "0.0.0" + } + }, + "DefaultPolicy": { + "id": "DefaultPolicy", + "path": "aws-glue-job-pyspark-streaming/IAMServiceRole/DefaultPolicy", + "children": { + "Resource": { + "id": "Resource", + "path": "aws-glue-job-pyspark-streaming/IAMServiceRole/DefaultPolicy/Resource", + "attributes": { + "aws:cdk:cloudformation:type": "AWS::IAM::Policy", + "aws:cdk:cloudformation:props": { + "policyDocument": { + "Statement": [ + { + "Action": [ + "s3:GetBucket*", + "s3:GetObject*", + "s3:List*" + ], + "Effect": "Allow", + "Resource": [ + { + "Fn::Join": [ + "", + [ + "arn:", + { + "Ref": "AWS::Partition" + }, + ":s3:::", + { + "Fn::Sub": "cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}" + }, + "/*" + ] + ] + }, + { + "Fn::Join": [ + "", + [ + "arn:", + { + "Ref": "AWS::Partition" + }, + ":s3:::", + { + "Fn::Sub": "cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}" + } + ] + ] + } + ] + } + ], + "Version": "2012-10-17" + }, + "policyName": "IAMServiceRoleDefaultPolicy379D1A0E", + "roles": [ + { + "Ref": "IAMServiceRole61C662C4" + } + ] + } + }, + "constructInfo": { + "fqn": "aws-cdk-lib.aws_iam.CfnPolicy", + "version": "0.0.0" + } + } + }, + "constructInfo": { + "fqn": "aws-cdk-lib.aws_iam.Policy", + "version": "0.0.0" + } + } + }, + "constructInfo": { + "fqn": "aws-cdk-lib.aws_iam.Role", + "version": "0.0.0" + } + }, + "BasicPySparkStreamingJob": { + "id": "BasicPySparkStreamingJob", + "path": "aws-glue-job-pyspark-streaming/BasicPySparkStreamingJob", + "children": { + "Code2907ea7be4a583708cfffc21b3df1dfa": { + "id": "Code2907ea7be4a583708cfffc21b3df1dfa", + "path": "aws-glue-job-pyspark-streaming/BasicPySparkStreamingJob/Code2907ea7be4a583708cfffc21b3df1dfa", + "children": { + "Stage": { + "id": "Stage", + "path": "aws-glue-job-pyspark-streaming/BasicPySparkStreamingJob/Code2907ea7be4a583708cfffc21b3df1dfa/Stage", + 
"constructInfo": { + "fqn": "aws-cdk-lib.AssetStaging", + "version": "0.0.0" + } + }, + "AssetBucket": { + "id": "AssetBucket", + "path": "aws-glue-job-pyspark-streaming/BasicPySparkStreamingJob/Code2907ea7be4a583708cfffc21b3df1dfa/AssetBucket", + "constructInfo": { + "fqn": "aws-cdk-lib.aws_s3.BucketBase", + "version": "0.0.0" + } + } + }, + "constructInfo": { + "fqn": "aws-cdk-lib.aws_s3_assets.Asset", + "version": "0.0.0" + } + }, + "Resource": { + "id": "Resource", + "path": "aws-glue-job-pyspark-streaming/BasicPySparkStreamingJob/Resource", + "attributes": { + "aws:cdk:cloudformation:type": "AWS::Glue::Job", + "aws:cdk:cloudformation:props": { + "command": { + "name": "gluestreaming", + "scriptLocation": { + "Fn::Join": [ + "", + [ + "s3://", + { + "Fn::Sub": "cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}" + }, + "/432033e3218068a915d2532fa9be7858a12b228a2ae6e5c10faccd9097b1e855.py" + ] + ] + }, + "pythonVersion": "3" + }, + "defaultArguments": { + "--job-language": "python", + "--enable-metrics": "" + }, + "glueVersion": "4.0", + "numberOfWorkers": 10, + "role": { + "Fn::GetAtt": [ + "IAMServiceRole61C662C4", + "Arn" + ] + }, + "workerType": "G.2X" + } + }, + "constructInfo": { + "fqn": "aws-cdk-lib.aws_glue.CfnJob", + "version": "0.0.0" + } + } + }, + "constructInfo": { + "fqn": "@aws-cdk/aws-glue-alpha.PySparkStreamingJob", + "version": "0.0.0" + } + }, + "OverridePySparkStreamingJob": { + "id": "OverridePySparkStreamingJob", + "path": "aws-glue-job-pyspark-streaming/OverridePySparkStreamingJob", + "children": { + "Resource": { + "id": "Resource", + "path": "aws-glue-job-pyspark-streaming/OverridePySparkStreamingJob/Resource", + "attributes": { + "aws:cdk:cloudformation:type": "AWS::Glue::Job", + "aws:cdk:cloudformation:props": { + "command": { + "name": "gluestreaming", + "scriptLocation": { + "Fn::Join": [ + "", + [ + "s3://", + { + "Fn::Sub": "cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}" + }, + 
"/432033e3218068a915d2532fa9be7858a12b228a2ae6e5c10faccd9097b1e855.py" + ] + ] + }, + "pythonVersion": "3" + }, + "defaultArguments": { + "--job-language": "python", + "--enable-metrics": "", + "arg1": "value1", + "arg2": "value2" + }, + "description": "Optional Override PySpark Streaming Job", + "glueVersion": "3.0", + "name": "Optional Override PySpark Streaming Job", + "numberOfWorkers": 20, + "role": { + "Fn::GetAtt": [ + "IAMServiceRole61C662C4", + "Arn" + ] + }, + "tags": { + "key": "value" + }, + "timeout": 15, + "workerType": "G.2X" + } + }, + "constructInfo": { + "fqn": "aws-cdk-lib.aws_glue.CfnJob", + "version": "0.0.0" + } + } + }, + "constructInfo": { + "fqn": "@aws-cdk/aws-glue-alpha.PySparkStreamingJob", + "version": "0.0.0" + } + }, + "BootstrapVersion": { + "id": "BootstrapVersion", + "path": "aws-glue-job-pyspark-streaming/BootstrapVersion", + "constructInfo": { + "fqn": "aws-cdk-lib.CfnParameter", + "version": "0.0.0" + } + }, + "CheckBootstrapVersion": { + "id": "CheckBootstrapVersion", + "path": "aws-glue-job-pyspark-streaming/CheckBootstrapVersion", + "constructInfo": { + "fqn": "aws-cdk-lib.CfnRule", + "version": "0.0.0" + } + } + }, + "constructInfo": { + "fqn": "aws-cdk-lib.Stack", + "version": "0.0.0" + } + }, + "aws-glue-job-pyspark-streaming-integ-test": { + "id": "aws-glue-job-pyspark-streaming-integ-test", + "path": "aws-glue-job-pyspark-streaming-integ-test", + "children": { + "DefaultTest": { + "id": "DefaultTest", + "path": "aws-glue-job-pyspark-streaming-integ-test/DefaultTest", + "children": { + "Default": { + "id": "Default", + "path": "aws-glue-job-pyspark-streaming-integ-test/DefaultTest/Default", + "constructInfo": { + "fqn": "constructs.Construct", + "version": "10.3.0" + } + }, + "DeployAssert": { + "id": "DeployAssert", + "path": "aws-glue-job-pyspark-streaming-integ-test/DefaultTest/DeployAssert", + "children": { + "BootstrapVersion": { + "id": "BootstrapVersion", + "path": 
"aws-glue-job-pyspark-streaming-integ-test/DefaultTest/DeployAssert/BootstrapVersion", + "constructInfo": { + "fqn": "aws-cdk-lib.CfnParameter", + "version": "0.0.0" + } + }, + "CheckBootstrapVersion": { + "id": "CheckBootstrapVersion", + "path": "aws-glue-job-pyspark-streaming-integ-test/DefaultTest/DeployAssert/CheckBootstrapVersion", + "constructInfo": { + "fqn": "aws-cdk-lib.CfnRule", + "version": "0.0.0" + } + } + }, + "constructInfo": { + "fqn": "aws-cdk-lib.Stack", + "version": "0.0.0" + } + } + }, + "constructInfo": { + "fqn": "@aws-cdk/integ-tests-alpha.IntegTestCase", + "version": "0.0.0" + } + } + }, + "constructInfo": { + "fqn": "@aws-cdk/integ-tests-alpha.IntegTest", + "version": "0.0.0" + } + }, + "Tree": { + "id": "Tree", + "path": "Tree", + "constructInfo": { + "fqn": "constructs.Construct", + "version": "10.3.0" + } + } + }, + "constructInfo": { + "fqn": "aws-cdk-lib.App", + "version": "0.0.0" + } + } +} \ No newline at end of file From 8afbb598c80e8f7020432071a286c0e248ccd8c4 Mon Sep 17 00:00:00 2001 From: "Janardhan (Janny) Molumuri" Date: Mon, 15 Apr 2024 20:57:17 +0000 Subject: [PATCH 17/51] Scala Streaming Jobs - Integration test updates --- ...3f8703573eb6b69528c5d52190d72579c91602.jar | Bin 0 -> 782 bytes ...-glue-job-scalaspark-streaming.assets.json | 32 ++ ...lue-job-scalaspark-streaming.template.json | 202 ++++++++++ ...efaultTestDeployAssertCD3F6A81.assets.json | 19 + ...aultTestDeployAssertCD3F6A81.template.json | 36 ++ .../cdk.out | 1 + .../integ.json | 12 + .../manifest.json | 131 +++++++ .../tree.json | 371 ++++++++++++++++++ 9 files changed, 804 insertions(+) create mode 100644 packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-streaming.js.snapshot/asset.e305655b966b957f91fcec580e3f8703573eb6b69528c5d52190d72579c91602.jar create mode 100644 packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-streaming.js.snapshot/aws-glue-job-scalaspark-streaming.assets.json create mode 100644 
packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-streaming.js.snapshot/aws-glue-job-scalaspark-streaming.template.json create mode 100644 packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-streaming.js.snapshot/awsgluejobscalasparkstreamingintegtestDefaultTestDeployAssertCD3F6A81.assets.json create mode 100644 packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-streaming.js.snapshot/awsgluejobscalasparkstreamingintegtestDefaultTestDeployAssertCD3F6A81.template.json create mode 100644 packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-streaming.js.snapshot/cdk.out create mode 100644 packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-streaming.js.snapshot/integ.json create mode 100644 packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-streaming.js.snapshot/manifest.json create mode 100644 packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-streaming.js.snapshot/tree.json diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-streaming.js.snapshot/asset.e305655b966b957f91fcec580e3f8703573eb6b69528c5d52190d72579c91602.jar b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-streaming.js.snapshot/asset.e305655b966b957f91fcec580e3f8703573eb6b69528c5d52190d72579c91602.jar new file mode 100644 index 0000000000000000000000000000000000000000..41a6aa95d5aff514ba19b9a9b4c8bfff3ec123a0 GIT binary patch literal 782 zcmWIWW@Zs#;Nak3*vZlp!GHuf8CV#6T|*poJ^kGD|D9rBU}gyLX6FE@V1gMK3ugvA8%lz}Ne*gFx-=CBBp0qNAhjxvK;gu@`>~ z`>52>zR1ayf1cUPplvyuRMi&Qd-HS2{bBs4QMj$k@x`HcXZBSZe@?U8f6tylX5ssV zZOgjF!d_)mzeu?1c-^soVVO?w%=rcp2mkRl*RDF=w9s zc%joEIcMfi%~`9Zb)>khS6q5_X61#YiU%G{x~_iuL>T|Q1M^NUWW5kQyICwy>O=j? z>M2", + "Default": "/cdk-bootstrap/hnb659fds/version", + "Description": "Version of the CDK Bootstrap resources in this environment, automatically retrieved from SSM Parameter Store. 
[cdk:skip]" + } + }, + "Rules": { + "CheckBootstrapVersion": { + "Assertions": [ + { + "Assert": { + "Fn::Not": [ + { + "Fn::Contains": [ + [ + "1", + "2", + "3", + "4", + "5" + ], + { + "Ref": "BootstrapVersion" + } + ] + } + ] + }, + "AssertDescription": "CDK bootstrap stack version 6 required. Please run 'cdk bootstrap' with a recent version of the CDK CLI." + } + ] + } + } +} \ No newline at end of file diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-streaming.js.snapshot/awsgluejobscalasparkstreamingintegtestDefaultTestDeployAssertCD3F6A81.assets.json b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-streaming.js.snapshot/awsgluejobscalasparkstreamingintegtestDefaultTestDeployAssertCD3F6A81.assets.json new file mode 100644 index 0000000000000..867fa0e23043d --- /dev/null +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-streaming.js.snapshot/awsgluejobscalasparkstreamingintegtestDefaultTestDeployAssertCD3F6A81.assets.json @@ -0,0 +1,19 @@ +{ + "version": "36.0.0", + "files": { + "21fbb51d7b23f6a6c262b46a9caee79d744a3ac019fd45422d988b96d44b2a22": { + "source": { + "path": "awsgluejobscalasparkstreamingintegtestDefaultTestDeployAssertCD3F6A81.template.json", + "packaging": "file" + }, + "destinations": { + "current_account-current_region": { + "bucketName": "cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}", + "objectKey": "21fbb51d7b23f6a6c262b46a9caee79d744a3ac019fd45422d988b96d44b2a22.json", + "assumeRoleArn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-file-publishing-role-${AWS::AccountId}-${AWS::Region}" + } + } + } + }, + "dockerImages": {} +} \ No newline at end of file diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-streaming.js.snapshot/awsgluejobscalasparkstreamingintegtestDefaultTestDeployAssertCD3F6A81.template.json 
b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-streaming.js.snapshot/awsgluejobscalasparkstreamingintegtestDefaultTestDeployAssertCD3F6A81.template.json new file mode 100644 index 0000000000000..ad9d0fb73d1dd --- /dev/null +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-streaming.js.snapshot/awsgluejobscalasparkstreamingintegtestDefaultTestDeployAssertCD3F6A81.template.json @@ -0,0 +1,36 @@ +{ + "Parameters": { + "BootstrapVersion": { + "Type": "AWS::SSM::Parameter::Value", + "Default": "/cdk-bootstrap/hnb659fds/version", + "Description": "Version of the CDK Bootstrap resources in this environment, automatically retrieved from SSM Parameter Store. [cdk:skip]" + } + }, + "Rules": { + "CheckBootstrapVersion": { + "Assertions": [ + { + "Assert": { + "Fn::Not": [ + { + "Fn::Contains": [ + [ + "1", + "2", + "3", + "4", + "5" + ], + { + "Ref": "BootstrapVersion" + } + ] + } + ] + }, + "AssertDescription": "CDK bootstrap stack version 6 required. Please run 'cdk bootstrap' with a recent version of the CDK CLI." 
+ } + ] + } + } +} \ No newline at end of file diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-streaming.js.snapshot/cdk.out b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-streaming.js.snapshot/cdk.out new file mode 100644 index 0000000000000..1f0068d32659a --- /dev/null +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-streaming.js.snapshot/cdk.out @@ -0,0 +1 @@ +{"version":"36.0.0"} \ No newline at end of file diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-streaming.js.snapshot/integ.json b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-streaming.js.snapshot/integ.json new file mode 100644 index 0000000000000..179bc5aa9c605 --- /dev/null +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-streaming.js.snapshot/integ.json @@ -0,0 +1,12 @@ +{ + "version": "36.0.0", + "testCases": { + "aws-glue-job-scalaspark-streaming-integ-test/DefaultTest": { + "stacks": [ + "aws-glue-job-scalaspark-streaming" + ], + "assertionStack": "aws-glue-job-scalaspark-streaming-integ-test/DefaultTest/DeployAssert", + "assertionStackName": "awsgluejobscalasparkstreamingintegtestDefaultTestDeployAssertCD3F6A81" + } + } +} \ No newline at end of file diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-streaming.js.snapshot/manifest.json b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-streaming.js.snapshot/manifest.json new file mode 100644 index 0000000000000..0c5491440ec91 --- /dev/null +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-streaming.js.snapshot/manifest.json @@ -0,0 +1,131 @@ +{ + "version": "36.0.0", + "artifacts": { + "aws-glue-job-scalaspark-streaming.assets": { + "type": "cdk:asset-manifest", + "properties": { + "file": "aws-glue-job-scalaspark-streaming.assets.json", + "requiresBootstrapStackVersion": 6, + "bootstrapStackVersionSsmParameter": "/cdk-bootstrap/hnb659fds/version" + } + }, + "aws-glue-job-scalaspark-streaming": 
{ + "type": "aws:cloudformation:stack", + "environment": "aws://unknown-account/unknown-region", + "properties": { + "templateFile": "aws-glue-job-scalaspark-streaming.template.json", + "terminationProtection": false, + "validateOnSynth": false, + "assumeRoleArn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-deploy-role-${AWS::AccountId}-${AWS::Region}", + "cloudFormationExecutionRoleArn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-cfn-exec-role-${AWS::AccountId}-${AWS::Region}", + "stackTemplateAssetObjectUrl": "s3://cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}/c7b60611a3ef84ec57bf6098baf969990922367a029d69e2aecede0c306a66cc.json", + "requiresBootstrapStackVersion": 6, + "bootstrapStackVersionSsmParameter": "/cdk-bootstrap/hnb659fds/version", + "additionalDependencies": [ + "aws-glue-job-scalaspark-streaming.assets" + ], + "lookupRole": { + "arn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-lookup-role-${AWS::AccountId}-${AWS::Region}", + "requiresBootstrapStackVersion": 8, + "bootstrapStackVersionSsmParameter": "/cdk-bootstrap/hnb659fds/version" + } + }, + "dependencies": [ + "aws-glue-job-scalaspark-streaming.assets" + ], + "metadata": { + "/aws-glue-job-scalaspark-streaming/IAMServiceRole/Resource": [ + { + "type": "aws:cdk:logicalId", + "data": "IAMServiceRole61C662C4" + } + ], + "/aws-glue-job-scalaspark-streaming/IAMServiceRole/DefaultPolicy/Resource": [ + { + "type": "aws:cdk:logicalId", + "data": "IAMServiceRoleDefaultPolicy379D1A0E" + } + ], + "/aws-glue-job-scalaspark-streaming/BasicScalaSparkStreamingJob/Resource": [ + { + "type": "aws:cdk:logicalId", + "data": "BasicScalaSparkStreamingJob03E183FE" + } + ], + "/aws-glue-job-scalaspark-streaming/OverrideScalaSparkStreamingJob/Resource": [ + { + "type": "aws:cdk:logicalId", + "data": "OverrideScalaSparkStreamingJob598931ED" + } + ], + "/aws-glue-job-scalaspark-streaming/BootstrapVersion": [ + { + "type": "aws:cdk:logicalId", + "data": 
"BootstrapVersion" + } + ], + "/aws-glue-job-scalaspark-streaming/CheckBootstrapVersion": [ + { + "type": "aws:cdk:logicalId", + "data": "CheckBootstrapVersion" + } + ] + }, + "displayName": "aws-glue-job-scalaspark-streaming" + }, + "awsgluejobscalasparkstreamingintegtestDefaultTestDeployAssertCD3F6A81.assets": { + "type": "cdk:asset-manifest", + "properties": { + "file": "awsgluejobscalasparkstreamingintegtestDefaultTestDeployAssertCD3F6A81.assets.json", + "requiresBootstrapStackVersion": 6, + "bootstrapStackVersionSsmParameter": "/cdk-bootstrap/hnb659fds/version" + } + }, + "awsgluejobscalasparkstreamingintegtestDefaultTestDeployAssertCD3F6A81": { + "type": "aws:cloudformation:stack", + "environment": "aws://unknown-account/unknown-region", + "properties": { + "templateFile": "awsgluejobscalasparkstreamingintegtestDefaultTestDeployAssertCD3F6A81.template.json", + "terminationProtection": false, + "validateOnSynth": false, + "assumeRoleArn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-deploy-role-${AWS::AccountId}-${AWS::Region}", + "cloudFormationExecutionRoleArn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-cfn-exec-role-${AWS::AccountId}-${AWS::Region}", + "stackTemplateAssetObjectUrl": "s3://cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}/21fbb51d7b23f6a6c262b46a9caee79d744a3ac019fd45422d988b96d44b2a22.json", + "requiresBootstrapStackVersion": 6, + "bootstrapStackVersionSsmParameter": "/cdk-bootstrap/hnb659fds/version", + "additionalDependencies": [ + "awsgluejobscalasparkstreamingintegtestDefaultTestDeployAssertCD3F6A81.assets" + ], + "lookupRole": { + "arn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-lookup-role-${AWS::AccountId}-${AWS::Region}", + "requiresBootstrapStackVersion": 8, + "bootstrapStackVersionSsmParameter": "/cdk-bootstrap/hnb659fds/version" + } + }, + "dependencies": [ + "awsgluejobscalasparkstreamingintegtestDefaultTestDeployAssertCD3F6A81.assets" + ], + "metadata": { + 
"/aws-glue-job-scalaspark-streaming-integ-test/DefaultTest/DeployAssert/BootstrapVersion": [ + { + "type": "aws:cdk:logicalId", + "data": "BootstrapVersion" + } + ], + "/aws-glue-job-scalaspark-streaming-integ-test/DefaultTest/DeployAssert/CheckBootstrapVersion": [ + { + "type": "aws:cdk:logicalId", + "data": "CheckBootstrapVersion" + } + ] + }, + "displayName": "aws-glue-job-scalaspark-streaming-integ-test/DefaultTest/DeployAssert" + }, + "Tree": { + "type": "cdk:tree", + "properties": { + "file": "tree.json" + } + } + } +} \ No newline at end of file diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-streaming.js.snapshot/tree.json b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-streaming.js.snapshot/tree.json new file mode 100644 index 0000000000000..031e0e13c6ee8 --- /dev/null +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-streaming.js.snapshot/tree.json @@ -0,0 +1,371 @@ +{ + "version": "tree-0.1", + "tree": { + "id": "App", + "path": "", + "children": { + "aws-glue-job-scalaspark-streaming": { + "id": "aws-glue-job-scalaspark-streaming", + "path": "aws-glue-job-scalaspark-streaming", + "children": { + "IAMServiceRole": { + "id": "IAMServiceRole", + "path": "aws-glue-job-scalaspark-streaming/IAMServiceRole", + "children": { + "ImportIAMServiceRole": { + "id": "ImportIAMServiceRole", + "path": "aws-glue-job-scalaspark-streaming/IAMServiceRole/ImportIAMServiceRole", + "constructInfo": { + "fqn": "aws-cdk-lib.Resource", + "version": "0.0.0" + } + }, + "Resource": { + "id": "Resource", + "path": "aws-glue-job-scalaspark-streaming/IAMServiceRole/Resource", + "attributes": { + "aws:cdk:cloudformation:type": "AWS::IAM::Role", + "aws:cdk:cloudformation:props": { + "assumeRolePolicyDocument": { + "Statement": [ + { + "Action": "sts:AssumeRole", + "Effect": "Allow", + "Principal": { + "Service": "glue.amazonaws.com" + } + } + ], + "Version": "2012-10-17" + }, + "managedPolicyArns": [ + { + "Fn::Join": [ + "", + [ + 
"arn:", + { + "Ref": "AWS::Partition" + }, + ":iam::aws:policy/service-role/AWSGlueServiceRole" + ] + ] + } + ] + } + }, + "constructInfo": { + "fqn": "aws-cdk-lib.aws_iam.CfnRole", + "version": "0.0.0" + } + }, + "DefaultPolicy": { + "id": "DefaultPolicy", + "path": "aws-glue-job-scalaspark-streaming/IAMServiceRole/DefaultPolicy", + "children": { + "Resource": { + "id": "Resource", + "path": "aws-glue-job-scalaspark-streaming/IAMServiceRole/DefaultPolicy/Resource", + "attributes": { + "aws:cdk:cloudformation:type": "AWS::IAM::Policy", + "aws:cdk:cloudformation:props": { + "policyDocument": { + "Statement": [ + { + "Action": [ + "s3:GetBucket*", + "s3:GetObject*", + "s3:List*" + ], + "Effect": "Allow", + "Resource": [ + { + "Fn::Join": [ + "", + [ + "arn:", + { + "Ref": "AWS::Partition" + }, + ":s3:::", + { + "Fn::Sub": "cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}" + }, + "/*" + ] + ] + }, + { + "Fn::Join": [ + "", + [ + "arn:", + { + "Ref": "AWS::Partition" + }, + ":s3:::", + { + "Fn::Sub": "cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}" + } + ] + ] + } + ] + } + ], + "Version": "2012-10-17" + }, + "policyName": "IAMServiceRoleDefaultPolicy379D1A0E", + "roles": [ + { + "Ref": "IAMServiceRole61C662C4" + } + ] + } + }, + "constructInfo": { + "fqn": "aws-cdk-lib.aws_iam.CfnPolicy", + "version": "0.0.0" + } + } + }, + "constructInfo": { + "fqn": "aws-cdk-lib.aws_iam.Policy", + "version": "0.0.0" + } + } + }, + "constructInfo": { + "fqn": "aws-cdk-lib.aws_iam.Role", + "version": "0.0.0" + } + }, + "BasicScalaSparkStreamingJob": { + "id": "BasicScalaSparkStreamingJob", + "path": "aws-glue-job-scalaspark-streaming/BasicScalaSparkStreamingJob", + "children": { + "Codeb58a68516710fd95a65c427a7e567405": { + "id": "Codeb58a68516710fd95a65c427a7e567405", + "path": "aws-glue-job-scalaspark-streaming/BasicScalaSparkStreamingJob/Codeb58a68516710fd95a65c427a7e567405", + "children": { + "Stage": { + "id": "Stage", + "path": 
"aws-glue-job-scalaspark-streaming/BasicScalaSparkStreamingJob/Codeb58a68516710fd95a65c427a7e567405/Stage", + "constructInfo": { + "fqn": "aws-cdk-lib.AssetStaging", + "version": "0.0.0" + } + }, + "AssetBucket": { + "id": "AssetBucket", + "path": "aws-glue-job-scalaspark-streaming/BasicScalaSparkStreamingJob/Codeb58a68516710fd95a65c427a7e567405/AssetBucket", + "constructInfo": { + "fqn": "aws-cdk-lib.aws_s3.BucketBase", + "version": "0.0.0" + } + } + }, + "constructInfo": { + "fqn": "aws-cdk-lib.aws_s3_assets.Asset", + "version": "0.0.0" + } + }, + "Resource": { + "id": "Resource", + "path": "aws-glue-job-scalaspark-streaming/BasicScalaSparkStreamingJob/Resource", + "attributes": { + "aws:cdk:cloudformation:type": "AWS::Glue::Job", + "aws:cdk:cloudformation:props": { + "command": { + "name": "gluestreaming", + "scriptLocation": { + "Fn::Join": [ + "", + [ + "s3://", + { + "Fn::Sub": "cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}" + }, + "/e305655b966b957f91fcec580e3f8703573eb6b69528c5d52190d72579c91602.jar" + ] + ] + } + }, + "defaultArguments": { + "--job-language": "scala", + "--class": "com.example.HelloWorld", + "--enable-metrics": "" + }, + "glueVersion": "4.0", + "numberOfWorkers": 10, + "role": { + "Fn::GetAtt": [ + "IAMServiceRole61C662C4", + "Arn" + ] + }, + "workerType": "G.2X" + } + }, + "constructInfo": { + "fqn": "aws-cdk-lib.aws_glue.CfnJob", + "version": "0.0.0" + } + } + }, + "constructInfo": { + "fqn": "@aws-cdk/aws-glue-alpha.ScalaSparkStreamingJob", + "version": "0.0.0" + } + }, + "OverrideScalaSparkStreamingJob": { + "id": "OverrideScalaSparkStreamingJob", + "path": "aws-glue-job-scalaspark-streaming/OverrideScalaSparkStreamingJob", + "children": { + "Resource": { + "id": "Resource", + "path": "aws-glue-job-scalaspark-streaming/OverrideScalaSparkStreamingJob/Resource", + "attributes": { + "aws:cdk:cloudformation:type": "AWS::Glue::Job", + "aws:cdk:cloudformation:props": { + "command": { + "name": "gluestreaming", + "scriptLocation": { + 
"Fn::Join": [ + "", + [ + "s3://", + { + "Fn::Sub": "cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}" + }, + "/e305655b966b957f91fcec580e3f8703573eb6b69528c5d52190d72579c91602.jar" + ] + ] + } + }, + "defaultArguments": { + "--job-language": "scala", + "--class": "com.example.HelloWorld", + "--enable-metrics": "", + "arg1": "value1", + "arg2": "value2" + }, + "description": "Optional Override ScalaSpark Streaming Job", + "glueVersion": "3.0", + "name": "Optional Override ScalaSpark Streaming Job", + "numberOfWorkers": 20, + "role": { + "Fn::GetAtt": [ + "IAMServiceRole61C662C4", + "Arn" + ] + }, + "tags": { + "key": "value" + }, + "timeout": 15, + "workerType": "G.2X" + } + }, + "constructInfo": { + "fqn": "aws-cdk-lib.aws_glue.CfnJob", + "version": "0.0.0" + } + } + }, + "constructInfo": { + "fqn": "@aws-cdk/aws-glue-alpha.ScalaSparkStreamingJob", + "version": "0.0.0" + } + }, + "BootstrapVersion": { + "id": "BootstrapVersion", + "path": "aws-glue-job-scalaspark-streaming/BootstrapVersion", + "constructInfo": { + "fqn": "aws-cdk-lib.CfnParameter", + "version": "0.0.0" + } + }, + "CheckBootstrapVersion": { + "id": "CheckBootstrapVersion", + "path": "aws-glue-job-scalaspark-streaming/CheckBootstrapVersion", + "constructInfo": { + "fqn": "aws-cdk-lib.CfnRule", + "version": "0.0.0" + } + } + }, + "constructInfo": { + "fqn": "aws-cdk-lib.Stack", + "version": "0.0.0" + } + }, + "aws-glue-job-scalaspark-streaming-integ-test": { + "id": "aws-glue-job-scalaspark-streaming-integ-test", + "path": "aws-glue-job-scalaspark-streaming-integ-test", + "children": { + "DefaultTest": { + "id": "DefaultTest", + "path": "aws-glue-job-scalaspark-streaming-integ-test/DefaultTest", + "children": { + "Default": { + "id": "Default", + "path": "aws-glue-job-scalaspark-streaming-integ-test/DefaultTest/Default", + "constructInfo": { + "fqn": "constructs.Construct", + "version": "10.3.0" + } + }, + "DeployAssert": { + "id": "DeployAssert", + "path": 
"aws-glue-job-scalaspark-streaming-integ-test/DefaultTest/DeployAssert", + "children": { + "BootstrapVersion": { + "id": "BootstrapVersion", + "path": "aws-glue-job-scalaspark-streaming-integ-test/DefaultTest/DeployAssert/BootstrapVersion", + "constructInfo": { + "fqn": "aws-cdk-lib.CfnParameter", + "version": "0.0.0" + } + }, + "CheckBootstrapVersion": { + "id": "CheckBootstrapVersion", + "path": "aws-glue-job-scalaspark-streaming-integ-test/DefaultTest/DeployAssert/CheckBootstrapVersion", + "constructInfo": { + "fqn": "aws-cdk-lib.CfnRule", + "version": "0.0.0" + } + } + }, + "constructInfo": { + "fqn": "aws-cdk-lib.Stack", + "version": "0.0.0" + } + } + }, + "constructInfo": { + "fqn": "@aws-cdk/integ-tests-alpha.IntegTestCase", + "version": "0.0.0" + } + } + }, + "constructInfo": { + "fqn": "@aws-cdk/integ-tests-alpha.IntegTest", + "version": "0.0.0" + } + }, + "Tree": { + "id": "Tree", + "path": "Tree", + "constructInfo": { + "fqn": "constructs.Construct", + "version": "10.3.0" + } + } + }, + "constructInfo": { + "fqn": "aws-cdk-lib.App", + "version": "0.0.0" + } + } +} \ No newline at end of file From ffca35734bd268d2d1d612a19c77729f4e30ff26 Mon Sep 17 00:00:00 2001 From: "Janardhan (Janny) Molumuri" Date: Tue, 16 Apr 2024 14:56:12 +0000 Subject: [PATCH 18/51] Python Streaming Job updates --- .../aws-glue-alpha/lib/jobs/pyspark-streaming-job.ts | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/packages/@aws-cdk/aws-glue-alpha/lib/jobs/pyspark-streaming-job.ts b/packages/@aws-cdk/aws-glue-alpha/lib/jobs/pyspark-streaming-job.ts index fd4174071959c..f09d2038e40fc 100644 --- a/packages/@aws-cdk/aws-glue-alpha/lib/jobs/pyspark-streaming-job.ts +++ b/packages/@aws-cdk/aws-glue-alpha/lib/jobs/pyspark-streaming-job.ts @@ -102,9 +102,9 @@ export class PySparkStreamingJob extends Job { ...this.checkNoReservedArgs(props.defaultArguments), }; - if ((!props.workerType && props.numberOrWorkers !== undefined) || (props.workerType && 
props.numberOrWorkers === undefined)) { - throw new Error('Both workerType and numberOrWorkers must be set'); - } + // if ((!props.workerType && props.numberOrWorkers !== undefined) || (props.workerType && props.numberOrWorkers === undefined)) { + // throw new Error('Both workerType and numberOrWorkers must be set'); + // } const jobResource = new CfnJob(this, 'Resource', { name: props.jobName, From 1da551963ab95a4bd73e177f8c857a67388fc0a4 Mon Sep 17 00:00:00 2001 From: Prashanna B Date: Thu, 18 Apr 2024 08:38:51 +0000 Subject: [PATCH 19/51] Modifications to scala & pyspark flex etl jobs based on PR review --- .../lib/jobs/pysparkflex-etl-job.ts | 59 ++++++++----- .../lib/jobs/scala-spark-flex-etl-job.ts | 87 ++++++++++++------- ...ws-glue-job-scalasparkflex-etl.assets.json | 4 +- ...-glue-job-scalasparkflex-etl.template.json | 2 - .../manifest.json | 2 +- .../tree.json | 6 +- 6 files changed, 100 insertions(+), 60 deletions(-) diff --git a/packages/@aws-cdk/aws-glue-alpha/lib/jobs/pysparkflex-etl-job.ts b/packages/@aws-cdk/aws-glue-alpha/lib/jobs/pysparkflex-etl-job.ts index 296d1947524e1..c3ae840c46ca8 100644 --- a/packages/@aws-cdk/aws-glue-alpha/lib/jobs/pysparkflex-etl-job.ts +++ b/packages/@aws-cdk/aws-glue-alpha/lib/jobs/pysparkflex-etl-job.ts @@ -5,6 +5,8 @@ import { Job, JobProperties } from './job'; import { Construct } from 'constructs'; import { JobType, GlueVersion, JobLanguage, PythonVersion, WorkerType, ExecutionClass } from '../constants'; import { SparkUIProps, SparkUILoggingLocation, validateSparkUiPrefix, cleanSparkUiPrefixForGrant } from './spark-ui-utils'; +import * as cdk from 'aws-cdk-lib/core'; +import { Code } from '../code'; /** * Flex Jobs class @@ -33,11 +35,29 @@ export interface PySparkFlexEtlJobProps extends JobProperties { readonly sparkUI?: SparkUIProps; /** - * Extra Python Files S3 URL (optional) - * S3 URL where additional python dependencies are located - * @default - no extra files + * Specifies configuration properties of a 
notification (optional). + * After a job run starts, the number of minutes to wait before sending a job run delay notification. + * @default - undefined */ - readonly extraPythonFiles?: string[]; + readonly notifyDelayAfter?: cdk.Duration; + + /** + * Additional Python files that AWS Glue adds to the Python path before executing your script. + * + * @default - no extra python files specified. + * + * @see `--extra-py-files` in https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-etl-glue-arguments.html + */ + readonly extraPythonFiles?: Code[]; + + /** + * Additional files, such as configuration files that AWS Glue copies to the working directory of your script before executing it. + * + * @default - no extra files specified. + * + * @see `--extra-files` in https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-etl-glue-arguments.html + */ + readonly extraFiles?: Code[]; } @@ -99,10 +119,6 @@ export class PySparkFlexEtlJob extends Job { ...this.checkNoReservedArgs(props.defaultArguments), }; - /*if ((!props.workerType && props.numberOrWorkers !== undefined) || (props.workerType && props.numberOrWorkers === undefined)) { - throw new Error('Both workerType and numberOrWorkers must be set'); - } */ - const jobResource = new CfnJob(this, 'Resource', { name: props.jobName, description: props.description, @@ -117,7 +133,7 @@ export class PySparkFlexEtlJob extends Job { numberOfWorkers: props.numberOrWorkers ? props.numberOrWorkers : 10, maxRetries: props.maxRetries, executionProperty: props.maxConcurrentRuns ? { maxConcurrentRuns: props.maxConcurrentRuns } : undefined, - //notificationProperty: props.notifyDelayAfter ? { notifyDelayAfter: props.notifyDelayAfter.toMinutes() } : undefined, + notificationProperty: props.notifyDelayAfter ? { notifyDelayAfter: props.notifyDelayAfter.toMinutes() } : undefined, timeout: props.timeout?.toMinutes(), connections: props.connections ? 
{ connections: props.connections.map((connection) => connection.connectionName) } : undefined, securityConfiguration: props.securityConfiguration?.securityConfigurationName, @@ -132,7 +148,7 @@ export class PySparkFlexEtlJob extends Job { } /** - * Set the executable arguments with best practices enabled by default + *Set the executable arguments with best practices enabled by default * * @param props * @returns An array of arguments for Glue to use on execution @@ -141,24 +157,23 @@ export class PySparkFlexEtlJob extends Job { const args: { [key: string]: string } = {}; args['--job-language'] = JobLanguage.PYTHON; - // TODO: Confirm with Glue service team what the mapping is from extra-x to job language, if any if (props.extraPythonFiles && props.extraPythonFiles.length > 0) { - //args['--extra-py-files'] = props.extraPythonFiles.map(code => this.codeS3ObjectUrl(code)).join(','); + args['--extra-py-files'] = props.extraPythonFiles.map(code => this.codeS3ObjectUrl(code)).join(','); + } + if (props.extraFiles && props.extraFiles.length > 0) { + args['--extra-files'] = props.extraFiles.map(code => this.codeS3ObjectUrl(code)).join(','); } - - // if (props.extraJars && props.extraJars?.length > 0) { - // args['--extra-jars'] = props.extraJars.map(code => this.codeS3ObjectUrl(code)).join(','); - // } - // if (props.extraFiles && props.extraFiles.length > 0) { - // args['--extra-files'] = props.extraFiles.map(code => this.codeS3ObjectUrl(code)).join(','); - // } - // if (props.extraJarsFirst) { - // args['--user-jars-first'] = 'true'; - // } return args; } + /** + * Set the arguments for sparkUI with best practices enabled by default + * + * @param sparkUiProps, role + * @returns An array of arguments for enabling sparkUI + */ + private setupSparkUI(role: iam.IRole, sparkUiProps: SparkUIProps) { validateSparkUiPrefix(sparkUiProps.prefix); diff --git a/packages/@aws-cdk/aws-glue-alpha/lib/jobs/scala-spark-flex-etl-job.ts 
b/packages/@aws-cdk/aws-glue-alpha/lib/jobs/scala-spark-flex-etl-job.ts index dc46dec4aeaae..75a1272dcc548 100644 --- a/packages/@aws-cdk/aws-glue-alpha/lib/jobs/scala-spark-flex-etl-job.ts +++ b/packages/@aws-cdk/aws-glue-alpha/lib/jobs/scala-spark-flex-etl-job.ts @@ -15,8 +15,10 @@ import { Bucket } from 'aws-cdk-lib/aws-s3'; import { CfnJob } from 'aws-cdk-lib/aws-glue'; import { Job, JobProperties } from './job'; import { Construct } from 'constructs'; -import { JobType, GlueVersion, JobLanguage, PythonVersion, WorkerType, ExecutionClass } from '../constants'; +import { JobType, GlueVersion, JobLanguage, WorkerType, ExecutionClass } from '../constants'; import { SparkUIProps, SparkUILoggingLocation, validateSparkUiPrefix, cleanSparkUiPrefixForGrant } from './spark-ui-utils'; +import * as cdk from 'aws-cdk-lib/core'; +import { Code } from '../code'; /** * Flex Jobs class @@ -45,22 +47,52 @@ export interface ScalaSparkFlexEtlJobProps extends JobProperties { readonly sparkUI?: SparkUIProps; /** - * Extra Python Files S3 URL (optional) - * S3 URL where additional python dependencies are located - * @default - no extra files + * Specifies configuration properties of a notification (optional). + * After a job run starts, the number of minutes to wait before sending a job run delay notification. + * @default - undefined */ - readonly extraPythonFiles?: string[]; + readonly notifyDelayAfter?: cdk.Duration; /** - * Scala class to be passed as Default Argument to the ETL job - * @default - your scala class + * The fully qualified Scala class name that serves as the entry point for the job. + * + * @see `--class` in https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-etl-glue-arguments.html + */ + readonly className: string; + + /** + * Additional Java .jar files that AWS Glue adds to the Java classpath before executing your script. + * Only individual files are supported, directories are not supported. 
+ * + * @default [] - no extra jars are added to the classpath + * + * @see `--extra-jars` in https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-etl-glue-arguments.html */ - readonly className?: string; + readonly extraJars?: Code[]; + + /** + * Setting this value to true prioritizes the customer's extra JAR files in the classpath. + * + * @default false - priority is not given to user-provided jars + * + * @see `--user-jars-first` in https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-etl-glue-arguments.html + */ + readonly extraJarsFirst?: boolean; + + /** + * Additional files, such as configuration files that AWS Glue copies to the working directory of your script before executing it. + * Only individual files are supported, directories are not supported. + * + * @default [] - no extra files are copied to the working directory + * + * @see `--extra-files` in https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-etl-glue-arguments.html + */ + readonly extraFiles?: Code[]; } /** - * A Python Spark ETL Glue Job + * A Scala Spark ETL Glue Job */ export class ScalaSparkFlexEtlJob extends Job { @@ -79,7 +111,7 @@ export class ScalaSparkFlexEtlJob extends Job { public readonly sparkUILoggingLocation?: SparkUILoggingLocation; /** - * PySparkFlexEtlJob constructor + * ScalaSparkFlexEtlJob constructor * * @param scope * @param id @@ -121,10 +153,6 @@ export class ScalaSparkFlexEtlJob extends Job { ...this.checkNoReservedArgs(props.defaultArguments), }; - /*if ((!props.workerType && props.numberOrWorkers !== undefined) || (props.workerType && props.numberOrWorkers === undefined)) { - throw new Error('Both workerType and numberOrWorkers must be set'); - } */ - const jobResource = new CfnJob(this, 'Resource', { name: props.jobName, description: props.description, @@ -132,14 +160,13 @@ export class ScalaSparkFlexEtlJob extends Job { command: { name: JobType.ETL, scriptLocation: this.codeS3ObjectUrl(props.script), - pythonVersion: 
PythonVersion.THREE, }, glueVersion: props.glueVersion ? props.glueVersion : GlueVersion.V3_0, workerType: props.workerType ? props.workerType : WorkerType.G_2X, numberOfWorkers: props.numberOrWorkers ? props.numberOrWorkers : 10, maxRetries: props.maxRetries, executionProperty: props.maxConcurrentRuns ? { maxConcurrentRuns: props.maxConcurrentRuns } : undefined, - //notificationProperty: props.notifyDelayAfter ? { notifyDelayAfter: props.notifyDelayAfter.toMinutes() } : undefined, + notificationProperty: props.notifyDelayAfter ? { notifyDelayAfter: props.notifyDelayAfter.toMinutes() } : undefined, timeout: props.timeout?.toMinutes(), connections: props.connections ? { connections: props.connections.map((connection) => connection.connectionName) } : undefined, securityConfiguration: props.securityConfiguration?.securityConfigurationName, @@ -164,24 +191,26 @@ export class ScalaSparkFlexEtlJob extends Job { args['--job-language'] = JobLanguage.SCALA; args['--class'] = props.className!; - // TODO: Confirm with Glue service team what the mapping is from extra-x to job language, if any - if (props.extraPythonFiles && props.extraPythonFiles.length > 0) { - //args['--extra-py-files'] = props.extraPythonFiles.map(code => this.codeS3ObjectUrl(code)).join(','); + if (props.extraJars && props.extraJars?.length > 0) { + args['--extra-jars'] = props.extraJars.map(code => this.codeS3ObjectUrl(code)).join(','); } - // if (props.extraJars && props.extraJars?.length > 0) { - // args['--extra-jars'] = props.extraJars.map(code => this.codeS3ObjectUrl(code)).join(','); - // } - // if (props.extraFiles && props.extraFiles.length > 0) { - // args['--extra-files'] = props.extraFiles.map(code => this.codeS3ObjectUrl(code)).join(','); - // } - // if (props.extraJarsFirst) { - // args['--user-jars-first'] = 'true'; - // } - + if (props.extraFiles && props.extraFiles.length > 0) { + args['--extra-files'] = props.extraFiles.map(code => this.codeS3ObjectUrl(code)).join(','); + } + if 
(props.extraJarsFirst) { + args['--user-jars-first'] = 'true'; + } return args; + } + /** + * Set the arguments for sparkUI with best practices enabled by default + * + * @param sparkUiProps, role + * @returns An array of arguments for enabling sparkUI + */ private setupSparkUI(role: iam.IRole, sparkUiProps: SparkUIProps) { validateSparkUiPrefix(sparkUiProps.prefix); diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalasparkflex-etl.js.snapshot/aws-glue-job-scalasparkflex-etl.assets.json b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalasparkflex-etl.js.snapshot/aws-glue-job-scalasparkflex-etl.assets.json index 54675e70120e2..a01a92191d2dd 100644 --- a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalasparkflex-etl.js.snapshot/aws-glue-job-scalasparkflex-etl.assets.json +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalasparkflex-etl.js.snapshot/aws-glue-job-scalasparkflex-etl.assets.json @@ -14,7 +14,7 @@ } } }, - "5a817debdb277ddb49716d89986520ce01e14c36661ccf39e5457466dccbf687": { + "4e827540a0ec17542c449dee42f9232d04eedd33c52dc7d704053aa70635b75d": { "source": { "path": "aws-glue-job-scalasparkflex-etl.template.json", "packaging": "file" @@ -22,7 +22,7 @@ "destinations": { "current_account-current_region": { "bucketName": "cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}", - "objectKey": "5a817debdb277ddb49716d89986520ce01e14c36661ccf39e5457466dccbf687.json", + "objectKey": "4e827540a0ec17542c449dee42f9232d04eedd33c52dc7d704053aa70635b75d.json", "assumeRoleArn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-file-publishing-role-${AWS::AccountId}-${AWS::Region}" } } diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalasparkflex-etl.js.snapshot/aws-glue-job-scalasparkflex-etl.template.json b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalasparkflex-etl.js.snapshot/aws-glue-job-scalasparkflex-etl.template.json index 9754e812f9cff..9bbff43bace40 100644 --- 
a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalasparkflex-etl.js.snapshot/aws-glue-job-scalasparkflex-etl.template.json +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalasparkflex-etl.js.snapshot/aws-glue-job-scalasparkflex-etl.template.json @@ -93,7 +93,6 @@ "Properties": { "Command": { "Name": "glueetl", - "PythonVersion": "3", "ScriptLocation": { "Fn::Join": [ "", @@ -129,7 +128,6 @@ "Properties": { "Command": { "Name": "glueetl", - "PythonVersion": "3", "ScriptLocation": { "Fn::Join": [ "", diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalasparkflex-etl.js.snapshot/manifest.json b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalasparkflex-etl.js.snapshot/manifest.json index 62e439eab23c2..4342aeb236671 100644 --- a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalasparkflex-etl.js.snapshot/manifest.json +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalasparkflex-etl.js.snapshot/manifest.json @@ -18,7 +18,7 @@ "validateOnSynth": false, "assumeRoleArn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-deploy-role-${AWS::AccountId}-${AWS::Region}", "cloudFormationExecutionRoleArn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-cfn-exec-role-${AWS::AccountId}-${AWS::Region}", - "stackTemplateAssetObjectUrl": "s3://cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}/5a817debdb277ddb49716d89986520ce01e14c36661ccf39e5457466dccbf687.json", + "stackTemplateAssetObjectUrl": "s3://cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}/4e827540a0ec17542c449dee42f9232d04eedd33c52dc7d704053aa70635b75d.json", "requiresBootstrapStackVersion": 6, "bootstrapStackVersionSsmParameter": "/cdk-bootstrap/hnb659fds/version", "additionalDependencies": [ diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalasparkflex-etl.js.snapshot/tree.json b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalasparkflex-etl.js.snapshot/tree.json index 194df8aef60b3..1e83043814cff 100644 --- 
a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalasparkflex-etl.js.snapshot/tree.json +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalasparkflex-etl.js.snapshot/tree.json @@ -189,8 +189,7 @@ "/e305655b966b957f91fcec580e3f8703573eb6b69528c5d52190d72579c91602.jar" ] ] - }, - "pythonVersion": "3" + } }, "defaultArguments": { "--job-language": "scala", @@ -243,8 +242,7 @@ "/e305655b966b957f91fcec580e3f8703573eb6b69528c5d52190d72579c91602.jar" ] ] - }, - "pythonVersion": "3" + } }, "defaultArguments": { "--job-language": "scala", From 1762b468bd56c08d4616435d2f6c942cc907aa7c Mon Sep 17 00:00:00 2001 From: Askar Serikov Date: Mon, 22 Apr 2024 19:33:27 +0000 Subject: [PATCH 20/51] Modified the job definition to add logging and metrics. Added unit and integ tests. --- .../aws-glue-alpha/lib/jobs/ray-job.ts | 9 +- ...9be7858a12b228a2ae6e5c10faccd9097b1e855.py | 1 + .../aws-glue-ray-job.assets.json | 32 ++ .../aws-glue-ray-job.template.json | 198 ++++++++++ ...efaultTestDeployAssert7A3FC747.assets.json | 19 + ...aultTestDeployAssert7A3FC747.template.json | 36 ++ .../test/integ.ray-job.js.snapshot/cdk.out | 1 + .../test/integ.ray-job.js.snapshot/integ.json | 12 + .../integ.ray-job.js.snapshot/manifest.json | 131 +++++++ .../test/integ.ray-job.js.snapshot/tree.json | 367 ++++++++++++++++++ .../aws-glue-alpha/test/integ.ray-job.ts | 56 +++ .../aws-glue-alpha/test/ray-job.test.ts | 55 +++ 12 files changed, 915 insertions(+), 2 deletions(-) create mode 100644 packages/@aws-cdk/aws-glue-alpha/test/integ.ray-job.js.snapshot/asset.432033e3218068a915d2532fa9be7858a12b228a2ae6e5c10faccd9097b1e855.py create mode 100644 packages/@aws-cdk/aws-glue-alpha/test/integ.ray-job.js.snapshot/aws-glue-ray-job.assets.json create mode 100644 packages/@aws-cdk/aws-glue-alpha/test/integ.ray-job.js.snapshot/aws-glue-ray-job.template.json create mode 100644 packages/@aws-cdk/aws-glue-alpha/test/integ.ray-job.js.snapshot/awsgluerayjobintegtestDefaultTestDeployAssert7A3FC747.assets.json 
create mode 100644 packages/@aws-cdk/aws-glue-alpha/test/integ.ray-job.js.snapshot/awsgluerayjobintegtestDefaultTestDeployAssert7A3FC747.template.json create mode 100644 packages/@aws-cdk/aws-glue-alpha/test/integ.ray-job.js.snapshot/cdk.out create mode 100644 packages/@aws-cdk/aws-glue-alpha/test/integ.ray-job.js.snapshot/integ.json create mode 100644 packages/@aws-cdk/aws-glue-alpha/test/integ.ray-job.js.snapshot/manifest.json create mode 100644 packages/@aws-cdk/aws-glue-alpha/test/integ.ray-job.js.snapshot/tree.json create mode 100644 packages/@aws-cdk/aws-glue-alpha/test/integ.ray-job.ts create mode 100644 packages/@aws-cdk/aws-glue-alpha/test/ray-job.test.ts diff --git a/packages/@aws-cdk/aws-glue-alpha/lib/jobs/ray-job.ts b/packages/@aws-cdk/aws-glue-alpha/lib/jobs/ray-job.ts index 12cf626d33d92..a7a3d0a4bbdc1 100644 --- a/packages/@aws-cdk/aws-glue-alpha/lib/jobs/ray-job.ts +++ b/packages/@aws-cdk/aws-glue-alpha/lib/jobs/ray-job.ts @@ -60,9 +60,15 @@ export class RayJob extends Job { }; this.grantPrincipal = this.role; + // Enable CloudWatch metrics and continuous logging by default as a best practice + const continuousLoggingArgs = props.continuousLogging?.enabled ? this.setupContinuousLogging(this.role, props.continuousLogging) : {}; + const profilingMetricsArgs = { '--enable-metrics': '' }; + // Combine command line arguments into a single line item const defaultArguments = { ...this.checkNoReservedArgs(props.defaultArguments), + ...continuousLoggingArgs, + ...profilingMetricsArgs, }; if (props.workerType && props.workerType !== WorkerType.Z_2X) { @@ -84,10 +90,9 @@ export class RayJob extends Job { }, glueVersion: props.glueVersion ? props.glueVersion : GlueVersion.V4_0, workerType: props.workerType ? props.workerType : WorkerType.Z_2X, - numberOfWorkers: props.numberOrWorkers, + numberOfWorkers: props.numberOrWorkers ? props.numberOrWorkers: 3, maxRetries: props.maxRetries, executionProperty: props.maxConcurrentRuns ? 
{ maxConcurrentRuns: props.maxConcurrentRuns } : undefined, - //notificationProperty: props.notifyDelayAfter ? { notifyDelayAfter: props.notifyDelayAfter.toMinutes() } : undefined, timeout: props.timeout?.toMinutes(), connections: props.connections ? { connections: props.connections.map((connection) => connection.connectionName) } : undefined, securityConfiguration: props.securityConfiguration?.securityConfigurationName, diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.ray-job.js.snapshot/asset.432033e3218068a915d2532fa9be7858a12b228a2ae6e5c10faccd9097b1e855.py b/packages/@aws-cdk/aws-glue-alpha/test/integ.ray-job.js.snapshot/asset.432033e3218068a915d2532fa9be7858a12b228a2ae6e5c10faccd9097b1e855.py new file mode 100644 index 0000000000000..e75154b7c390f --- /dev/null +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.ray-job.js.snapshot/asset.432033e3218068a915d2532fa9be7858a12b228a2ae6e5c10faccd9097b1e855.py @@ -0,0 +1 @@ +print("hello world") \ No newline at end of file diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.ray-job.js.snapshot/aws-glue-ray-job.assets.json b/packages/@aws-cdk/aws-glue-alpha/test/integ.ray-job.js.snapshot/aws-glue-ray-job.assets.json new file mode 100644 index 0000000000000..02da523f19100 --- /dev/null +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.ray-job.js.snapshot/aws-glue-ray-job.assets.json @@ -0,0 +1,32 @@ +{ + "version": "36.0.0", + "files": { + "432033e3218068a915d2532fa9be7858a12b228a2ae6e5c10faccd9097b1e855": { + "source": { + "path": "asset.432033e3218068a915d2532fa9be7858a12b228a2ae6e5c10faccd9097b1e855.py", + "packaging": "file" + }, + "destinations": { + "current_account-current_region": { + "bucketName": "cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}", + "objectKey": "432033e3218068a915d2532fa9be7858a12b228a2ae6e5c10faccd9097b1e855.py", + "assumeRoleArn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-file-publishing-role-${AWS::AccountId}-${AWS::Region}" + } + } + }, + 
"a8fda7378f7edbed186ceae92474d861037062e15e578b62a4ba9cfdb80a57b8": { + "source": { + "path": "aws-glue-ray-job.template.json", + "packaging": "file" + }, + "destinations": { + "current_account-current_region": { + "bucketName": "cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}", + "objectKey": "a8fda7378f7edbed186ceae92474d861037062e15e578b62a4ba9cfdb80a57b8.json", + "assumeRoleArn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-file-publishing-role-${AWS::AccountId}-${AWS::Region}" + } + } + } + }, + "dockerImages": {} +} \ No newline at end of file diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.ray-job.js.snapshot/aws-glue-ray-job.template.json b/packages/@aws-cdk/aws-glue-alpha/test/integ.ray-job.js.snapshot/aws-glue-ray-job.template.json new file mode 100644 index 0000000000000..806a589c73b7c --- /dev/null +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.ray-job.js.snapshot/aws-glue-ray-job.template.json @@ -0,0 +1,198 @@ +{ + "Resources": { + "IAMServiceRole61C662C4": { + "Type": "AWS::IAM::Role", + "Properties": { + "AssumeRolePolicyDocument": { + "Statement": [ + { + "Action": "sts:AssumeRole", + "Effect": "Allow", + "Principal": { + "Service": "glue.amazonaws.com" + } + } + ], + "Version": "2012-10-17" + }, + "ManagedPolicyArns": [ + { + "Fn::Join": [ + "", + [ + "arn:", + { + "Ref": "AWS::Partition" + }, + ":iam::aws:policy/service-role/AWSGlueServiceRole" + ] + ] + } + ] + } + }, + "IAMServiceRoleDefaultPolicy379D1A0E": { + "Type": "AWS::IAM::Policy", + "Properties": { + "PolicyDocument": { + "Statement": [ + { + "Action": [ + "s3:GetBucket*", + "s3:GetObject*", + "s3:List*" + ], + "Effect": "Allow", + "Resource": [ + { + "Fn::Join": [ + "", + [ + "arn:", + { + "Ref": "AWS::Partition" + }, + ":s3:::", + { + "Fn::Sub": "cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}" + }, + "/*" + ] + ] + }, + { + "Fn::Join": [ + "", + [ + "arn:", + { + "Ref": "AWS::Partition" + }, + ":s3:::", + { + "Fn::Sub": 
"cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}" + } + ] + ] + } + ] + } + ], + "Version": "2012-10-17" + }, + "PolicyName": "IAMServiceRoleDefaultPolicy379D1A0E", + "Roles": [ + { + "Ref": "IAMServiceRole61C662C4" + } + ] + } + }, + "BasicRayJobF8D69550": { + "Type": "AWS::Glue::Job", + "Properties": { + "Command": { + "Name": "glueray", + "Runtime": "Ray2.4", + "ScriptLocation": { + "Fn::Join": [ + "", + [ + "s3://", + { + "Fn::Sub": "cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}" + }, + "/432033e3218068a915d2532fa9be7858a12b228a2ae6e5c10faccd9097b1e855.py" + ] + ] + } + }, + "DefaultArguments": { + "--enable-metrics": "" + }, + "GlueVersion": "4.0", + "NumberOfWorkers": 3, + "Role": { + "Fn::GetAtt": [ + "IAMServiceRole61C662C4", + "Arn" + ] + }, + "WorkerType": "Z.2X" + } + }, + "RayJob5Workers11381A2E": { + "Type": "AWS::Glue::Job", + "Properties": { + "Command": { + "Name": "glueray", + "Runtime": "Ray2.4", + "ScriptLocation": { + "Fn::Join": [ + "", + [ + "s3://", + { + "Fn::Sub": "cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}" + }, + "/432033e3218068a915d2532fa9be7858a12b228a2ae6e5c10faccd9097b1e855.py" + ] + ] + } + }, + "DefaultArguments": { + "arg1": "value1", + "arg2": "value2", + "--enable-metrics": "" + }, + "GlueVersion": "4.0", + "Name": "RayJobWith5Workers", + "NumberOfWorkers": 5, + "Role": { + "Fn::GetAtt": [ + "IAMServiceRole61C662C4", + "Arn" + ] + }, + "Tags": { + "key": "value" + }, + "WorkerType": "Z.2X" + } + } + }, + "Parameters": { + "BootstrapVersion": { + "Type": "AWS::SSM::Parameter::Value", + "Default": "/cdk-bootstrap/hnb659fds/version", + "Description": "Version of the CDK Bootstrap resources in this environment, automatically retrieved from SSM Parameter Store. 
[cdk:skip]" + } + }, + "Rules": { + "CheckBootstrapVersion": { + "Assertions": [ + { + "Assert": { + "Fn::Not": [ + { + "Fn::Contains": [ + [ + "1", + "2", + "3", + "4", + "5" + ], + { + "Ref": "BootstrapVersion" + } + ] + } + ] + }, + "AssertDescription": "CDK bootstrap stack version 6 required. Please run 'cdk bootstrap' with a recent version of the CDK CLI." + } + ] + } + } +} \ No newline at end of file diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.ray-job.js.snapshot/awsgluerayjobintegtestDefaultTestDeployAssert7A3FC747.assets.json b/packages/@aws-cdk/aws-glue-alpha/test/integ.ray-job.js.snapshot/awsgluerayjobintegtestDefaultTestDeployAssert7A3FC747.assets.json new file mode 100644 index 0000000000000..277f637073ffd --- /dev/null +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.ray-job.js.snapshot/awsgluerayjobintegtestDefaultTestDeployAssert7A3FC747.assets.json @@ -0,0 +1,19 @@ +{ + "version": "36.0.0", + "files": { + "21fbb51d7b23f6a6c262b46a9caee79d744a3ac019fd45422d988b96d44b2a22": { + "source": { + "path": "awsgluerayjobintegtestDefaultTestDeployAssert7A3FC747.template.json", + "packaging": "file" + }, + "destinations": { + "current_account-current_region": { + "bucketName": "cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}", + "objectKey": "21fbb51d7b23f6a6c262b46a9caee79d744a3ac019fd45422d988b96d44b2a22.json", + "assumeRoleArn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-file-publishing-role-${AWS::AccountId}-${AWS::Region}" + } + } + } + }, + "dockerImages": {} +} \ No newline at end of file diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.ray-job.js.snapshot/awsgluerayjobintegtestDefaultTestDeployAssert7A3FC747.template.json b/packages/@aws-cdk/aws-glue-alpha/test/integ.ray-job.js.snapshot/awsgluerayjobintegtestDefaultTestDeployAssert7A3FC747.template.json new file mode 100644 index 0000000000000..ad9d0fb73d1dd --- /dev/null +++ 
b/packages/@aws-cdk/aws-glue-alpha/test/integ.ray-job.js.snapshot/awsgluerayjobintegtestDefaultTestDeployAssert7A3FC747.template.json @@ -0,0 +1,36 @@ +{ + "Parameters": { + "BootstrapVersion": { + "Type": "AWS::SSM::Parameter::Value", + "Default": "/cdk-bootstrap/hnb659fds/version", + "Description": "Version of the CDK Bootstrap resources in this environment, automatically retrieved from SSM Parameter Store. [cdk:skip]" + } + }, + "Rules": { + "CheckBootstrapVersion": { + "Assertions": [ + { + "Assert": { + "Fn::Not": [ + { + "Fn::Contains": [ + [ + "1", + "2", + "3", + "4", + "5" + ], + { + "Ref": "BootstrapVersion" + } + ] + } + ] + }, + "AssertDescription": "CDK bootstrap stack version 6 required. Please run 'cdk bootstrap' with a recent version of the CDK CLI." + } + ] + } + } +} \ No newline at end of file diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.ray-job.js.snapshot/cdk.out b/packages/@aws-cdk/aws-glue-alpha/test/integ.ray-job.js.snapshot/cdk.out new file mode 100644 index 0000000000000..1f0068d32659a --- /dev/null +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.ray-job.js.snapshot/cdk.out @@ -0,0 +1 @@ +{"version":"36.0.0"} \ No newline at end of file diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.ray-job.js.snapshot/integ.json b/packages/@aws-cdk/aws-glue-alpha/test/integ.ray-job.js.snapshot/integ.json new file mode 100644 index 0000000000000..38d8633d3f555 --- /dev/null +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.ray-job.js.snapshot/integ.json @@ -0,0 +1,12 @@ +{ + "version": "36.0.0", + "testCases": { + "aws-glue-ray-job-integ-test/DefaultTest": { + "stacks": [ + "aws-glue-ray-job" + ], + "assertionStack": "aws-glue-ray-job-integ-test/DefaultTest/DeployAssert", + "assertionStackName": "awsgluerayjobintegtestDefaultTestDeployAssert7A3FC747" + } + } +} \ No newline at end of file diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.ray-job.js.snapshot/manifest.json 
b/packages/@aws-cdk/aws-glue-alpha/test/integ.ray-job.js.snapshot/manifest.json new file mode 100644 index 0000000000000..aa92e327c484f --- /dev/null +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.ray-job.js.snapshot/manifest.json @@ -0,0 +1,131 @@ +{ + "version": "36.0.0", + "artifacts": { + "aws-glue-ray-job.assets": { + "type": "cdk:asset-manifest", + "properties": { + "file": "aws-glue-ray-job.assets.json", + "requiresBootstrapStackVersion": 6, + "bootstrapStackVersionSsmParameter": "/cdk-bootstrap/hnb659fds/version" + } + }, + "aws-glue-ray-job": { + "type": "aws:cloudformation:stack", + "environment": "aws://unknown-account/unknown-region", + "properties": { + "templateFile": "aws-glue-ray-job.template.json", + "terminationProtection": false, + "validateOnSynth": false, + "assumeRoleArn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-deploy-role-${AWS::AccountId}-${AWS::Region}", + "cloudFormationExecutionRoleArn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-cfn-exec-role-${AWS::AccountId}-${AWS::Region}", + "stackTemplateAssetObjectUrl": "s3://cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}/a8fda7378f7edbed186ceae92474d861037062e15e578b62a4ba9cfdb80a57b8.json", + "requiresBootstrapStackVersion": 6, + "bootstrapStackVersionSsmParameter": "/cdk-bootstrap/hnb659fds/version", + "additionalDependencies": [ + "aws-glue-ray-job.assets" + ], + "lookupRole": { + "arn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-lookup-role-${AWS::AccountId}-${AWS::Region}", + "requiresBootstrapStackVersion": 8, + "bootstrapStackVersionSsmParameter": "/cdk-bootstrap/hnb659fds/version" + } + }, + "dependencies": [ + "aws-glue-ray-job.assets" + ], + "metadata": { + "/aws-glue-ray-job/IAMServiceRole/Resource": [ + { + "type": "aws:cdk:logicalId", + "data": "IAMServiceRole61C662C4" + } + ], + "/aws-glue-ray-job/IAMServiceRole/DefaultPolicy/Resource": [ + { + "type": "aws:cdk:logicalId", + "data": 
"IAMServiceRoleDefaultPolicy379D1A0E" + } + ], + "/aws-glue-ray-job/BasicRayJob/Resource": [ + { + "type": "aws:cdk:logicalId", + "data": "BasicRayJobF8D69550" + } + ], + "/aws-glue-ray-job/RayJob5Workers/Resource": [ + { + "type": "aws:cdk:logicalId", + "data": "RayJob5Workers11381A2E" + } + ], + "/aws-glue-ray-job/BootstrapVersion": [ + { + "type": "aws:cdk:logicalId", + "data": "BootstrapVersion" + } + ], + "/aws-glue-ray-job/CheckBootstrapVersion": [ + { + "type": "aws:cdk:logicalId", + "data": "CheckBootstrapVersion" + } + ] + }, + "displayName": "aws-glue-ray-job" + }, + "awsgluerayjobintegtestDefaultTestDeployAssert7A3FC747.assets": { + "type": "cdk:asset-manifest", + "properties": { + "file": "awsgluerayjobintegtestDefaultTestDeployAssert7A3FC747.assets.json", + "requiresBootstrapStackVersion": 6, + "bootstrapStackVersionSsmParameter": "/cdk-bootstrap/hnb659fds/version" + } + }, + "awsgluerayjobintegtestDefaultTestDeployAssert7A3FC747": { + "type": "aws:cloudformation:stack", + "environment": "aws://unknown-account/unknown-region", + "properties": { + "templateFile": "awsgluerayjobintegtestDefaultTestDeployAssert7A3FC747.template.json", + "terminationProtection": false, + "validateOnSynth": false, + "assumeRoleArn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-deploy-role-${AWS::AccountId}-${AWS::Region}", + "cloudFormationExecutionRoleArn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-cfn-exec-role-${AWS::AccountId}-${AWS::Region}", + "stackTemplateAssetObjectUrl": "s3://cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}/21fbb51d7b23f6a6c262b46a9caee79d744a3ac019fd45422d988b96d44b2a22.json", + "requiresBootstrapStackVersion": 6, + "bootstrapStackVersionSsmParameter": "/cdk-bootstrap/hnb659fds/version", + "additionalDependencies": [ + "awsgluerayjobintegtestDefaultTestDeployAssert7A3FC747.assets" + ], + "lookupRole": { + "arn": 
"arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-lookup-role-${AWS::AccountId}-${AWS::Region}", + "requiresBootstrapStackVersion": 8, + "bootstrapStackVersionSsmParameter": "/cdk-bootstrap/hnb659fds/version" + } + }, + "dependencies": [ + "awsgluerayjobintegtestDefaultTestDeployAssert7A3FC747.assets" + ], + "metadata": { + "/aws-glue-ray-job-integ-test/DefaultTest/DeployAssert/BootstrapVersion": [ + { + "type": "aws:cdk:logicalId", + "data": "BootstrapVersion" + } + ], + "/aws-glue-ray-job-integ-test/DefaultTest/DeployAssert/CheckBootstrapVersion": [ + { + "type": "aws:cdk:logicalId", + "data": "CheckBootstrapVersion" + } + ] + }, + "displayName": "aws-glue-ray-job-integ-test/DefaultTest/DeployAssert" + }, + "Tree": { + "type": "cdk:tree", + "properties": { + "file": "tree.json" + } + } + } +} \ No newline at end of file diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.ray-job.js.snapshot/tree.json b/packages/@aws-cdk/aws-glue-alpha/test/integ.ray-job.js.snapshot/tree.json new file mode 100644 index 0000000000000..22326c8095593 --- /dev/null +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.ray-job.js.snapshot/tree.json @@ -0,0 +1,367 @@ +{ + "version": "tree-0.1", + "tree": { + "id": "App", + "path": "", + "children": { + "aws-glue-ray-job": { + "id": "aws-glue-ray-job", + "path": "aws-glue-ray-job", + "children": { + "IAMServiceRole": { + "id": "IAMServiceRole", + "path": "aws-glue-ray-job/IAMServiceRole", + "children": { + "ImportIAMServiceRole": { + "id": "ImportIAMServiceRole", + "path": "aws-glue-ray-job/IAMServiceRole/ImportIAMServiceRole", + "constructInfo": { + "fqn": "aws-cdk-lib.Resource", + "version": "0.0.0" + } + }, + "Resource": { + "id": "Resource", + "path": "aws-glue-ray-job/IAMServiceRole/Resource", + "attributes": { + "aws:cdk:cloudformation:type": "AWS::IAM::Role", + "aws:cdk:cloudformation:props": { + "assumeRolePolicyDocument": { + "Statement": [ + { + "Action": "sts:AssumeRole", + "Effect": "Allow", + "Principal": { + 
"Service": "glue.amazonaws.com" + } + } + ], + "Version": "2012-10-17" + }, + "managedPolicyArns": [ + { + "Fn::Join": [ + "", + [ + "arn:", + { + "Ref": "AWS::Partition" + }, + ":iam::aws:policy/service-role/AWSGlueServiceRole" + ] + ] + } + ] + } + }, + "constructInfo": { + "fqn": "aws-cdk-lib.aws_iam.CfnRole", + "version": "0.0.0" + } + }, + "DefaultPolicy": { + "id": "DefaultPolicy", + "path": "aws-glue-ray-job/IAMServiceRole/DefaultPolicy", + "children": { + "Resource": { + "id": "Resource", + "path": "aws-glue-ray-job/IAMServiceRole/DefaultPolicy/Resource", + "attributes": { + "aws:cdk:cloudformation:type": "AWS::IAM::Policy", + "aws:cdk:cloudformation:props": { + "policyDocument": { + "Statement": [ + { + "Action": [ + "s3:GetBucket*", + "s3:GetObject*", + "s3:List*" + ], + "Effect": "Allow", + "Resource": [ + { + "Fn::Join": [ + "", + [ + "arn:", + { + "Ref": "AWS::Partition" + }, + ":s3:::", + { + "Fn::Sub": "cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}" + }, + "/*" + ] + ] + }, + { + "Fn::Join": [ + "", + [ + "arn:", + { + "Ref": "AWS::Partition" + }, + ":s3:::", + { + "Fn::Sub": "cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}" + } + ] + ] + } + ] + } + ], + "Version": "2012-10-17" + }, + "policyName": "IAMServiceRoleDefaultPolicy379D1A0E", + "roles": [ + { + "Ref": "IAMServiceRole61C662C4" + } + ] + } + }, + "constructInfo": { + "fqn": "aws-cdk-lib.aws_iam.CfnPolicy", + "version": "0.0.0" + } + } + }, + "constructInfo": { + "fqn": "aws-cdk-lib.aws_iam.Policy", + "version": "0.0.0" + } + } + }, + "constructInfo": { + "fqn": "aws-cdk-lib.aws_iam.Role", + "version": "0.0.0" + } + }, + "BasicRayJob": { + "id": "BasicRayJob", + "path": "aws-glue-ray-job/BasicRayJob", + "children": { + "Code2907ea7be4a583708cfffc21b3df1dfa": { + "id": "Code2907ea7be4a583708cfffc21b3df1dfa", + "path": "aws-glue-ray-job/BasicRayJob/Code2907ea7be4a583708cfffc21b3df1dfa", + "children": { + "Stage": { + "id": "Stage", + "path": 
"aws-glue-ray-job/BasicRayJob/Code2907ea7be4a583708cfffc21b3df1dfa/Stage", + "constructInfo": { + "fqn": "aws-cdk-lib.AssetStaging", + "version": "0.0.0" + } + }, + "AssetBucket": { + "id": "AssetBucket", + "path": "aws-glue-ray-job/BasicRayJob/Code2907ea7be4a583708cfffc21b3df1dfa/AssetBucket", + "constructInfo": { + "fqn": "aws-cdk-lib.aws_s3.BucketBase", + "version": "0.0.0" + } + } + }, + "constructInfo": { + "fqn": "aws-cdk-lib.aws_s3_assets.Asset", + "version": "0.0.0" + } + }, + "Resource": { + "id": "Resource", + "path": "aws-glue-ray-job/BasicRayJob/Resource", + "attributes": { + "aws:cdk:cloudformation:type": "AWS::Glue::Job", + "aws:cdk:cloudformation:props": { + "command": { + "name": "glueray", + "scriptLocation": { + "Fn::Join": [ + "", + [ + "s3://", + { + "Fn::Sub": "cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}" + }, + "/432033e3218068a915d2532fa9be7858a12b228a2ae6e5c10faccd9097b1e855.py" + ] + ] + }, + "runtime": "Ray2.4" + }, + "defaultArguments": { + "--enable-metrics": "" + }, + "glueVersion": "4.0", + "numberOfWorkers": 3, + "role": { + "Fn::GetAtt": [ + "IAMServiceRole61C662C4", + "Arn" + ] + }, + "workerType": "Z.2X" + } + }, + "constructInfo": { + "fqn": "aws-cdk-lib.aws_glue.CfnJob", + "version": "0.0.0" + } + } + }, + "constructInfo": { + "fqn": "@aws-cdk/aws-glue-alpha.RayJob", + "version": "0.0.0" + } + }, + "RayJob5Workers": { + "id": "RayJob5Workers", + "path": "aws-glue-ray-job/RayJob5Workers", + "children": { + "Resource": { + "id": "Resource", + "path": "aws-glue-ray-job/RayJob5Workers/Resource", + "attributes": { + "aws:cdk:cloudformation:type": "AWS::Glue::Job", + "aws:cdk:cloudformation:props": { + "command": { + "name": "glueray", + "scriptLocation": { + "Fn::Join": [ + "", + [ + "s3://", + { + "Fn::Sub": "cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}" + }, + "/432033e3218068a915d2532fa9be7858a12b228a2ae6e5c10faccd9097b1e855.py" + ] + ] + }, + "runtime": "Ray2.4" + }, + "defaultArguments": { + "arg1": "value1", + 
"arg2": "value2", + "--enable-metrics": "" + }, + "glueVersion": "4.0", + "name": "RayJobWith5Workers", + "numberOfWorkers": 5, + "role": { + "Fn::GetAtt": [ + "IAMServiceRole61C662C4", + "Arn" + ] + }, + "tags": { + "key": "value" + }, + "workerType": "Z.2X" + } + }, + "constructInfo": { + "fqn": "aws-cdk-lib.aws_glue.CfnJob", + "version": "0.0.0" + } + } + }, + "constructInfo": { + "fqn": "@aws-cdk/aws-glue-alpha.RayJob", + "version": "0.0.0" + } + }, + "BootstrapVersion": { + "id": "BootstrapVersion", + "path": "aws-glue-ray-job/BootstrapVersion", + "constructInfo": { + "fqn": "aws-cdk-lib.CfnParameter", + "version": "0.0.0" + } + }, + "CheckBootstrapVersion": { + "id": "CheckBootstrapVersion", + "path": "aws-glue-ray-job/CheckBootstrapVersion", + "constructInfo": { + "fqn": "aws-cdk-lib.CfnRule", + "version": "0.0.0" + } + } + }, + "constructInfo": { + "fqn": "aws-cdk-lib.Stack", + "version": "0.0.0" + } + }, + "aws-glue-ray-job-integ-test": { + "id": "aws-glue-ray-job-integ-test", + "path": "aws-glue-ray-job-integ-test", + "children": { + "DefaultTest": { + "id": "DefaultTest", + "path": "aws-glue-ray-job-integ-test/DefaultTest", + "children": { + "Default": { + "id": "Default", + "path": "aws-glue-ray-job-integ-test/DefaultTest/Default", + "constructInfo": { + "fqn": "constructs.Construct", + "version": "10.3.0" + } + }, + "DeployAssert": { + "id": "DeployAssert", + "path": "aws-glue-ray-job-integ-test/DefaultTest/DeployAssert", + "children": { + "BootstrapVersion": { + "id": "BootstrapVersion", + "path": "aws-glue-ray-job-integ-test/DefaultTest/DeployAssert/BootstrapVersion", + "constructInfo": { + "fqn": "aws-cdk-lib.CfnParameter", + "version": "0.0.0" + } + }, + "CheckBootstrapVersion": { + "id": "CheckBootstrapVersion", + "path": "aws-glue-ray-job-integ-test/DefaultTest/DeployAssert/CheckBootstrapVersion", + "constructInfo": { + "fqn": "aws-cdk-lib.CfnRule", + "version": "0.0.0" + } + } + }, + "constructInfo": { + "fqn": "aws-cdk-lib.Stack", + "version": 
"0.0.0" + } + } + }, + "constructInfo": { + "fqn": "@aws-cdk/integ-tests-alpha.IntegTestCase", + "version": "0.0.0" + } + } + }, + "constructInfo": { + "fqn": "@aws-cdk/integ-tests-alpha.IntegTest", + "version": "0.0.0" + } + }, + "Tree": { + "id": "Tree", + "path": "Tree", + "constructInfo": { + "fqn": "constructs.Construct", + "version": "10.3.0" + } + } + }, + "constructInfo": { + "fqn": "aws-cdk-lib.App", + "version": "0.0.0" + } + } +} \ No newline at end of file diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.ray-job.ts b/packages/@aws-cdk/aws-glue-alpha/test/integ.ray-job.ts new file mode 100644 index 0000000000000..ecd36233ab62e --- /dev/null +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.ray-job.ts @@ -0,0 +1,56 @@ +import * as path from 'path'; +import * as cdk from 'aws-cdk-lib'; +import * as glue from '../lib'; +import * as iam from 'aws-cdk-lib/aws-iam'; +import * as integ from '@aws-cdk/integ-tests-alpha'; + +/** + * To verify the ability to run jobs created in this test + * + * Run the job using + * `aws glue start-job-run --region us-east-1 --job-name ` + * This will return a runId + * + * Get the status of the job run using + * `aws glue get-job-run --region us-east-1 --job-name --run-id ` + * + * For example, to test the ShellJob + * - Run: `aws glue start-job-run --region us-east-1 --job-name ShellJob` + * - Get Status: `aws glue get-job-run --region us-east-1 --job-name ShellJob --run-id ` + * - Check output: `aws logs get-log-events --region us-east-1 --log-group-name "/aws-glue/python-jobs/output" --log-stream-name ">` which should show "hello world" + */ +const app = new cdk.App(); + +const stack = new cdk.Stack(app, 'aws-glue-ray-job'); + +const script = glue.Code.fromAsset(path.join(__dirname, 'job-script', 'hello_world.py')); + +const iam_role = new iam.Role(stack, 'IAMServiceRole', { + assumedBy: new iam.ServicePrincipal('glue.amazonaws.com'), + managedPolicies: 
[iam.ManagedPolicy.fromAwsManagedPolicyName('service-role/AWSGlueServiceRole')], +}); + +new glue.RayJob(stack, 'BasicRayJob', { + script: script, + role: iam_role, +}); + +new glue.RayJob(stack, 'RayJob5Workers', { + script: script, + role: iam_role, + numberOrWorkers: 5, + jobName: 'RayJobWith5Workers', + defaultArguments: { + arg1: 'value1', + arg2: 'value2', + }, + tags: { + key: 'value', + }, +}); + +new integ.IntegTest(app, 'aws-glue-ray-job-integ-test', { + testCases: [stack], +}); + +app.synth(); \ No newline at end of file diff --git a/packages/@aws-cdk/aws-glue-alpha/test/ray-job.test.ts b/packages/@aws-cdk/aws-glue-alpha/test/ray-job.test.ts new file mode 100644 index 0000000000000..0581ea71b6329 --- /dev/null +++ b/packages/@aws-cdk/aws-glue-alpha/test/ray-job.test.ts @@ -0,0 +1,55 @@ + +import * as cdk from 'aws-cdk-lib'; +import * as glue from '../lib'; +import * as iam from 'aws-cdk-lib/aws-iam'; +import * as s3 from 'aws-cdk-lib/aws-s3'; +import { Template } from 'aws-cdk-lib/assertions'; + +describe('Job', () => { + let stack: cdk.Stack; + let role: iam.IRole; + let script: glue.Code; + let codeBucket: s3.IBucket; + let job: glue.IJob; + + beforeEach(() => { + stack = new cdk.Stack(); + role = iam.Role.fromRoleArn(stack, 'Role', 'arn:aws:iam::123456789012:role/TestRole'); + codeBucket = s3.Bucket.fromBucketName(stack, 'CodeBucket', 'bucketname'); + script = glue.Code.fromBucket(codeBucket, 'script'); + }); + + describe('Create new Ray Job with default parameters', () => { + + beforeEach(() => { + job = new glue.RayJob(stack, 'ImportedJob', { role, script }); + }); + + test('Test default attributes', () => { + expect(job.jobArn).toEqual(stack.formatArn({ + service: 'glue', + resource: 'job', + resourceName: job.jobName, + })); + expect(job.grantPrincipal).toEqual(role); + }); + + test('Default Glue Version should be 4.0', () => { + Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { + GlueVersion: '4.0', + }); + }); + + test('Default 
number of workers should be 3', () => { + Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { + NumberOfWorkers: 3, + }); + }); + + test('Default worker type should be Z.2X', () => { + Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { + WorkerType: 'Z.2X', + }); + }); + }); +}); \ No newline at end of file From a234b6b7b7c33ce5ddbbd1d4016be6ec20a0c191 Mon Sep 17 00:00:00 2001 From: Deepak Kovvuri Date: Tue, 30 Apr 2024 13:48:18 +0000 Subject: [PATCH 21/51] Initial Commit for Workflow Triggers --- .../@aws-cdk/aws-glue-alpha/lib/constants.ts | 41 ++ packages/@aws-cdk/aws-glue-alpha/lib/index.ts | 4 +- .../lib/triggers/conditional-triggers.ts | 7 - .../lib/triggers/notify-event-trigger.ts | 10 - .../lib/triggers/on-demand-trigger.ts | 8 - .../lib/triggers/scheduled-trigger.ts | 12 - .../lib/triggers/trigger-options.ts | 238 ++++++++++ .../aws-glue-alpha/lib/triggers/trigger.ts | 7 - .../aws-glue-alpha/lib/triggers/workflow.ts | 433 ++++++++++++++++++ .../aws-glue-alpha/test/integ.workflow.ts | 60 +++ .../test/workflow-triggers.test.ts | 289 ++++++++++++ 11 files changed, 1064 insertions(+), 45 deletions(-) delete mode 100644 packages/@aws-cdk/aws-glue-alpha/lib/triggers/conditional-triggers.ts delete mode 100644 packages/@aws-cdk/aws-glue-alpha/lib/triggers/notify-event-trigger.ts delete mode 100644 packages/@aws-cdk/aws-glue-alpha/lib/triggers/on-demand-trigger.ts delete mode 100644 packages/@aws-cdk/aws-glue-alpha/lib/triggers/scheduled-trigger.ts create mode 100644 packages/@aws-cdk/aws-glue-alpha/lib/triggers/trigger-options.ts delete mode 100644 packages/@aws-cdk/aws-glue-alpha/lib/triggers/trigger.ts create mode 100644 packages/@aws-cdk/aws-glue-alpha/lib/triggers/workflow.ts create mode 100644 packages/@aws-cdk/aws-glue-alpha/test/integ.workflow.ts create mode 100644 packages/@aws-cdk/aws-glue-alpha/test/workflow-triggers.test.ts diff --git a/packages/@aws-cdk/aws-glue-alpha/lib/constants.ts 
b/packages/@aws-cdk/aws-glue-alpha/lib/constants.ts index 7b1cfd7896fdf..fac94436e128b 100644 --- a/packages/@aws-cdk/aws-glue-alpha/lib/constants.ts +++ b/packages/@aws-cdk/aws-glue-alpha/lib/constants.ts @@ -238,3 +238,44 @@ export enum JobType { } +/** + * Represents the logical operator for combining multiple conditions in the Glue Trigger API. + */ +export enum PredicateLogical { + /** All conditions must be true for the predicate to be true. */ + AND = 'AND', + + /** At least one condition must be true for the predicate to be true. */ + ANY = 'ANY', +} + +/** + * Represents the logical operator for evaluating a single condition in the Glue Trigger API. + */ +export enum ConditionLogicalOperator { + /** The condition is true if the values are equal. */ + EQUALS = 'EQUALS', +} + +/** + * Represents the state of a crawler for a condition in the Glue Trigger API. + */ +export enum CrawlerState { + /** The crawler is currently running. */ + RUNNING = 'RUNNING', + + /** The crawler is in the process of being cancelled. */ + CANCELLING = 'CANCELLING', + + /** The crawler has been cancelled. */ + CANCELLED = 'CANCELLED', + + /** The crawler has completed its operation successfully. */ + SUCCEEDED = 'SUCCEEDED', + + /** The crawler has failed to complete its operation. */ + FAILED = 'FAILED', + + /** The crawler encountered an error during its operation. 
*/ + ERROR = 'ERROR', +} diff --git a/packages/@aws-cdk/aws-glue-alpha/lib/index.ts b/packages/@aws-cdk/aws-glue-alpha/lib/index.ts index 5c5d13f9b5c76..0d81d99cd1e96 100644 --- a/packages/@aws-cdk/aws-glue-alpha/lib/index.ts +++ b/packages/@aws-cdk/aws-glue-alpha/lib/index.ts @@ -23,4 +23,6 @@ export * from './jobs/spark-ui-utils'; // export * from './jobs/spark-etl-job'; //export * from './jobs/streaming-job'; export * from './table-base'; -export * from './table-deprecated'; \ No newline at end of file +export * from './table-deprecated'; +export * from './triggers/workflow'; +export * from './triggers/trigger-options'; \ No newline at end of file diff --git a/packages/@aws-cdk/aws-glue-alpha/lib/triggers/conditional-triggers.ts b/packages/@aws-cdk/aws-glue-alpha/lib/triggers/conditional-triggers.ts deleted file mode 100644 index 487bf3b1ed291..0000000000000 --- a/packages/@aws-cdk/aws-glue-alpha/lib/triggers/conditional-triggers.ts +++ /dev/null @@ -1,7 +0,0 @@ -/** - * Conditional Trigger Class - * - * Conditional triggers have a predicate and actions associated with them. - * When the predicateCondition is true, the trigger actions will be executed. - * - */ diff --git a/packages/@aws-cdk/aws-glue-alpha/lib/triggers/notify-event-trigger.ts b/packages/@aws-cdk/aws-glue-alpha/lib/triggers/notify-event-trigger.ts deleted file mode 100644 index aaed7b7b623c8..0000000000000 --- a/packages/@aws-cdk/aws-glue-alpha/lib/triggers/notify-event-trigger.ts +++ /dev/null @@ -1,10 +0,0 @@ -/** - * Notify Event Trigger Class - * - * Workflows are mandatory for this trigger type. There are two types of notify event triggers, - * batching and non-batching trigger. - * For batching triggers, developers must specify BatchSize but for non-batching BatchSize will - * be set to 1. - * For both triggers, BatchWindow will be default to 900 seconds. 
- * - */ diff --git a/packages/@aws-cdk/aws-glue-alpha/lib/triggers/on-demand-trigger.ts b/packages/@aws-cdk/aws-glue-alpha/lib/triggers/on-demand-trigger.ts deleted file mode 100644 index f9aa131a1f7d2..0000000000000 --- a/packages/@aws-cdk/aws-glue-alpha/lib/triggers/on-demand-trigger.ts +++ /dev/null @@ -1,8 +0,0 @@ -/** - * On Demand Trigger Class - * - * On demand triggers can start glue jobs or crawlers. - * The trigger method will take an optional description but abstract the requirement of an - * actions list using the job or crawler objects using conditional types. - * - */ diff --git a/packages/@aws-cdk/aws-glue-alpha/lib/triggers/scheduled-trigger.ts b/packages/@aws-cdk/aws-glue-alpha/lib/triggers/scheduled-trigger.ts deleted file mode 100644 index c34e61330a519..0000000000000 --- a/packages/@aws-cdk/aws-glue-alpha/lib/triggers/scheduled-trigger.ts +++ /dev/null @@ -1,12 +0,0 @@ -/** - * Scheduled Trigger Base Class - * - * Schedule triggers are a way for developers to create jobs using cron expressions. - * We’ll provide daily, weekly, and monthly convenience functions, as well as a custom function - * that will allow developers to create their own custom timing using the existing - * event Schedule object without having to build their own cron expressions. - * - * The trigger method will take an optional description and list of Actions - * which can refer to Jobs or crawlers via conditional types. - * - */ diff --git a/packages/@aws-cdk/aws-glue-alpha/lib/triggers/trigger-options.ts b/packages/@aws-cdk/aws-glue-alpha/lib/triggers/trigger-options.ts new file mode 100644 index 0000000000000..2157249295724 --- /dev/null +++ b/packages/@aws-cdk/aws-glue-alpha/lib/triggers/trigger-options.ts @@ -0,0 +1,238 @@ +/** + * Triggers + * + * In AWS Glue, developers can use workflows to create and visualize complex extract, + * transform, and load (ETL) activities involving multiple crawlers, jobs, and triggers. 
+ * + */ + +import * as cdk from 'aws-cdk-lib/core'; +import { JobState, CrawlerState, ConditionLogicalOperator, PredicateLogical } from '../constants'; +import { IJob } from '../jobs/job'; // Use IJob interface instead of concrete class +import { CfnCrawler } from 'aws-cdk-lib/aws-glue'; +import { ISecurityConfiguration } from '../security-configuration'; +import * as events from 'aws-cdk-lib/aws-events'; + +/** + * Represents a trigger action. + */ +export interface Action { + /** + * The job to be executed. + * + * @default - no job is executed + */ + readonly job?: IJob; + + /** + * The job arguments used when this trigger fires. + * + * @default - no arguments are passed to the job + */ + readonly arguments?: { [key: string]: string }; + + /** + * The job run timeout. This is the maximum time that a job run can consume resources before it is terminated and enters TIMEOUT status. + * + * @default - the default timeout value set in the job definition + */ + readonly timeout?: cdk.Duration; + + /** + * The `SecurityConfiguration` to be used with this action. + * + * @default - no security configuration is used + */ + readonly securityConfiguration?: ISecurityConfiguration; + + /** + * The name of the crawler to be used with this action. + * + * @default - no crawler is used + */ + readonly crawler?: CfnCrawler; +} + +/** + * Represents a trigger schedule. + */ +export class TriggerSchedule { + /** + * Creates a new TriggerSchedule instance with a cron expression. + * + * @param options The cron options for the schedule. + * @returns A new TriggerSchedule instance. + */ + public static cron(options: events.CronOptions): TriggerSchedule { + return new TriggerSchedule(events.Schedule.cron(options).expressionString); + } + + /** + * Creates a new TriggerSchedule instance with a custom expression. + * + * @param expression The custom expression for the schedule. + * @returns A new TriggerSchedule instance. 
+ */ + public static expression(expression: string): TriggerSchedule { + return new TriggerSchedule(expression); + } + + /** + * @param expressionString The expression string for the schedule. + */ + private constructor(public readonly expressionString: string) {} +} + +/** + * Represents a trigger predicate. + */ +export interface Predicate { + /** + * The logical operator to be applied to the conditions. + * + * @default - ConditionLogical.AND if multiple conditions are provided, no logical operator if only one condition + */ + readonly logical?: PredicateLogical; + + /** + * A list of the conditions that determine when the trigger will fire. + * + * @default - no conditions are provided + */ + readonly conditions?: Condition[]; +} + +/** + * Represents a trigger condition. + */ +export interface Condition { + /** + * The logical operator for the condition. + * + * @default ConditionLogicalOperator.EQUALS + */ + readonly logicalOperator?: ConditionLogicalOperator; + + /** + * The job to which this condition applies. + * + * @default - no job is specified + */ + readonly job?: IJob; + + /** + * The condition job state. + * + * @default - no job state is specified + */ + readonly state?: JobState; + + /** + * The name of the crawler to which this condition applies. + * + * @default - no crawler is specified + */ + readonly crawlerName?: string; + + /** + * The condition crawler state. + * + * @default - no crawler state is specified + */ + readonly crawlState?: CrawlerState; +} + +/** + * Represents event trigger batch condition. + */ +export interface EventBatchingCondition { + /** + * Number of events that must be received from Amazon EventBridge before EventBridge event trigger fires. + */ + readonly batchSize: number; + + /** + * Window of time in seconds after which EventBridge event trigger fires. 
+ * + * @default - 900 seconds + */ + readonly batchWindow?: cdk.Duration; +} + +/** + * Properties for configuring a Glue Trigger + */ +export interface TriggerOptions { + /** + * A name for the trigger. + * + * @default - no name is provided + */ + readonly name?: string; + + /** + * A description for the trigger. + * + * @default - no description + */ + readonly description?: string; + + /** + * The actions initiated by this trigger. + */ + readonly actions: Action[]; +} + +/** + * Properties for configuring an on-demand Glue Trigger. + */ +export interface OnDemandTriggerOptions extends TriggerOptions {} + +/** + * Properties for configuring a daily-scheduled Glue Trigger. + */ +export interface DailyScheduleTriggerOptions extends TriggerOptions { + /** + * Whether to start the trigger on creation or not. + * + * @default - false + */ + readonly startOnCreation?: boolean; +} + +/** + * Properties for configuring a weekly-scheduled Glue Trigger. + */ +export interface WeeklyScheduleTriggerOptions extends DailyScheduleTriggerOptions {} + +/** + * Properties for configuring a custom-scheduled Glue Trigger. + */ +export interface CustomScheduledTriggerOptions extends WeeklyScheduleTriggerOptions { + /** + * The custom schedule for the trigger. + */ + readonly schedule: TriggerSchedule; +} + +/** + * Properties for configuring an Event Bridge based Glue Trigger. + */ +export interface NotifyEventTriggerOptions extends TriggerOptions { + /** + * Batch condition for the trigger. + * + * @default - no batch condition + */ + readonly eventBatchingCondition?: EventBatchingCondition; +} + +/** + * Properties for configuring a Condition (Predicate) based Glue Trigger. + */ +export interface ConditionalTriggerOptions extends DailyScheduleTriggerOptions{ + /** + * The predicate for the trigger. 
+ */ + readonly predicate: Predicate; +} \ No newline at end of file diff --git a/packages/@aws-cdk/aws-glue-alpha/lib/triggers/trigger.ts b/packages/@aws-cdk/aws-glue-alpha/lib/triggers/trigger.ts deleted file mode 100644 index c40c3d0efe805..0000000000000 --- a/packages/@aws-cdk/aws-glue-alpha/lib/triggers/trigger.ts +++ /dev/null @@ -1,7 +0,0 @@ -/** - * Workflow Trigger Base Class - * - * In AWS Glue, developers can use workflows to create and visualize complex extract, - * transform, and load (ETL) activities involving multiple crawlers, jobs, and triggers. - * - */ diff --git a/packages/@aws-cdk/aws-glue-alpha/lib/triggers/workflow.ts b/packages/@aws-cdk/aws-glue-alpha/lib/triggers/workflow.ts new file mode 100644 index 0000000000000..537fd9b51d1bc --- /dev/null +++ b/packages/@aws-cdk/aws-glue-alpha/lib/triggers/workflow.ts @@ -0,0 +1,433 @@ +/** + * This module defines a construct for creating and managing AWS Glue Workflows and Triggers. + * + * AWS Glue Workflows are orchestration services that allow you to create, manage, and monitor complex extract, transform, and load (ETL) activities involving multiple crawlers, jobs, and triggers. Workflows are designed to allow you to manage interdependent jobs and crawlers as a single unit, making it easier to orchestrate and monitor complex ETL pipelines. + * + * Triggers are used to initiate an AWS Glue Workflow. You can configure different types of triggers, such as on-demand, scheduled, event-based, or conditional triggers, to start your Workflow based on specific conditions or events. 
+ * + * @see https://docs.aws.amazon.com/glue/latest/dg/workflows_overview.html + * @see https://docs.aws.amazon.com/glue/latest/dg/about-triggers.html + * + * ## Usage Example + * + * ```typescript + * import * as cdk from 'aws-cdk-lib'; + * import * as glue from 'aws-glue-cdk-lib'; + * + * const app = new cdk.App(); + * const stack = new cdk.Stack(app, 'TestStack'); + * + * // Create a Glue Job + * const job = new glue.Job(stack, 'TestJob', { + * // Job properties + * }); + * + * // Create a Glue Workflow + * const workflow = new glue.Workflow(stack, 'TestWorkflow', { + * // Workflow properties + * }); + * + * // Add an on-demand trigger to the Workflow + * workflow.addOnDemandTrigger('OnDemandTrigger', { + * actions: [{ job: job }], + * }); + * ``` + */ + +import * as cdk from 'aws-cdk-lib/core'; +import * as constructs from 'constructs'; +import { CfnWorkflow, CfnTrigger } from 'aws-cdk-lib/aws-glue'; +import { + ConditionLogicalOperator, + PredicateLogical, +} from '../constants'; +import { + Action, + TriggerSchedule, + OnDemandTriggerOptions, + WeeklyScheduleTriggerOptions, + DailyScheduleTriggerOptions, + CustomScheduledTriggerOptions, + NotifyEventTriggerOptions, + ConditionalTriggerOptions, +} from './trigger-options'; + +/** + * The base interface for Glue Workflow + * + * @see {@link Workflow} + * @see https://docs.aws.amazon.com/glue/latest/dg/workflows_overview.html + */ +export interface IWorkflow extends cdk.IResource { + /** + * The name of the workflow + * @attribute + */ + readonly workflowName: string; + + /** + * The ARN of the workflow + * @attribute + */ + readonly workflowArn: string; + + /** + * Add an on-demand trigger to the workflow + */ + addOnDemandTrigger(id: string, options: OnDemandTriggerOptions): CfnTrigger; + + /** + * Add an daily-scheduled trigger to the workflow + */ + addDailyScheduledTrigger(id: string, options: DailyScheduleTriggerOptions): CfnTrigger; + + /** + * Add an weekly-scheduled trigger to the workflow + */ + 
addWeeklyScheduledTrigger(id: string, options: WeeklyScheduleTriggerOptions): CfnTrigger; + + /** + * Add an custom-scheduled trigger to the workflow + */ + addCustomScheduledTrigger(id: string, options: CustomScheduledTriggerOptions): CfnTrigger; +} + +/** + * Properties for importing a Workflow using its attributes + */ +export interface WorkflowAttributes { + /** + * The name of the workflow to import + */ + readonly workflowName: string; + /** + * The ARN of the workflow to import + * + * @default - derived from the workflow name + */ + readonly workflowArn?: string; +} + +/** + * Properties for defining a Workflow + */ +export interface WorkflowProps { + /** + * Name of the workflow + * + * @default - a name will be generated + */ + readonly workflowName?: string; + + /** + * A description of the workflow + * + * @default - no description + */ + readonly description?: string; + + /** + * A map of properties to use when this workflow is executed + * + * @default - no default run properties + */ + readonly defaultRunProperties?: { [key: string]: string }; + + /** + * The maximum number of concurrent runs allowed for the workflow + * + * @default - no limit + */ + readonly maxConcurrentRuns?: number; +} + +/** + * Base abstract class for Workflow + * + * @see https://docs.aws.amazon.com/glue/latest/dg/about-triggers.html + */ +export abstract class WorkflowBase extends cdk.Resource implements IWorkflow { + /** + * Extract workflowName from arn + */ + protected static extractNameFromArn(scope: constructs.Construct, workflowArn: string): string { + return cdk.Stack.of(scope).splitArn( + workflowArn, + cdk.ArnFormat.SLASH_RESOURCE_NAME).resourceName!; + } + + public abstract readonly workflowName: string; + public abstract readonly workflowArn: string; + + /** + * Add an on-demand trigger to the workflow. + * + * @param id The id of the trigger. + * @param options Additional options for the trigger. 
+ * @throws If both job and crawler are provided, or if neither job nor crawler is provided. + * @returns The created CfnTrigger resource. + */ + public addOnDemandTrigger(id: string, options: OnDemandTriggerOptions): CfnTrigger { + const trigger = new CfnTrigger(this, id, { + ...options, + workflowName: this.workflowName, + type: 'ON_DEMAND', + actions: options.actions?.map(this.renderAction), + description: options.description || undefined, + }); + + return trigger; + } + + /** + * Add a daily-scheduled trigger to the workflow. + * + * @param id The id of the trigger. + * @param options Additional options for the trigger. + * @throws If both job and crawler are provided, or if neither job nor crawler is provided. + * @returns The created CfnTrigger resource. + */ + public addDailyScheduledTrigger(id: string, options: DailyScheduleTriggerOptions): CfnTrigger { + const dailySchedule = TriggerSchedule.cron({ + minute: '0', + hour: '0', + }); + + const trigger = new CfnTrigger(this, id, { + ...options, + workflowName: this.workflowName, + type: 'SCHEDULED', + actions: options.actions?.map(this.renderAction), + schedule: dailySchedule.expressionString, + startOnCreation: options.startOnCreation ?? false, + }); + + return trigger; + } + + /** + * Add a weekly-scheduled trigger to the workflow. + * + * @param id The id of the trigger. + * @param options Additional options for the trigger. + * @throws If both job and crawler are provided, or if neither job nor crawler is provided. + * @returns The created CfnTrigger resource. 
+ */ + public addWeeklyScheduledTrigger(id: string, options: WeeklyScheduleTriggerOptions): CfnTrigger { + const weeklySchedule = TriggerSchedule.cron({ + minute: '0', + hour: '0', + weekDay: 'SUN', + }); + + const trigger = new CfnTrigger(this, id, { + ...options, + workflowName: this.workflowName, + type: 'SCHEDULED', + actions: options.actions?.map(this.renderAction), + schedule: weeklySchedule.expressionString, + startOnCreation: options.startOnCreation ?? false, + }); + + return trigger; + } + + /** + * Add a custom-scheduled trigger to the workflow. + * + * @param id The id of the trigger. + * @param options Additional options for the trigger. + * @throws If both job and crawler are provided, or if neither job nor crawler is provided. + * @returns The created CfnTrigger resource. + */ + public addCustomScheduledTrigger(id: string, options: CustomScheduledTriggerOptions): CfnTrigger { + const trigger = new CfnTrigger(this, id, { + ...options, + workflowName: this.workflowName, + type: 'SCHEDULED', + actions: options.actions?.map(this.renderAction), + schedule: options.schedule.expressionString, + startOnCreation: options.startOnCreation ?? false, + }); + + return trigger; + } + + /** + * Add an Event Bridge based trigger to the workflow. + * + * @param id The id of the trigger. + * @param options Additional options for the trigger. + * @throws If both job and crawler are provided, or if neither job nor crawler is provided. + * @returns The created CfnTrigger resource. + */ + public addNotifyEventTrigger(id: string, options: NotifyEventTriggerOptions): CfnTrigger { + const trigger = new CfnTrigger(this, id, { + ...options, + workflowName: this.workflowName, + type: 'EVENT', + actions: options.actions?.map(this.renderAction), + eventBatchingCondition: this.renderEventBatchingCondition(options), + description: options.description ?? undefined, + }); + + return trigger; + } + + /** + * Add a Condition (Predicate) based trigger to the workflow. 
+ * + * @param id The id of the trigger. + * @param options Additional options for the trigger. + * @throws If both job and crawler are provided, or if neither job nor crawler is provided for any condition. + * @throws If a job is provided without a job state, or if a crawler is provided without a crawler state for any condition. + * @returns The created CfnTrigger resource. + */ + public addconditionalTrigger(id: string, options: ConditionalTriggerOptions): CfnTrigger { + const trigger = new CfnTrigger(this, id, { + ...options, + workflowName: this.workflowName, + type: 'CONDITIONAL', + actions: options.actions?.map(this.renderAction), + predicate: this.renderPredicate(options), + eventBatchingCondition: this.renderEventBatchingCondition(options), + description: options.description ?? undefined, + }); + + return trigger; + } + + private renderAction(action: Action): CfnTrigger.ActionProperty { + // Validate that either job or crawler is provided, but not both + if (!action.job && !action.crawler) { + throw new Error('You must provide either a job or a crawler for the action.'); + } else if (action.job && action.crawler) { + throw new Error('You cannot provide both a job and a crawler for the action.'); + } + + return { + jobName: action.job?.jobName, + arguments: action.arguments, + timeout: action.timeout?.toMinutes(), + securityConfiguration: action.securityConfiguration?.securityConfigurationName, + crawlerName: action.crawler?.name, + }; + } + + private renderPredicate(props: ConditionalTriggerOptions): CfnTrigger.PredicateProperty { + const conditions = props.predicate.conditions?.map(condition => { + // Validate that either job or crawler is provided, but not both + if (!condition.job && !condition.crawlerName) { + throw new Error('You must provide either a job or a crawler for the condition.'); + } else if (condition.job && condition.crawlerName) { + throw new Error('You cannot provide both a job and a crawler for the condition.'); + } + + // Validate that 
if job is provided, job state is also provided + if (condition.job && !condition.state) { + throw new Error('If you provide a job for the condition, you must also provide a job state.'); + } + + // Validate that if crawler is provided, crawler state is also provided + if (condition.crawlerName && !condition.crawlState) { + throw new Error('If you provide a crawler for the condition, you must also provide a crawler state.'); + } + + return { + logicalOperator: condition.logicalOperator ?? ConditionLogicalOperator.EQUALS, + jobName: condition.job?.jobName ?? undefined, + state: condition.state ?? undefined, + crawlerName: condition.crawlerName ?? undefined, + crawlState: condition.crawlState ?? undefined, + }; + }); + + return { + logical: props.predicate.conditions?.length === 1 ? undefined : props.predicate.logical ?? PredicateLogical.AND, + conditions: conditions, + }; + } + + private renderEventBatchingCondition(props: NotifyEventTriggerOptions): CfnTrigger.EventBatchingConditionProperty { + + const defaultBatchSize = 1; + const defaultBatchWindow = cdk.Duration.seconds(900).toSeconds(); + + if (!props.eventBatchingCondition) { + return { + batchSize: defaultBatchSize, + batchWindow: defaultBatchWindow, + }; + } + + return { + batchSize: props.eventBatchingCondition.batchSize || defaultBatchSize, + batchWindow: props.eventBatchingCondition.batchWindow?.toSeconds() || defaultBatchWindow, + }; + } + + protected buildWorkflowArn(scope: constructs.Construct, workflowName: string): string { + return cdk.Stack.of(scope).formatArn({ + service: 'glue', + resource: 'workflow', + resourceName: workflowName, + }); + } +} + +/** + * A class used for defining a Glue Workflow + * + * @resource AWS::Glue::Workflow + */ +export class Workflow extends WorkflowBase { + /** + * Import a workflow from its name + */ + public static fromWorkflowName(scope: constructs.Construct, id: string, workflowName: string): IWorkflow { + return this.fromWorkflowAttributes(scope, id, { + 
workflowName, + }); + } + + /** + * Import an workflow from it's name + */ + public static fromWorkflowArn(scope: constructs.Construct, id: string, workflowArn: string): IWorkflow { + return this.fromWorkflowAttributes(scope, id, { + workflowName: this.extractNameFromArn(scope, workflowArn), + workflowArn, + }); + } + + /** + * Import an existing workflow + */ + public static fromWorkflowAttributes(scope: constructs.Construct, id: string, attrs: WorkflowAttributes): IWorkflow { + class Import extends WorkflowBase { + public readonly workflowName = attrs.workflowName; + public readonly workflowArn = this.buildWorkflowArn(scope, this.workflowName); + } + + return new Import(scope, id); + } + + public readonly workflowName: string; + public readonly workflowArn: string; + + constructor(scope: constructs.Construct, id: string, props?: WorkflowProps) { + super(scope, id, { + physicalName: props?.workflowName, + }); + + const resource = new CfnWorkflow(this, 'Resource', { + name: this.physicalName, + description: props?.description, + defaultRunProperties: props?.defaultRunProperties, + maxConcurrentRuns: props?.maxConcurrentRuns, + }); + + this.workflowName = this.getResourceNameAttribute(resource.ref); + this.workflowArn = this.buildWorkflowArn(this, this.workflowName); + } +} \ No newline at end of file diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.workflow.ts b/packages/@aws-cdk/aws-glue-alpha/test/integ.workflow.ts new file mode 100644 index 0000000000000..5fb91a398d4ad --- /dev/null +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.workflow.ts @@ -0,0 +1,60 @@ +import * as integ from '@aws-cdk/integ-tests-alpha'; +import * as cdk from 'aws-cdk-lib'; +import * as glue from '../lib'; +import * as iam from 'aws-cdk-lib/aws-iam'; +import * as path from 'path'; + +const app = new cdk.App(); +const stack = new cdk.Stack(app, 'GlueWorkflowTriggerStack'); + +const workflow = new glue.Workflow(stack, 'Workflow', { + description: 'MyWorkflow', +}); + +const role = 
new iam.Role(stack, 'JobRole', { + assumedBy: new iam.ServicePrincipal('glue.amazonaws.com'), +}); + +const script = glue.Code.fromAsset(path.join(__dirname, 'job-script', 'hello_world.py')); + +const OutboundJob = new glue.PySparkEtlJob(stack, 'OutboundJob', { + script: script, + role, + glueVersion: glue.GlueVersion.V4_0, + workerType: glue.WorkerType.G_2X, + numberOrWorkers: 2, +}); + +const InboundJob = new glue.PySparkEtlJob(stack, 'InboundJob', { + script: script, + role, + glueVersion: glue.GlueVersion.V4_0, + workerType: glue.WorkerType.G_2X, + numberOrWorkers: 2, +}); + +workflow.addOnDemandTrigger('OnDemandTrigger', { + actions: [{ job: InboundJob }], +}); + +workflow.addconditionalTrigger('ConditionalTrigger', { + actions: [{ job: OutboundJob }], + predicate: { + conditions: [ + { + job: InboundJob, + state: glue.JobState.SUCCEEDED, + }, + ], + }, +}); + +new cdk.CfnOutput(stack, 'WorkflowName', { + value: workflow.workflowName, +}); + +new integ.IntegTest(app, 'aws-cdk-glue-workflow-trigger-integ', { + testCases: [stack], +}); + +app.synth(); \ No newline at end of file diff --git a/packages/@aws-cdk/aws-glue-alpha/test/workflow-triggers.test.ts b/packages/@aws-cdk/aws-glue-alpha/test/workflow-triggers.test.ts new file mode 100644 index 0000000000000..74d80b7b455c4 --- /dev/null +++ b/packages/@aws-cdk/aws-glue-alpha/test/workflow-triggers.test.ts @@ -0,0 +1,289 @@ +import * as cdk from 'aws-cdk-lib'; +import { Template, Capture } from 'aws-cdk-lib/assertions'; +import * as glue from '../lib'; +import { TriggerSchedule } from '../lib/triggers/trigger-options'; +import * as iam from 'aws-cdk-lib/aws-iam'; + +describe('Workflow and Triggers', () => { + let stack: cdk.Stack; + let workflow: glue.Workflow; + let job: glue.PySparkEtlJob; + let role: iam.Role; + + beforeEach(() => { + stack = new cdk.Stack(); + workflow = new glue.Workflow(stack, 'Workflow', { + description: 'MyWorkflow', + }); + + role = new iam.Role(stack, 'JobRole', { + assumedBy: new 
iam.ServicePrincipal('glue.amazonaws.com'), + }); + + job = new glue.PySparkEtlJob(stack, 'Job', { + script: glue.Code.fromAsset('test/job-script/hello_world.py'), + role, + glueVersion: glue.GlueVersion.V4_0, + workerType: glue.WorkerType.G_2X, + numberOrWorkers: 10, + }); + }); + + test('creates a workflow with triggers and actions', () => { + workflow.addOnDemandTrigger('OnDemandTrigger', { + actions: [{ job }], + }); + + Template.fromStack(stack).hasResourceProperties('AWS::Glue::Workflow', { + Description: 'MyWorkflow', + }); + + Template.fromStack(stack).resourceCountIs('AWS::Glue::Trigger', 1); + + const workflowReference = new Capture(); + const actionReference = new Capture(); + Template.fromStack(stack).hasResourceProperties('AWS::Glue::Trigger', { + Type: 'ON_DEMAND', + WorkflowName: workflowReference, + Actions: [actionReference], + }); + + expect(workflowReference.asObject()).toEqual( + { + Ref: 'Workflow193EF7C1', + }, + ); + + expect(actionReference.asObject()).toEqual( + { + JobName: { + Ref: 'JobB9D00F9F', + }, + }, + ); + + }); + + test('creates a workflow with conditional trigger', () => { + workflow.addconditionalTrigger('ConditionalTrigger', { + actions: [{ job }], + predicate: { + conditions: [ + { + job, + state: glue.JobState.SUCCEEDED, + }, + ], + }, + }); + + Template.fromStack(stack).resourceCountIs('AWS::Glue::Trigger', 1); + + const workflowReference = new Capture(); + const actionReference = new Capture(); + const predicateReference = new Capture(); + Template.fromStack(stack).hasResourceProperties('AWS::Glue::Trigger', { + Type: 'CONDITIONAL', + WorkflowName: workflowReference, + Actions: [actionReference], + Predicate: predicateReference, + }); + + expect(workflowReference.asObject()).toEqual( + expect.objectContaining({ + Ref: 'Workflow193EF7C1', + }), + ); + + expect(actionReference.asObject()).toEqual( + expect.objectContaining({ + JobName: { + Ref: 'JobB9D00F9F', + }, + }), + ); + + expect(predicateReference.asObject()).toEqual( 
+ expect.objectContaining({ + Conditions: [ + { + JobName: { + Ref: 'JobB9D00F9F', + }, + LogicalOperator: 'EQUALS', + State: 'SUCCEEDED', + }, + ], + }), + ); + }); + + test('creates a workflow with daily scheduled trigger', () => { + workflow.addDailyScheduledTrigger('DailyScheduledTrigger', { + actions: [{ job }], + startOnCreation: true, + }); + + Template.fromStack(stack).resourceCountIs('AWS::Glue::Trigger', 1); + + const workflowReference = new Capture(); + const actionReference = new Capture(); + Template.fromStack(stack).hasResourceProperties('AWS::Glue::Trigger', { + Type: 'SCHEDULED', + WorkflowName: workflowReference, + Schedule: 'cron(0 0 * * ? *)', + StartOnCreation: true, + Actions: [actionReference], + }); + + expect(workflowReference.asObject()).toEqual( + expect.objectContaining({ + Ref: 'Workflow193EF7C1', + }), + ); + + expect(actionReference.asObject()).toEqual( + expect.objectContaining({ + JobName: { + Ref: 'JobB9D00F9F', + }, + }), + ); + }); + + test('creates a workflow with weekly scheduled trigger', () => { + workflow.addWeeklyScheduledTrigger('WeeklyScheduledTrigger', { + actions: [{ job }], + startOnCreation: false, + }); + + Template.fromStack(stack).resourceCountIs('AWS::Glue::Trigger', 1); + + const workflowReference = new Capture(); + const actionReference = new Capture(); + Template.fromStack(stack).hasResourceProperties('AWS::Glue::Trigger', { + Type: 'SCHEDULED', + WorkflowName: workflowReference, + Schedule: 'cron(0 0 ? 
* SUN *)', + StartOnCreation: false, + Actions: [actionReference], + }); + + expect(workflowReference.asObject()).toEqual( + expect.objectContaining({ + Ref: 'Workflow193EF7C1', + }), + ); + + expect(actionReference.asObject()).toEqual( + expect.objectContaining({ + JobName: { + Ref: 'JobB9D00F9F', + }, + }), + ); + }); + + test('creates a workflow with custom scheduled trigger', () => { + const customSchedule = TriggerSchedule.cron({ + minute: '0', + hour: '20', + weekDay: 'THU', + }); + + workflow.addCustomScheduledTrigger('CustomScheduledTrigger', { + actions: [{ job }], + schedule: customSchedule, + startOnCreation: true, + }); + + Template.fromStack(stack).resourceCountIs('AWS::Glue::Trigger', 1); + + const workflowReference = new Capture(); + const actionReference = new Capture(); + Template.fromStack(stack).hasResourceProperties('AWS::Glue::Trigger', { + Type: 'SCHEDULED', + WorkflowName: workflowReference, + Schedule: 'cron(0 20 ? * THU *)', + StartOnCreation: true, + Actions: [actionReference], + }); + + expect(workflowReference.asObject()).toEqual( + expect.objectContaining({ + Ref: 'Workflow193EF7C1', + }), + ); + + expect(actionReference.asObject()).toEqual( + expect.objectContaining({ + JobName: { + Ref: 'JobB9D00F9F', + }, + }), + ); + }); + + test('creates a workflow with notify event trigger', () => { + workflow.addNotifyEventTrigger('NotifyEventTrigger', { + actions: [{ job }], + eventBatchingCondition: { + batchSize: 10, + batchWindow: cdk.Duration.minutes(5), + }, + }); + + Template.fromStack(stack).resourceCountIs('AWS::Glue::Trigger', 1); + + const workflowReference = new Capture(); + const actionReference = new Capture(); + const eventBatchingConditionReference = new Capture(); + Template.fromStack(stack).hasResourceProperties('AWS::Glue::Trigger', { + Type: 'EVENT', + WorkflowName: workflowReference, + Actions: [actionReference], + EventBatchingCondition: eventBatchingConditionReference, + }); + + expect(workflowReference.asObject()).toEqual( 
+ expect.objectContaining({ + Ref: 'Workflow193EF7C1', + }), + ); + + expect(actionReference.asObject()).toEqual( + expect.objectContaining({ + JobName: { + Ref: 'JobB9D00F9F', + }, + }), + ); + + expect(eventBatchingConditionReference.asObject()).toEqual( + expect.objectContaining({ + BatchSize: 10, + BatchWindow: 300, + }), + ); + }); +}); + +describe('.fromWorkflowAttributes()', () => { + let stack: cdk.Stack; + + beforeEach(() => { + stack = new cdk.Stack(); + }); + + test('with required attrs only', () => { + const workflowName = 'my-existing-workflow'; + const importedWorkflow = glue.Workflow.fromWorkflowAttributes(stack, 'ImportedWorkflow', { workflowName }); + + expect(importedWorkflow.workflowName).toEqual(workflowName); + expect(importedWorkflow.workflowArn).toEqual(stack.formatArn({ + service: 'glue', + resource: 'workflow', + resourceName: workflowName, + })); + }); +}); \ No newline at end of file From 3ff57ce15ed2d28b2677bdce98b853b81c9772cd Mon Sep 17 00:00:00 2001 From: "Janardhan (Janny) Molumuri" Date: Tue, 28 May 2024 13:39:10 +0000 Subject: [PATCH 22/51] ETL Jobs and tests --- .../lib/jobs/scala-spark-etl-job.ts | 176 ++++++++++++++++++ .../test/integ.job-pyspark-etl.ts | 60 ++++++ .../test/integ.job-scalaspark-etl.ts | 63 +++++++ .../test/job-jar/helloworld.jar | Bin 0 -> 782 bytes .../test/pyspark-etl-jobs.test.ts | 54 ++++++ .../test/scalaspark-etl-jobs.test.ts | 56 ++++++ 6 files changed, 409 insertions(+) create mode 100644 packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-etl.ts create mode 100644 packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-etl.ts create mode 100644 packages/@aws-cdk/aws-glue-alpha/test/job-jar/helloworld.jar create mode 100644 packages/@aws-cdk/aws-glue-alpha/test/pyspark-etl-jobs.test.ts create mode 100644 packages/@aws-cdk/aws-glue-alpha/test/scalaspark-etl-jobs.test.ts diff --git a/packages/@aws-cdk/aws-glue-alpha/lib/jobs/scala-spark-etl-job.ts 
b/packages/@aws-cdk/aws-glue-alpha/lib/jobs/scala-spark-etl-job.ts index 9be9ab1859c85..0c9ccdf029dbc 100644 --- a/packages/@aws-cdk/aws-glue-alpha/lib/jobs/scala-spark-etl-job.ts +++ b/packages/@aws-cdk/aws-glue-alpha/lib/jobs/scala-spark-etl-job.ts @@ -8,5 +8,181 @@ * —enable-metrics, —enable-spark-ui, —enable-continuous-cloudwatch-log. * You can find more details about version, worker type and other features * in Glue's public documentation. + * + * RFC: https://github.com/aws/aws-cdk-rfcs/blob/main/text/0497-glue-l2-construct.md + * */ +import * as iam from 'aws-cdk-lib/aws-iam'; +import { Bucket } from 'aws-cdk-lib/aws-s3'; +import { CfnJob } from 'aws-cdk-lib/aws-glue'; +import { Job, JobProperties } from './job'; +import { Construct } from 'constructs'; +import { JobType, GlueVersion, JobLanguage, PythonVersion, WorkerType } from '../constants'; +import { SparkUIProps, SparkUILoggingLocation, validateSparkUiPrefix, cleanSparkUiPrefixForGrant } from './spark-ui-utils'; + + +/** + * Properties for creating a Scala Spark ETL job + */ +export interface ScalaSparkEtlJobProps extends JobProperties { + + /** + * Enables the Spark UI debugging and monitoring with the specified props. + * + * @default - Spark UI debugging and monitoring is disabled. 
+ * + * @see https://docs.aws.amazon.com/glue/latest/dg/monitor-spark-ui-jobs.html + * @see https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-etl-glue-arguments.html + */ + readonly sparkUI?: SparkUIProps; + + /** + * Class name (required for Scala scripts) + * Package and class name for the entry point of Glue job execution for + * Java scripts + **/ + readonly className: string; + + /** + * Extra Jars S3 URL (optional) + * S3 URL where additional jar dependencies are located + * @default - no extra jar files + */ + readonly extraJars?: string[]; +} + +/** + * A Scala Spark ETL Glue Job + */ +export class ScalaSparkEtlJob extends Job { + + // Implement abstract Job attributes + public readonly jobArn: string; + public readonly jobName: string; + public readonly role: iam.IRole; + public readonly grantPrincipal: iam.IPrincipal; + + /** + * The Spark UI logs location if Spark UI monitoring and debugging is enabled. + * + * @see https://docs.aws.amazon.com/glue/latest/dg/monitor-spark-ui-jobs.html + * @see https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-etl-glue-arguments.html + */ + public readonly sparkUILoggingLocation?: SparkUILoggingLocation; + + /** + * ScalaSparkEtlJob constructor + * + * @param scope + * @param id + * @param props + */ + constructor(scope: Construct, id: string, props: ScalaSparkEtlJobProps) { + super(scope, id, { + physicalName: props.jobName, + }); + + // Set up role and permissions for principal + this.role = props.role, { + assumedBy: new iam.ServicePrincipal('glue.amazonaws.com'), + managedPolicies: [iam.ManagedPolicy.fromAwsManagedPolicyName('service-role/AWSGlueServiceRole')], + }; + this.grantPrincipal = this.role; + + // Enable SparkUI by default as a best practice + const sparkUIArgs = props.sparkUI?.bucket ? 
this.setupSparkUI(this.role, props.sparkUI) : undefined; + this.sparkUILoggingLocation = sparkUIArgs?.location; + + // Enable CloudWatch metrics and continuous logging by default as a best practice + const continuousLoggingArgs = props.continuousLogging?.enabled ? this.setupContinuousLogging(this.role, props.continuousLogging) : {}; + const profilingMetricsArgs = { '--enable-metrics': '' }; + + // Gather executable arguments + const execuatbleArgs = this.executableArguments(props); + + // Mandatory className argument + if (props.className === undefined) { + throw new Error('className must be set for Scala ETL Jobs'); + } + + // Conbine command line arguments into a single line item + const defaultArguments = { + ...execuatbleArgs, + ...continuousLoggingArgs, + ...profilingMetricsArgs, + ...sparkUIArgs?.args, + ...this.checkNoReservedArgs(props.defaultArguments), + }; + + if ((!props.workerType && props.numberOrWorkers !== undefined) || (props.workerType && props.numberOrWorkers === undefined)) { + throw new Error('Both workerType and numberOrWorkers must be set'); + } + + const jobResource = new CfnJob(this, 'Resource', { + name: props.jobName, + description: props.description, + role: this.role.roleArn, + command: { + name: JobType.ETL, + scriptLocation: this.codeS3ObjectUrl(props.script), + }, + glueVersion: props.glueVersion ? props.glueVersion : GlueVersion.V4_0, + workerType: props.workerType ? props.workerType : WorkerType.G_2X, + numberOfWorkers: props.numberOrWorkers, + maxRetries: props.maxRetries, + executionProperty: props.maxConcurrentRuns ? { maxConcurrentRuns: props.maxConcurrentRuns } : undefined, + timeout: props.timeout?.toMinutes(), + connections: props.connections ? 
{ connections: props.connections.map((connection) => connection.connectionName) } : undefined, + securityConfiguration: props.securityConfiguration?.securityConfigurationName, + tags: props.tags, + defaultArguments, + }); + + const resourceName = this.getResourceNameAttribute(jobResource.ref); + this.jobArn = this.buildJobArn(this, resourceName); + this.jobName = resourceName; + } + + /** + * Set the executable arguments with best practices enabled by default + * + * @param props + * @returns An array of arguments for Glue to use on execution + */ + private executableArguments(props: ScalaSparkEtlJobProps) { + const args: { [key: string]: string } = {}; + args['--job-language'] = JobLanguage.SCALA; + + // if (props.extraJars && props.extraJars?.length > 0) { + // args['--extra-jars'] = props.extraJars.map(code => this.codeS3ObjectUrl(code)).join(','); + // } + // if (props.extraFiles && props.extraFiles.length > 0) { + // args['--extra-files'] = props.extraFiles.map(code => this.codeS3ObjectUrl(code)).join(','); + // } + // if (props.extraJarsFirst) { + // args['--user-jars-first'] = 'true'; + // } + + return args; + } + + private setupSparkUI(role: iam.IRole, sparkUiProps: SparkUIProps) { + + validateSparkUiPrefix(sparkUiProps.prefix); + const bucket = sparkUiProps.bucket ?? 
new Bucket(this, 'SparkUIBucket'); + bucket.grantReadWrite(role, cleanSparkUiPrefixForGrant(sparkUiProps.prefix)); + const args = { + '--enable-spark-ui': 'true', + '--spark-event-logs-path': bucket.s3UrlForObject(sparkUiProps.prefix), + }; + + return { + location: { + prefix: sparkUiProps.prefix, + bucket, + }, + args, + }; + } +} \ No newline at end of file diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-etl.ts b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-etl.ts new file mode 100644 index 0000000000000..86f874b908b1b --- /dev/null +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-etl.ts @@ -0,0 +1,60 @@ +import * as integ from '@aws-cdk/integ-tests-alpha'; +import * as path from 'path'; +import * as cdk from 'aws-cdk-lib'; +import * as glue from '../lib'; +import * as iam from 'aws-cdk-lib/aws-iam'; + +/** + * To verify the ability to run jobs created in this test + * + * Run the job using + * `aws glue start-job-run --region us-east-1 --job-name ` + * This will return a runId + * + * Get the status of the job run using + * `aws glue get-job-run --region us-east-1 --job-name --run-id ` + * + * For example, to test the ETLJob + * - Run: `aws glue start-job-run --region us-east-1 --job-name ETLJob` + * - Get Status: `aws glue get-job-run --region us-east-1 --job-name ETLJob --run-id ` + * - Check output: `aws logs get-log-events --region us-east-1 --log-group-name "/aws-glue/python-jobs/output" --log-stream-name ">` which should show "hello world" + */ + +const app = new cdk.App(); +const stack = new cdk.Stack(app, 'aws-glue-job-pyspark-etl'); + +const script = glue.Code.fromAsset(path.join(__dirname, 'job-script', 'hello_world.py')); + +const iam_role = new iam.Role(stack, 'IAMServiceRole', { + assumedBy: new iam.ServicePrincipal('glue.amazonaws.com'), + managedPolicies: [iam.ManagedPolicy.fromAwsManagedPolicyName('service-role/AWSGlueServiceRole')], +}); + +new glue.PySparkEtlJob(stack, 'BasicPySparkETLJob', { + 
script: script, + role: iam_role, +}); + +new glue.PySparkEtlJob(stack, 'OverridePySparkETLJob', { + script: script, + role: iam_role, + description: 'Optional Override PySpark ETL Job', + glueVersion: glue.GlueVersion.V3_0, + numberOrWorkers: 20, + workerType: glue.WorkerType.G_2X, + timeout: cdk.Duration.minutes(15), + jobName: 'Optional Override PySpark ETL Job', + defaultArguments: { + arg1: 'value1', + arg2: 'value2', + }, + tags: { + key: 'value', + }, +}); + +new integ.IntegTest(app, 'aws-glue-job-pyspark-etl-integ-test', { + testCases: [stack], +}); + +app.synth(); \ No newline at end of file diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-etl.ts b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-etl.ts new file mode 100644 index 0000000000000..1432e78da249d --- /dev/null +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-etl.ts @@ -0,0 +1,63 @@ +import * as integ from '@aws-cdk/integ-tests-alpha'; +import * as path from 'path'; +import * as cdk from 'aws-cdk-lib'; +import * as glue from '../lib'; +import * as iam from 'aws-cdk-lib/aws-iam'; + +/** + * To verify the ability to run jobs created in this test + * + * Run the job using + * `aws glue start-job-run --region us-east-1 --job-name ` + * This will return a runId + * + * Get the status of the job run using + * `aws glue get-job-run --region us-east-1 --job-name --run-id ` + * + * For example, to test the ETLJob + * - Run: `aws glue start-job-run --region us-east-1 --job-name ETLJob` + * - Get Status: `aws glue get-job-run --region us-east-1 --job-name ETLJob --run-id ` + * - Check output: `aws logs get-log-events --region us-east-1 --log-group-name "/aws-glue/python-jobs/output" --log-stream-name ">` which should show "hello world" + */ + +const app = new cdk.App(); +const stack = new cdk.Stack(app, 'aws-glue-job-scalaspark-etl'); + +const jar_file = glue.Code.fromAsset(path.join(__dirname, 'job-jar', 'helloworld.jar')); +const job_class 
='com.example.HelloWorld'; + +const iam_role = new iam.Role(stack, 'IAMServiceRole', { + assumedBy: new iam.ServicePrincipal('glue.amazonaws.com'), + managedPolicies: [iam.ManagedPolicy.fromAwsManagedPolicyName('service-role/AWSGlueServiceRole')], +}); + +new glue.ScalaSparkEtlJob(stack, 'BasicScalaSparkETLJob', { + script: jar_file, + role: iam_role, + className: job_class, +}); + +new glue.ScalaSparkEtlJob(stack, 'OverrideScalaSparkETLJob', { + script: jar_file, + className: job_class, + role: iam_role, + description: 'Optional Override ScalaSpark ETL Job', + glueVersion: glue.GlueVersion.V3_0, + numberOrWorkers: 20, + workerType: glue.WorkerType.G_2X, + timeout: cdk.Duration.minutes(15), + jobName: 'Optional Override ScalaSpark ETL Job', + defaultArguments: { + arg1: 'value1', + arg2: 'value2', + }, + tags: { + key: 'value', + }, +}); + +new integ.IntegTest(app, 'aws-glue-job-scalaspark-etl-integ-test', { + testCases: [stack], +}); + +app.synth(); \ No newline at end of file diff --git a/packages/@aws-cdk/aws-glue-alpha/test/job-jar/helloworld.jar b/packages/@aws-cdk/aws-glue-alpha/test/job-jar/helloworld.jar new file mode 100644 index 0000000000000000000000000000000000000000..41a6aa95d5aff514ba19b9a9b4c8bfff3ec123a0 GIT binary patch literal 782 zcmWIWW@Zs#;Nak3*vZlp!GHuf8CV#6T|*poJ^kGD|D9rBU}gyLX6FE@V1gMK3ugvA8%lz}Ne*gFx-=CBBp0qNAhjxvK;gu@`>~ z`>52>zR1ayf1cUPplvyuRMi&Qd-HS2{bBs4QMj$k@x`HcXZBSZe@?U8f6tylX5ssV zZOgjF!d_)mzeu?1c-^soVVO?w%=rcp2mkRl*RDF=w9s zc%joEIcMfi%~`9Zb)>khS6q5_X61#YiU%G{x~_iuL>T|Q1M^NUWW5kQyICwy>O=j? 
z>M2 { + let stack: cdk.Stack; + let role: iam.IRole; + let script: glue.Code; + let codeBucket: s3.IBucket; + let job: glue.IJob; + + beforeEach(() => { + stack = new cdk.Stack(); + role = iam.Role.fromRoleArn(stack, 'Role', 'arn:aws:iam::123456789012:role/TestRole'); + codeBucket = s3.Bucket.fromBucketName(stack, 'CodeBucket', 'bucketname'); + script = glue.Code.fromBucket(codeBucket, 'script'); + }); + + describe('Create new PySpark ETL Job with default parameters', () => { + + beforeEach(() => { + job = new glue.PySparkEtlJob(stack, 'ImportedJob', { role, script }); + }); + + test('Test default attributes', () => { + expect(job.jobArn).toEqual(stack.formatArn({ + service: 'glue', + resource: 'job', + resourceName: job.jobName, + })); + expect(job.grantPrincipal).toEqual(role); + }); + + test('Default Glue Version should be 4.0', () => { + Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { + GlueVersion: '4.0', + }); + }); + + test('Default numberOfWorkers should be 10', () => { + Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { + NumberOfWorkers: 10, + }); + }); + + test('Default WorkerType should be G.2X', () => { + Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { + WorkerType: 'G.2X', + }); + }); + }); +}); \ No newline at end of file diff --git a/packages/@aws-cdk/aws-glue-alpha/test/scalaspark-etl-jobs.test.ts b/packages/@aws-cdk/aws-glue-alpha/test/scalaspark-etl-jobs.test.ts new file mode 100644 index 0000000000000..1e5957dce85e4 --- /dev/null +++ b/packages/@aws-cdk/aws-glue-alpha/test/scalaspark-etl-jobs.test.ts @@ -0,0 +1,56 @@ +import * as cdk from 'aws-cdk-lib'; +import * as glue from '../lib'; +import * as iam from 'aws-cdk-lib/aws-iam'; +import * as s3 from 'aws-cdk-lib/aws-s3'; +import { Template } from 'aws-cdk-lib/assertions'; + +describe('Job', () => { + let stack: cdk.Stack; + let role: iam.IRole; + let script: glue.Code; + let codeBucket: s3.IBucket; + let job: glue.IJob; + let className: 
string; + + beforeEach(() => { + stack = new cdk.Stack(); + role = iam.Role.fromRoleArn(stack, 'Role', 'arn:aws:iam::123456789012:role/TestRole'); + codeBucket = s3.Bucket.fromBucketName(stack, 'CodeBucket', 'bucketname'); + script = glue.Code.fromBucket(codeBucket, 'script'); + className = 'com.example.HelloWorld'; + }); + + describe('Create new Scala Spark ETL Job with default parameters', () => { + + beforeEach(() => { + job = new glue.ScalaSparkEtlJob(stack, 'ImportedJob', { role, script, className }); + }); + + test('Test default attributes', () => { + expect(job.jobArn).toEqual(stack.formatArn({ + service: 'glue', + resource: 'job', + resourceName: job.jobName, + })); + expect(job.grantPrincipal).toEqual(role); + }); + + test('Default Glue Version should be 4.0', () => { + Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { + GlueVersion: '4.0', + }); + }); + + test('Default numberOfWorkers should be 10', () => { + Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { + NumberOfWorkers: 10, + }); + }); + + test('Default WorkerType should be G.2X', () => { + Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { + WorkerType: 'G.2X', + }); + }); + }); +}); \ No newline at end of file From f9675bc038906086dba3170a9a7479663e79db85 Mon Sep 17 00:00:00 2001 From: "Janardhan (Janny) Molumuri" Date: Tue, 28 May 2024 13:51:58 +0000 Subject: [PATCH 23/51] ETL Jobs and tests --- packages/@aws-cdk/aws-glue-alpha/lib/index.ts | 1 + .../@aws-cdk/aws-glue-alpha/lib/jobs/scala-spark-etl-job.ts | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/packages/@aws-cdk/aws-glue-alpha/lib/index.ts b/packages/@aws-cdk/aws-glue-alpha/lib/index.ts index 5c5d13f9b5c76..f0d5fb5954c66 100644 --- a/packages/@aws-cdk/aws-glue-alpha/lib/index.ts +++ b/packages/@aws-cdk/aws-glue-alpha/lib/index.ts @@ -16,6 +16,7 @@ export * from './constants'; export * from './jobs/job'; // export * from './jobs/flex-job'; export * from 
'./jobs/pyspark-etl-job'; +export * from './jobs/scala-spark-etl-job'; // export * from './jobs/python-shell-job'; // export * from './jobs/ray-job'; // export * from './jobs/scala-spark-etl-job'; diff --git a/packages/@aws-cdk/aws-glue-alpha/lib/jobs/scala-spark-etl-job.ts b/packages/@aws-cdk/aws-glue-alpha/lib/jobs/scala-spark-etl-job.ts index 0c9ccdf029dbc..1e6a2a2f5aebf 100644 --- a/packages/@aws-cdk/aws-glue-alpha/lib/jobs/scala-spark-etl-job.ts +++ b/packages/@aws-cdk/aws-glue-alpha/lib/jobs/scala-spark-etl-job.ts @@ -18,7 +18,7 @@ import { Bucket } from 'aws-cdk-lib/aws-s3'; import { CfnJob } from 'aws-cdk-lib/aws-glue'; import { Job, JobProperties } from './job'; import { Construct } from 'constructs'; -import { JobType, GlueVersion, JobLanguage, PythonVersion, WorkerType } from '../constants'; +import { JobType, GlueVersion, JobLanguage, WorkerType } from '../constants'; import { SparkUIProps, SparkUILoggingLocation, validateSparkUiPrefix, cleanSparkUiPrefixForGrant } from './spark-ui-utils'; From 06c7d0a9c93b490ed4593c2e6454d7e2b6b4c962 Mon Sep 17 00:00:00 2001 From: "Janardhan (Janny) Molumuri" Date: Tue, 28 May 2024 14:04:33 +0000 Subject: [PATCH 24/51] ETL Jobs and tests --- .../lib/jobs/scala-spark-etl-job.ts | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/packages/@aws-cdk/aws-glue-alpha/lib/jobs/scala-spark-etl-job.ts b/packages/@aws-cdk/aws-glue-alpha/lib/jobs/scala-spark-etl-job.ts index 1e6a2a2f5aebf..c8ffea3e128e1 100644 --- a/packages/@aws-cdk/aws-glue-alpha/lib/jobs/scala-spark-etl-job.ts +++ b/packages/@aws-cdk/aws-glue-alpha/lib/jobs/scala-spark-etl-job.ts @@ -8,7 +8,7 @@ * —enable-metrics, —enable-spark-ui, —enable-continuous-cloudwatch-log. * You can find more details about version, worker type and other features * in Glue's public documentation. 
- * + * * RFC: https://github.com/aws/aws-cdk-rfcs/blob/main/text/0497-glue-l2-construct.md * */ @@ -21,7 +21,6 @@ import { Construct } from 'constructs'; import { JobType, GlueVersion, JobLanguage, WorkerType } from '../constants'; import { SparkUIProps, SparkUILoggingLocation, validateSparkUiPrefix, cleanSparkUiPrefixForGrant } from './spark-ui-utils'; - /** * Properties for creating a Scala Spark ETL job */ @@ -103,7 +102,7 @@ export class ScalaSparkEtlJob extends Job { // Mandatory className argument if (props.className === undefined) { - throw new Error('className must be set for Scala ETL Jobs'); + throw new Error('className must be set for Scala ETL Jobs'); } // Conbine command line arguments into a single line item @@ -153,16 +152,11 @@ export class ScalaSparkEtlJob extends Job { private executableArguments(props: ScalaSparkEtlJobProps) { const args: { [key: string]: string } = {}; args['--job-language'] = JobLanguage.SCALA; + args['--class'] = props.className!; - // if (props.extraJars && props.extraJars?.length > 0) { - // args['--extra-jars'] = props.extraJars.map(code => this.codeS3ObjectUrl(code)).join(','); - // } - // if (props.extraFiles && props.extraFiles.length > 0) { - // args['--extra-files'] = props.extraFiles.map(code => this.codeS3ObjectUrl(code)).join(','); - // } - // if (props.extraJarsFirst) { - // args['--user-jars-first'] = 'true'; - // } + if (props.extraJars && props.extraJars?.length > 0) { + args['--extra-jars'] = props.extraJars.map(code => this.codeS3ObjectUrl(code)).join(','); + } return args; } From 9457e35cd804ecde5a26b2af97c53f81c2244fa2 Mon Sep 17 00:00:00 2001 From: "Janardhan (Janny) Molumuri" Date: Tue, 28 May 2024 14:12:55 +0000 Subject: [PATCH 25/51] ETL Jobs and tests --- .../@aws-cdk/aws-glue-alpha/lib/jobs/scala-spark-etl-job.ts | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/packages/@aws-cdk/aws-glue-alpha/lib/jobs/scala-spark-etl-job.ts 
b/packages/@aws-cdk/aws-glue-alpha/lib/jobs/scala-spark-etl-job.ts index c8ffea3e128e1..6e1d18e0da71b 100644 --- a/packages/@aws-cdk/aws-glue-alpha/lib/jobs/scala-spark-etl-job.ts +++ b/packages/@aws-cdk/aws-glue-alpha/lib/jobs/scala-spark-etl-job.ts @@ -20,6 +20,7 @@ import { Job, JobProperties } from './job'; import { Construct } from 'constructs'; import { JobType, GlueVersion, JobLanguage, WorkerType } from '../constants'; import { SparkUIProps, SparkUILoggingLocation, validateSparkUiPrefix, cleanSparkUiPrefixForGrant } from './spark-ui-utils'; +import { Code } from '../code'; /** * Properties for creating a Scala Spark ETL job @@ -48,7 +49,7 @@ export interface ScalaSparkEtlJobProps extends JobProperties { * S3 URL where additional jar dependencies are located * @default - no extra jar files */ - readonly extraJars?: string[]; + readonly extraJars?: Code[]; } /** From 0c412cb9d30f313f861a72e94b84ff21b5c10468 Mon Sep 17 00:00:00 2001 From: "Janardhan (Janny) Molumuri" Date: Tue, 28 May 2024 14:24:49 +0000 Subject: [PATCH 26/51] Update default python version --- packages/@aws-cdk/aws-glue-alpha/lib/jobs/pyspark-etl-job.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/@aws-cdk/aws-glue-alpha/lib/jobs/pyspark-etl-job.ts b/packages/@aws-cdk/aws-glue-alpha/lib/jobs/pyspark-etl-job.ts index 9d8a8b70d3dd4..0c6ae42d412b8 100644 --- a/packages/@aws-cdk/aws-glue-alpha/lib/jobs/pyspark-etl-job.ts +++ b/packages/@aws-cdk/aws-glue-alpha/lib/jobs/pyspark-etl-job.ts @@ -111,7 +111,7 @@ export class PySparkEtlJob extends Job { command: { name: JobType.ETL, scriptLocation: this.codeS3ObjectUrl(props.script), - pythonVersion: PythonVersion.THREE_NINE, + pythonVersion: PythonVersion.THREE, }, glueVersion: props.glueVersion ? props.glueVersion : GlueVersion.V4_0, workerType: props.workerType ? 
props.workerType : WorkerType.G_2X, From 5413e857e00032c577ed2239616b66646c804965 Mon Sep 17 00:00:00 2001 From: "Janardhan (Janny) Molumuri" Date: Tue, 28 May 2024 15:10:53 +0000 Subject: [PATCH 27/51] ETL jobs & tests --- .../lib/jobs/pyspark-etl-job.ts | 2 +- .../lib/jobs/scala-spark-etl-job.ts | 2 +- ...9be7858a12b228a2ae6e5c10faccd9097b1e855.py | 1 + .../aws-glue-job-pyspark-etl.assets.json | 32 ++ .../aws-glue-job-pyspark-etl.template.json | 202 ++++++++++ ...efaultTestDeployAssertED1ACE14.assets.json | 19 + ...aultTestDeployAssertED1ACE14.template.json | 36 ++ .../integ.job-pyspark-etl.js.snapshot/cdk.out | 1 + .../integ.json | 12 + .../manifest.json | 131 +++++++ .../tree.json | 371 ++++++++++++++++++ ...3f8703573eb6b69528c5d52190d72579c91602.jar | Bin 0 -> 782 bytes .../aws-glue-job-scalaspark-etl.assets.json | 32 ++ .../aws-glue-job-scalaspark-etl.template.json | 202 ++++++++++ ...efaultTestDeployAssertCA9A8121.assets.json | 19 + ...aultTestDeployAssertCA9A8121.template.json | 36 ++ .../cdk.out | 1 + .../integ.json | 12 + .../manifest.json | 131 +++++++ .../tree.json | 371 ++++++++++++++++++ 20 files changed, 1611 insertions(+), 2 deletions(-) create mode 100644 packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-etl.js.snapshot/asset.432033e3218068a915d2532fa9be7858a12b228a2ae6e5c10faccd9097b1e855.py create mode 100644 packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-etl.js.snapshot/aws-glue-job-pyspark-etl.assets.json create mode 100644 packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-etl.js.snapshot/aws-glue-job-pyspark-etl.template.json create mode 100644 packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-etl.js.snapshot/awsgluejobpysparketlintegtestDefaultTestDeployAssertED1ACE14.assets.json create mode 100644 packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-etl.js.snapshot/awsgluejobpysparketlintegtestDefaultTestDeployAssertED1ACE14.template.json create mode 100644 
packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-etl.js.snapshot/cdk.out create mode 100644 packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-etl.js.snapshot/integ.json create mode 100644 packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-etl.js.snapshot/manifest.json create mode 100644 packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-etl.js.snapshot/tree.json create mode 100644 packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-etl.js.snapshot/asset.e305655b966b957f91fcec580e3f8703573eb6b69528c5d52190d72579c91602.jar create mode 100644 packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-etl.js.snapshot/aws-glue-job-scalaspark-etl.assets.json create mode 100644 packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-etl.js.snapshot/aws-glue-job-scalaspark-etl.template.json create mode 100644 packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-etl.js.snapshot/awsgluejobscalasparketlintegtestDefaultTestDeployAssertCA9A8121.assets.json create mode 100644 packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-etl.js.snapshot/awsgluejobscalasparketlintegtestDefaultTestDeployAssertCA9A8121.template.json create mode 100644 packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-etl.js.snapshot/cdk.out create mode 100644 packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-etl.js.snapshot/integ.json create mode 100644 packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-etl.js.snapshot/manifest.json create mode 100644 packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-etl.js.snapshot/tree.json diff --git a/packages/@aws-cdk/aws-glue-alpha/lib/jobs/pyspark-etl-job.ts b/packages/@aws-cdk/aws-glue-alpha/lib/jobs/pyspark-etl-job.ts index 0c6ae42d412b8..d6130549883c7 100644 --- a/packages/@aws-cdk/aws-glue-alpha/lib/jobs/pyspark-etl-job.ts +++ b/packages/@aws-cdk/aws-glue-alpha/lib/jobs/pyspark-etl-job.ts @@ -115,7 +115,7 @@ export class PySparkEtlJob extends Job { }, glueVersion: props.glueVersion ? 
props.glueVersion : GlueVersion.V4_0, workerType: props.workerType ? props.workerType : WorkerType.G_2X, - numberOfWorkers: props.numberOrWorkers, + numberOfWorkers: props.numberOrWorkers ? props.numberOrWorkers : 10, maxRetries: props.maxRetries, executionProperty: props.maxConcurrentRuns ? { maxConcurrentRuns: props.maxConcurrentRuns } : undefined, //notificationProperty: props.notifyDelayAfter ? { notifyDelayAfter: props.notifyDelayAfter.toMinutes() } : undefined, diff --git a/packages/@aws-cdk/aws-glue-alpha/lib/jobs/scala-spark-etl-job.ts b/packages/@aws-cdk/aws-glue-alpha/lib/jobs/scala-spark-etl-job.ts index 6e1d18e0da71b..8448da9801b43 100644 --- a/packages/@aws-cdk/aws-glue-alpha/lib/jobs/scala-spark-etl-job.ts +++ b/packages/@aws-cdk/aws-glue-alpha/lib/jobs/scala-spark-etl-job.ts @@ -129,7 +129,7 @@ export class ScalaSparkEtlJob extends Job { }, glueVersion: props.glueVersion ? props.glueVersion : GlueVersion.V4_0, workerType: props.workerType ? props.workerType : WorkerType.G_2X, - numberOfWorkers: props.numberOrWorkers, + numberOfWorkers: props.numberOrWorkers ? props.numberOrWorkers : 10, maxRetries: props.maxRetries, executionProperty: props.maxConcurrentRuns ? 
{ maxConcurrentRuns: props.maxConcurrentRuns } : undefined, timeout: props.timeout?.toMinutes(), diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-etl.js.snapshot/asset.432033e3218068a915d2532fa9be7858a12b228a2ae6e5c10faccd9097b1e855.py b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-etl.js.snapshot/asset.432033e3218068a915d2532fa9be7858a12b228a2ae6e5c10faccd9097b1e855.py new file mode 100644 index 0000000000000..e75154b7c390f --- /dev/null +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-etl.js.snapshot/asset.432033e3218068a915d2532fa9be7858a12b228a2ae6e5c10faccd9097b1e855.py @@ -0,0 +1 @@ +print("hello world") \ No newline at end of file diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-etl.js.snapshot/aws-glue-job-pyspark-etl.assets.json b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-etl.js.snapshot/aws-glue-job-pyspark-etl.assets.json new file mode 100644 index 0000000000000..9ae715372c7a4 --- /dev/null +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-etl.js.snapshot/aws-glue-job-pyspark-etl.assets.json @@ -0,0 +1,32 @@ +{ + "version": "36.0.0", + "files": { + "432033e3218068a915d2532fa9be7858a12b228a2ae6e5c10faccd9097b1e855": { + "source": { + "path": "asset.432033e3218068a915d2532fa9be7858a12b228a2ae6e5c10faccd9097b1e855.py", + "packaging": "file" + }, + "destinations": { + "current_account-current_region": { + "bucketName": "cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}", + "objectKey": "432033e3218068a915d2532fa9be7858a12b228a2ae6e5c10faccd9097b1e855.py", + "assumeRoleArn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-file-publishing-role-${AWS::AccountId}-${AWS::Region}" + } + } + }, + "31c4e4edcf1bc8d265a2b100770a5f7b9fd8f4043b0cd4e3427418100d230420": { + "source": { + "path": "aws-glue-job-pyspark-etl.template.json", + "packaging": "file" + }, + "destinations": { + "current_account-current_region": { + "bucketName": 
"cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}", + "objectKey": "31c4e4edcf1bc8d265a2b100770a5f7b9fd8f4043b0cd4e3427418100d230420.json", + "assumeRoleArn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-file-publishing-role-${AWS::AccountId}-${AWS::Region}" + } + } + } + }, + "dockerImages": {} +} \ No newline at end of file diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-etl.js.snapshot/aws-glue-job-pyspark-etl.template.json b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-etl.js.snapshot/aws-glue-job-pyspark-etl.template.json new file mode 100644 index 0000000000000..a363e79085528 --- /dev/null +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-etl.js.snapshot/aws-glue-job-pyspark-etl.template.json @@ -0,0 +1,202 @@ +{ + "Resources": { + "IAMServiceRole61C662C4": { + "Type": "AWS::IAM::Role", + "Properties": { + "AssumeRolePolicyDocument": { + "Statement": [ + { + "Action": "sts:AssumeRole", + "Effect": "Allow", + "Principal": { + "Service": "glue.amazonaws.com" + } + } + ], + "Version": "2012-10-17" + }, + "ManagedPolicyArns": [ + { + "Fn::Join": [ + "", + [ + "arn:", + { + "Ref": "AWS::Partition" + }, + ":iam::aws:policy/service-role/AWSGlueServiceRole" + ] + ] + } + ] + } + }, + "IAMServiceRoleDefaultPolicy379D1A0E": { + "Type": "AWS::IAM::Policy", + "Properties": { + "PolicyDocument": { + "Statement": [ + { + "Action": [ + "s3:GetBucket*", + "s3:GetObject*", + "s3:List*" + ], + "Effect": "Allow", + "Resource": [ + { + "Fn::Join": [ + "", + [ + "arn:", + { + "Ref": "AWS::Partition" + }, + ":s3:::", + { + "Fn::Sub": "cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}" + }, + "/*" + ] + ] + }, + { + "Fn::Join": [ + "", + [ + "arn:", + { + "Ref": "AWS::Partition" + }, + ":s3:::", + { + "Fn::Sub": "cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}" + } + ] + ] + } + ] + } + ], + "Version": "2012-10-17" + }, + "PolicyName": "IAMServiceRoleDefaultPolicy379D1A0E", + "Roles": [ + { + "Ref": 
"IAMServiceRole61C662C4" + } + ] + } + }, + "BasicPySparkETLJob833DD8C4": { + "Type": "AWS::Glue::Job", + "Properties": { + "Command": { + "Name": "glueetl", + "PythonVersion": "3", + "ScriptLocation": { + "Fn::Join": [ + "", + [ + "s3://", + { + "Fn::Sub": "cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}" + }, + "/432033e3218068a915d2532fa9be7858a12b228a2ae6e5c10faccd9097b1e855.py" + ] + ] + } + }, + "DefaultArguments": { + "--job-language": "python", + "--enable-metrics": "" + }, + "GlueVersion": "4.0", + "NumberOfWorkers": 10, + "Role": { + "Fn::GetAtt": [ + "IAMServiceRole61C662C4", + "Arn" + ] + }, + "WorkerType": "G.2X" + } + }, + "OverridePySparkETLJob85E17065": { + "Type": "AWS::Glue::Job", + "Properties": { + "Command": { + "Name": "glueetl", + "PythonVersion": "3", + "ScriptLocation": { + "Fn::Join": [ + "", + [ + "s3://", + { + "Fn::Sub": "cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}" + }, + "/432033e3218068a915d2532fa9be7858a12b228a2ae6e5c10faccd9097b1e855.py" + ] + ] + } + }, + "DefaultArguments": { + "--job-language": "python", + "--enable-metrics": "", + "arg1": "value1", + "arg2": "value2" + }, + "Description": "Optional Override PySpark ETL Job", + "GlueVersion": "3.0", + "Name": "Optional Override PySpark ETL Job", + "NumberOfWorkers": 20, + "Role": { + "Fn::GetAtt": [ + "IAMServiceRole61C662C4", + "Arn" + ] + }, + "Tags": { + "key": "value" + }, + "Timeout": 15, + "WorkerType": "G.2X" + } + } + }, + "Parameters": { + "BootstrapVersion": { + "Type": "AWS::SSM::Parameter::Value", + "Default": "/cdk-bootstrap/hnb659fds/version", + "Description": "Version of the CDK Bootstrap resources in this environment, automatically retrieved from SSM Parameter Store. 
[cdk:skip]" + } + }, + "Rules": { + "CheckBootstrapVersion": { + "Assertions": [ + { + "Assert": { + "Fn::Not": [ + { + "Fn::Contains": [ + [ + "1", + "2", + "3", + "4", + "5" + ], + { + "Ref": "BootstrapVersion" + } + ] + } + ] + }, + "AssertDescription": "CDK bootstrap stack version 6 required. Please run 'cdk bootstrap' with a recent version of the CDK CLI." + } + ] + } + } +} \ No newline at end of file diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-etl.js.snapshot/awsgluejobpysparketlintegtestDefaultTestDeployAssertED1ACE14.assets.json b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-etl.js.snapshot/awsgluejobpysparketlintegtestDefaultTestDeployAssertED1ACE14.assets.json new file mode 100644 index 0000000000000..aca979ed2ea11 --- /dev/null +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-etl.js.snapshot/awsgluejobpysparketlintegtestDefaultTestDeployAssertED1ACE14.assets.json @@ -0,0 +1,19 @@ +{ + "version": "36.0.0", + "files": { + "21fbb51d7b23f6a6c262b46a9caee79d744a3ac019fd45422d988b96d44b2a22": { + "source": { + "path": "awsgluejobpysparketlintegtestDefaultTestDeployAssertED1ACE14.template.json", + "packaging": "file" + }, + "destinations": { + "current_account-current_region": { + "bucketName": "cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}", + "objectKey": "21fbb51d7b23f6a6c262b46a9caee79d744a3ac019fd45422d988b96d44b2a22.json", + "assumeRoleArn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-file-publishing-role-${AWS::AccountId}-${AWS::Region}" + } + } + } + }, + "dockerImages": {} +} \ No newline at end of file diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-etl.js.snapshot/awsgluejobpysparketlintegtestDefaultTestDeployAssertED1ACE14.template.json b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-etl.js.snapshot/awsgluejobpysparketlintegtestDefaultTestDeployAssertED1ACE14.template.json new file mode 100644 index 0000000000000..ad9d0fb73d1dd --- /dev/null 
+++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-etl.js.snapshot/awsgluejobpysparketlintegtestDefaultTestDeployAssertED1ACE14.template.json @@ -0,0 +1,36 @@ +{ + "Parameters": { + "BootstrapVersion": { + "Type": "AWS::SSM::Parameter::Value", + "Default": "/cdk-bootstrap/hnb659fds/version", + "Description": "Version of the CDK Bootstrap resources in this environment, automatically retrieved from SSM Parameter Store. [cdk:skip]" + } + }, + "Rules": { + "CheckBootstrapVersion": { + "Assertions": [ + { + "Assert": { + "Fn::Not": [ + { + "Fn::Contains": [ + [ + "1", + "2", + "3", + "4", + "5" + ], + { + "Ref": "BootstrapVersion" + } + ] + } + ] + }, + "AssertDescription": "CDK bootstrap stack version 6 required. Please run 'cdk bootstrap' with a recent version of the CDK CLI." + } + ] + } + } +} \ No newline at end of file diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-etl.js.snapshot/cdk.out b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-etl.js.snapshot/cdk.out new file mode 100644 index 0000000000000..1f0068d32659a --- /dev/null +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-etl.js.snapshot/cdk.out @@ -0,0 +1 @@ +{"version":"36.0.0"} \ No newline at end of file diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-etl.js.snapshot/integ.json b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-etl.js.snapshot/integ.json new file mode 100644 index 0000000000000..bc938a726ac44 --- /dev/null +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-etl.js.snapshot/integ.json @@ -0,0 +1,12 @@ +{ + "version": "36.0.0", + "testCases": { + "aws-glue-job-pyspark-etl-integ-test/DefaultTest": { + "stacks": [ + "aws-glue-job-pyspark-etl" + ], + "assertionStack": "aws-glue-job-pyspark-etl-integ-test/DefaultTest/DeployAssert", + "assertionStackName": "awsgluejobpysparketlintegtestDefaultTestDeployAssertED1ACE14" + } + } +} \ No newline at end of file diff --git 
a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-etl.js.snapshot/manifest.json b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-etl.js.snapshot/manifest.json new file mode 100644 index 0000000000000..d089cc44922d2 --- /dev/null +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-etl.js.snapshot/manifest.json @@ -0,0 +1,131 @@ +{ + "version": "36.0.0", + "artifacts": { + "aws-glue-job-pyspark-etl.assets": { + "type": "cdk:asset-manifest", + "properties": { + "file": "aws-glue-job-pyspark-etl.assets.json", + "requiresBootstrapStackVersion": 6, + "bootstrapStackVersionSsmParameter": "/cdk-bootstrap/hnb659fds/version" + } + }, + "aws-glue-job-pyspark-etl": { + "type": "aws:cloudformation:stack", + "environment": "aws://unknown-account/unknown-region", + "properties": { + "templateFile": "aws-glue-job-pyspark-etl.template.json", + "terminationProtection": false, + "validateOnSynth": false, + "assumeRoleArn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-deploy-role-${AWS::AccountId}-${AWS::Region}", + "cloudFormationExecutionRoleArn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-cfn-exec-role-${AWS::AccountId}-${AWS::Region}", + "stackTemplateAssetObjectUrl": "s3://cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}/31c4e4edcf1bc8d265a2b100770a5f7b9fd8f4043b0cd4e3427418100d230420.json", + "requiresBootstrapStackVersion": 6, + "bootstrapStackVersionSsmParameter": "/cdk-bootstrap/hnb659fds/version", + "additionalDependencies": [ + "aws-glue-job-pyspark-etl.assets" + ], + "lookupRole": { + "arn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-lookup-role-${AWS::AccountId}-${AWS::Region}", + "requiresBootstrapStackVersion": 8, + "bootstrapStackVersionSsmParameter": "/cdk-bootstrap/hnb659fds/version" + } + }, + "dependencies": [ + "aws-glue-job-pyspark-etl.assets" + ], + "metadata": { + "/aws-glue-job-pyspark-etl/IAMServiceRole/Resource": [ + { + "type": "aws:cdk:logicalId", + "data": 
"IAMServiceRole61C662C4" + } + ], + "/aws-glue-job-pyspark-etl/IAMServiceRole/DefaultPolicy/Resource": [ + { + "type": "aws:cdk:logicalId", + "data": "IAMServiceRoleDefaultPolicy379D1A0E" + } + ], + "/aws-glue-job-pyspark-etl/BasicPySparkETLJob/Resource": [ + { + "type": "aws:cdk:logicalId", + "data": "BasicPySparkETLJob833DD8C4" + } + ], + "/aws-glue-job-pyspark-etl/OverridePySparkETLJob/Resource": [ + { + "type": "aws:cdk:logicalId", + "data": "OverridePySparkETLJob85E17065" + } + ], + "/aws-glue-job-pyspark-etl/BootstrapVersion": [ + { + "type": "aws:cdk:logicalId", + "data": "BootstrapVersion" + } + ], + "/aws-glue-job-pyspark-etl/CheckBootstrapVersion": [ + { + "type": "aws:cdk:logicalId", + "data": "CheckBootstrapVersion" + } + ] + }, + "displayName": "aws-glue-job-pyspark-etl" + }, + "awsgluejobpysparketlintegtestDefaultTestDeployAssertED1ACE14.assets": { + "type": "cdk:asset-manifest", + "properties": { + "file": "awsgluejobpysparketlintegtestDefaultTestDeployAssertED1ACE14.assets.json", + "requiresBootstrapStackVersion": 6, + "bootstrapStackVersionSsmParameter": "/cdk-bootstrap/hnb659fds/version" + } + }, + "awsgluejobpysparketlintegtestDefaultTestDeployAssertED1ACE14": { + "type": "aws:cloudformation:stack", + "environment": "aws://unknown-account/unknown-region", + "properties": { + "templateFile": "awsgluejobpysparketlintegtestDefaultTestDeployAssertED1ACE14.template.json", + "terminationProtection": false, + "validateOnSynth": false, + "assumeRoleArn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-deploy-role-${AWS::AccountId}-${AWS::Region}", + "cloudFormationExecutionRoleArn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-cfn-exec-role-${AWS::AccountId}-${AWS::Region}", + "stackTemplateAssetObjectUrl": "s3://cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}/21fbb51d7b23f6a6c262b46a9caee79d744a3ac019fd45422d988b96d44b2a22.json", + "requiresBootstrapStackVersion": 6, + "bootstrapStackVersionSsmParameter": 
"/cdk-bootstrap/hnb659fds/version", + "additionalDependencies": [ + "awsgluejobpysparketlintegtestDefaultTestDeployAssertED1ACE14.assets" + ], + "lookupRole": { + "arn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-lookup-role-${AWS::AccountId}-${AWS::Region}", + "requiresBootstrapStackVersion": 8, + "bootstrapStackVersionSsmParameter": "/cdk-bootstrap/hnb659fds/version" + } + }, + "dependencies": [ + "awsgluejobpysparketlintegtestDefaultTestDeployAssertED1ACE14.assets" + ], + "metadata": { + "/aws-glue-job-pyspark-etl-integ-test/DefaultTest/DeployAssert/BootstrapVersion": [ + { + "type": "aws:cdk:logicalId", + "data": "BootstrapVersion" + } + ], + "/aws-glue-job-pyspark-etl-integ-test/DefaultTest/DeployAssert/CheckBootstrapVersion": [ + { + "type": "aws:cdk:logicalId", + "data": "CheckBootstrapVersion" + } + ] + }, + "displayName": "aws-glue-job-pyspark-etl-integ-test/DefaultTest/DeployAssert" + }, + "Tree": { + "type": "cdk:tree", + "properties": { + "file": "tree.json" + } + } + } +} \ No newline at end of file diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-etl.js.snapshot/tree.json b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-etl.js.snapshot/tree.json new file mode 100644 index 0000000000000..33509bc2ac971 --- /dev/null +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-etl.js.snapshot/tree.json @@ -0,0 +1,371 @@ +{ + "version": "tree-0.1", + "tree": { + "id": "App", + "path": "", + "children": { + "aws-glue-job-pyspark-etl": { + "id": "aws-glue-job-pyspark-etl", + "path": "aws-glue-job-pyspark-etl", + "children": { + "IAMServiceRole": { + "id": "IAMServiceRole", + "path": "aws-glue-job-pyspark-etl/IAMServiceRole", + "children": { + "ImportIAMServiceRole": { + "id": "ImportIAMServiceRole", + "path": "aws-glue-job-pyspark-etl/IAMServiceRole/ImportIAMServiceRole", + "constructInfo": { + "fqn": "aws-cdk-lib.Resource", + "version": "0.0.0" + } + }, + "Resource": { + "id": "Resource", + "path": 
"aws-glue-job-pyspark-etl/IAMServiceRole/Resource", + "attributes": { + "aws:cdk:cloudformation:type": "AWS::IAM::Role", + "aws:cdk:cloudformation:props": { + "assumeRolePolicyDocument": { + "Statement": [ + { + "Action": "sts:AssumeRole", + "Effect": "Allow", + "Principal": { + "Service": "glue.amazonaws.com" + } + } + ], + "Version": "2012-10-17" + }, + "managedPolicyArns": [ + { + "Fn::Join": [ + "", + [ + "arn:", + { + "Ref": "AWS::Partition" + }, + ":iam::aws:policy/service-role/AWSGlueServiceRole" + ] + ] + } + ] + } + }, + "constructInfo": { + "fqn": "aws-cdk-lib.aws_iam.CfnRole", + "version": "0.0.0" + } + }, + "DefaultPolicy": { + "id": "DefaultPolicy", + "path": "aws-glue-job-pyspark-etl/IAMServiceRole/DefaultPolicy", + "children": { + "Resource": { + "id": "Resource", + "path": "aws-glue-job-pyspark-etl/IAMServiceRole/DefaultPolicy/Resource", + "attributes": { + "aws:cdk:cloudformation:type": "AWS::IAM::Policy", + "aws:cdk:cloudformation:props": { + "policyDocument": { + "Statement": [ + { + "Action": [ + "s3:GetBucket*", + "s3:GetObject*", + "s3:List*" + ], + "Effect": "Allow", + "Resource": [ + { + "Fn::Join": [ + "", + [ + "arn:", + { + "Ref": "AWS::Partition" + }, + ":s3:::", + { + "Fn::Sub": "cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}" + }, + "/*" + ] + ] + }, + { + "Fn::Join": [ + "", + [ + "arn:", + { + "Ref": "AWS::Partition" + }, + ":s3:::", + { + "Fn::Sub": "cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}" + } + ] + ] + } + ] + } + ], + "Version": "2012-10-17" + }, + "policyName": "IAMServiceRoleDefaultPolicy379D1A0E", + "roles": [ + { + "Ref": "IAMServiceRole61C662C4" + } + ] + } + }, + "constructInfo": { + "fqn": "aws-cdk-lib.aws_iam.CfnPolicy", + "version": "0.0.0" + } + } + }, + "constructInfo": { + "fqn": "aws-cdk-lib.aws_iam.Policy", + "version": "0.0.0" + } + } + }, + "constructInfo": { + "fqn": "aws-cdk-lib.aws_iam.Role", + "version": "0.0.0" + } + }, + "BasicPySparkETLJob": { + "id": "BasicPySparkETLJob", + "path": 
"aws-glue-job-pyspark-etl/BasicPySparkETLJob", + "children": { + "Code2907ea7be4a583708cfffc21b3df1dfa": { + "id": "Code2907ea7be4a583708cfffc21b3df1dfa", + "path": "aws-glue-job-pyspark-etl/BasicPySparkETLJob/Code2907ea7be4a583708cfffc21b3df1dfa", + "children": { + "Stage": { + "id": "Stage", + "path": "aws-glue-job-pyspark-etl/BasicPySparkETLJob/Code2907ea7be4a583708cfffc21b3df1dfa/Stage", + "constructInfo": { + "fqn": "aws-cdk-lib.AssetStaging", + "version": "0.0.0" + } + }, + "AssetBucket": { + "id": "AssetBucket", + "path": "aws-glue-job-pyspark-etl/BasicPySparkETLJob/Code2907ea7be4a583708cfffc21b3df1dfa/AssetBucket", + "constructInfo": { + "fqn": "aws-cdk-lib.aws_s3.BucketBase", + "version": "0.0.0" + } + } + }, + "constructInfo": { + "fqn": "aws-cdk-lib.aws_s3_assets.Asset", + "version": "0.0.0" + } + }, + "Resource": { + "id": "Resource", + "path": "aws-glue-job-pyspark-etl/BasicPySparkETLJob/Resource", + "attributes": { + "aws:cdk:cloudformation:type": "AWS::Glue::Job", + "aws:cdk:cloudformation:props": { + "command": { + "name": "glueetl", + "scriptLocation": { + "Fn::Join": [ + "", + [ + "s3://", + { + "Fn::Sub": "cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}" + }, + "/432033e3218068a915d2532fa9be7858a12b228a2ae6e5c10faccd9097b1e855.py" + ] + ] + }, + "pythonVersion": "3" + }, + "defaultArguments": { + "--job-language": "python", + "--enable-metrics": "" + }, + "glueVersion": "4.0", + "numberOfWorkers": 10, + "role": { + "Fn::GetAtt": [ + "IAMServiceRole61C662C4", + "Arn" + ] + }, + "workerType": "G.2X" + } + }, + "constructInfo": { + "fqn": "aws-cdk-lib.aws_glue.CfnJob", + "version": "0.0.0" + } + } + }, + "constructInfo": { + "fqn": "@aws-cdk/aws-glue-alpha.PySparkEtlJob", + "version": "0.0.0" + } + }, + "OverridePySparkETLJob": { + "id": "OverridePySparkETLJob", + "path": "aws-glue-job-pyspark-etl/OverridePySparkETLJob", + "children": { + "Resource": { + "id": "Resource", + "path": "aws-glue-job-pyspark-etl/OverridePySparkETLJob/Resource", + 
"attributes": { + "aws:cdk:cloudformation:type": "AWS::Glue::Job", + "aws:cdk:cloudformation:props": { + "command": { + "name": "glueetl", + "scriptLocation": { + "Fn::Join": [ + "", + [ + "s3://", + { + "Fn::Sub": "cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}" + }, + "/432033e3218068a915d2532fa9be7858a12b228a2ae6e5c10faccd9097b1e855.py" + ] + ] + }, + "pythonVersion": "3" + }, + "defaultArguments": { + "--job-language": "python", + "--enable-metrics": "", + "arg1": "value1", + "arg2": "value2" + }, + "description": "Optional Override PySpark ETL Job", + "glueVersion": "3.0", + "name": "Optional Override PySpark ETL Job", + "numberOfWorkers": 20, + "role": { + "Fn::GetAtt": [ + "IAMServiceRole61C662C4", + "Arn" + ] + }, + "tags": { + "key": "value" + }, + "timeout": 15, + "workerType": "G.2X" + } + }, + "constructInfo": { + "fqn": "aws-cdk-lib.aws_glue.CfnJob", + "version": "0.0.0" + } + } + }, + "constructInfo": { + "fqn": "@aws-cdk/aws-glue-alpha.PySparkEtlJob", + "version": "0.0.0" + } + }, + "BootstrapVersion": { + "id": "BootstrapVersion", + "path": "aws-glue-job-pyspark-etl/BootstrapVersion", + "constructInfo": { + "fqn": "aws-cdk-lib.CfnParameter", + "version": "0.0.0" + } + }, + "CheckBootstrapVersion": { + "id": "CheckBootstrapVersion", + "path": "aws-glue-job-pyspark-etl/CheckBootstrapVersion", + "constructInfo": { + "fqn": "aws-cdk-lib.CfnRule", + "version": "0.0.0" + } + } + }, + "constructInfo": { + "fqn": "aws-cdk-lib.Stack", + "version": "0.0.0" + } + }, + "aws-glue-job-pyspark-etl-integ-test": { + "id": "aws-glue-job-pyspark-etl-integ-test", + "path": "aws-glue-job-pyspark-etl-integ-test", + "children": { + "DefaultTest": { + "id": "DefaultTest", + "path": "aws-glue-job-pyspark-etl-integ-test/DefaultTest", + "children": { + "Default": { + "id": "Default", + "path": "aws-glue-job-pyspark-etl-integ-test/DefaultTest/Default", + "constructInfo": { + "fqn": "constructs.Construct", + "version": "10.3.0" + } + }, + "DeployAssert": { + "id": 
"DeployAssert", + "path": "aws-glue-job-pyspark-etl-integ-test/DefaultTest/DeployAssert", + "children": { + "BootstrapVersion": { + "id": "BootstrapVersion", + "path": "aws-glue-job-pyspark-etl-integ-test/DefaultTest/DeployAssert/BootstrapVersion", + "constructInfo": { + "fqn": "aws-cdk-lib.CfnParameter", + "version": "0.0.0" + } + }, + "CheckBootstrapVersion": { + "id": "CheckBootstrapVersion", + "path": "aws-glue-job-pyspark-etl-integ-test/DefaultTest/DeployAssert/CheckBootstrapVersion", + "constructInfo": { + "fqn": "aws-cdk-lib.CfnRule", + "version": "0.0.0" + } + } + }, + "constructInfo": { + "fqn": "aws-cdk-lib.Stack", + "version": "0.0.0" + } + } + }, + "constructInfo": { + "fqn": "@aws-cdk/integ-tests-alpha.IntegTestCase", + "version": "0.0.0" + } + } + }, + "constructInfo": { + "fqn": "@aws-cdk/integ-tests-alpha.IntegTest", + "version": "0.0.0" + } + }, + "Tree": { + "id": "Tree", + "path": "Tree", + "constructInfo": { + "fqn": "constructs.Construct", + "version": "10.3.0" + } + } + }, + "constructInfo": { + "fqn": "aws-cdk-lib.App", + "version": "0.0.0" + } + } +} \ No newline at end of file diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-etl.js.snapshot/asset.e305655b966b957f91fcec580e3f8703573eb6b69528c5d52190d72579c91602.jar b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-etl.js.snapshot/asset.e305655b966b957f91fcec580e3f8703573eb6b69528c5d52190d72579c91602.jar new file mode 100644 index 0000000000000000000000000000000000000000..41a6aa95d5aff514ba19b9a9b4c8bfff3ec123a0 GIT binary patch literal 782 zcmWIWW@Zs#;Nak3*vZlp!GHuf8CV#6T|*poJ^kGD|D9rBU}gyLX6FE@V1gMK3ugvA8%lz}Ne*gFx-=CBBp0qNAhjxvK;gu@`>~ z`>52>zR1ayf1cUPplvyuRMi&Qd-HS2{bBs4QMj$k@x`HcXZBSZe@?U8f6tylX5ssV zZOgjF!d_)mzeu?1c-^soVVO?w%=rcp2mkRl*RDF=w9s zc%joEIcMfi%~`9Zb)>khS6q5_X61#YiU%G{x~_iuL>T|Q1M^NUWW5kQyICwy>O=j? 
z>M2", + "Default": "/cdk-bootstrap/hnb659fds/version", + "Description": "Version of the CDK Bootstrap resources in this environment, automatically retrieved from SSM Parameter Store. [cdk:skip]" + } + }, + "Rules": { + "CheckBootstrapVersion": { + "Assertions": [ + { + "Assert": { + "Fn::Not": [ + { + "Fn::Contains": [ + [ + "1", + "2", + "3", + "4", + "5" + ], + { + "Ref": "BootstrapVersion" + } + ] + } + ] + }, + "AssertDescription": "CDK bootstrap stack version 6 required. Please run 'cdk bootstrap' with a recent version of the CDK CLI." + } + ] + } + } +} \ No newline at end of file diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-etl.js.snapshot/awsgluejobscalasparketlintegtestDefaultTestDeployAssertCA9A8121.assets.json b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-etl.js.snapshot/awsgluejobscalasparketlintegtestDefaultTestDeployAssertCA9A8121.assets.json new file mode 100644 index 0000000000000..7cb05d5c06149 --- /dev/null +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-etl.js.snapshot/awsgluejobscalasparketlintegtestDefaultTestDeployAssertCA9A8121.assets.json @@ -0,0 +1,19 @@ +{ + "version": "36.0.0", + "files": { + "21fbb51d7b23f6a6c262b46a9caee79d744a3ac019fd45422d988b96d44b2a22": { + "source": { + "path": "awsgluejobscalasparketlintegtestDefaultTestDeployAssertCA9A8121.template.json", + "packaging": "file" + }, + "destinations": { + "current_account-current_region": { + "bucketName": "cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}", + "objectKey": "21fbb51d7b23f6a6c262b46a9caee79d744a3ac019fd45422d988b96d44b2a22.json", + "assumeRoleArn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-file-publishing-role-${AWS::AccountId}-${AWS::Region}" + } + } + } + }, + "dockerImages": {} +} \ No newline at end of file diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-etl.js.snapshot/awsgluejobscalasparketlintegtestDefaultTestDeployAssertCA9A8121.template.json 
b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-etl.js.snapshot/awsgluejobscalasparketlintegtestDefaultTestDeployAssertCA9A8121.template.json new file mode 100644 index 0000000000000..ad9d0fb73d1dd --- /dev/null +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-etl.js.snapshot/awsgluejobscalasparketlintegtestDefaultTestDeployAssertCA9A8121.template.json @@ -0,0 +1,36 @@ +{ + "Parameters": { + "BootstrapVersion": { + "Type": "AWS::SSM::Parameter::Value", + "Default": "/cdk-bootstrap/hnb659fds/version", + "Description": "Version of the CDK Bootstrap resources in this environment, automatically retrieved from SSM Parameter Store. [cdk:skip]" + } + }, + "Rules": { + "CheckBootstrapVersion": { + "Assertions": [ + { + "Assert": { + "Fn::Not": [ + { + "Fn::Contains": [ + [ + "1", + "2", + "3", + "4", + "5" + ], + { + "Ref": "BootstrapVersion" + } + ] + } + ] + }, + "AssertDescription": "CDK bootstrap stack version 6 required. Please run 'cdk bootstrap' with a recent version of the CDK CLI." 
+ } + ] + } + } +} \ No newline at end of file diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-etl.js.snapshot/cdk.out b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-etl.js.snapshot/cdk.out new file mode 100644 index 0000000000000..1f0068d32659a --- /dev/null +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-etl.js.snapshot/cdk.out @@ -0,0 +1 @@ +{"version":"36.0.0"} \ No newline at end of file diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-etl.js.snapshot/integ.json b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-etl.js.snapshot/integ.json new file mode 100644 index 0000000000000..486814268b7d5 --- /dev/null +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-etl.js.snapshot/integ.json @@ -0,0 +1,12 @@ +{ + "version": "36.0.0", + "testCases": { + "aws-glue-job-scalaspark-etl-integ-test/DefaultTest": { + "stacks": [ + "aws-glue-job-scalaspark-etl" + ], + "assertionStack": "aws-glue-job-scalaspark-etl-integ-test/DefaultTest/DeployAssert", + "assertionStackName": "awsgluejobscalasparketlintegtestDefaultTestDeployAssertCA9A8121" + } + } +} \ No newline at end of file diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-etl.js.snapshot/manifest.json b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-etl.js.snapshot/manifest.json new file mode 100644 index 0000000000000..df57172ee83af --- /dev/null +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-etl.js.snapshot/manifest.json @@ -0,0 +1,131 @@ +{ + "version": "36.0.0", + "artifacts": { + "aws-glue-job-scalaspark-etl.assets": { + "type": "cdk:asset-manifest", + "properties": { + "file": "aws-glue-job-scalaspark-etl.assets.json", + "requiresBootstrapStackVersion": 6, + "bootstrapStackVersionSsmParameter": "/cdk-bootstrap/hnb659fds/version" + } + }, + "aws-glue-job-scalaspark-etl": { + "type": "aws:cloudformation:stack", + "environment": "aws://unknown-account/unknown-region", 
+ "properties": { + "templateFile": "aws-glue-job-scalaspark-etl.template.json", + "terminationProtection": false, + "validateOnSynth": false, + "assumeRoleArn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-deploy-role-${AWS::AccountId}-${AWS::Region}", + "cloudFormationExecutionRoleArn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-cfn-exec-role-${AWS::AccountId}-${AWS::Region}", + "stackTemplateAssetObjectUrl": "s3://cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}/cd441b1457833c6e143bf2319249b20c17a6d32d800107d402d4128c52139d87.json", + "requiresBootstrapStackVersion": 6, + "bootstrapStackVersionSsmParameter": "/cdk-bootstrap/hnb659fds/version", + "additionalDependencies": [ + "aws-glue-job-scalaspark-etl.assets" + ], + "lookupRole": { + "arn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-lookup-role-${AWS::AccountId}-${AWS::Region}", + "requiresBootstrapStackVersion": 8, + "bootstrapStackVersionSsmParameter": "/cdk-bootstrap/hnb659fds/version" + } + }, + "dependencies": [ + "aws-glue-job-scalaspark-etl.assets" + ], + "metadata": { + "/aws-glue-job-scalaspark-etl/IAMServiceRole/Resource": [ + { + "type": "aws:cdk:logicalId", + "data": "IAMServiceRole61C662C4" + } + ], + "/aws-glue-job-scalaspark-etl/IAMServiceRole/DefaultPolicy/Resource": [ + { + "type": "aws:cdk:logicalId", + "data": "IAMServiceRoleDefaultPolicy379D1A0E" + } + ], + "/aws-glue-job-scalaspark-etl/BasicScalaSparkETLJob/Resource": [ + { + "type": "aws:cdk:logicalId", + "data": "BasicScalaSparkETLJob5F894E39" + } + ], + "/aws-glue-job-scalaspark-etl/OverrideScalaSparkETLJob/Resource": [ + { + "type": "aws:cdk:logicalId", + "data": "OverrideScalaSparkETLJobC019089C" + } + ], + "/aws-glue-job-scalaspark-etl/BootstrapVersion": [ + { + "type": "aws:cdk:logicalId", + "data": "BootstrapVersion" + } + ], + "/aws-glue-job-scalaspark-etl/CheckBootstrapVersion": [ + { + "type": "aws:cdk:logicalId", + "data": "CheckBootstrapVersion" + } + ] + }, 
+ "displayName": "aws-glue-job-scalaspark-etl" + }, + "awsgluejobscalasparketlintegtestDefaultTestDeployAssertCA9A8121.assets": { + "type": "cdk:asset-manifest", + "properties": { + "file": "awsgluejobscalasparketlintegtestDefaultTestDeployAssertCA9A8121.assets.json", + "requiresBootstrapStackVersion": 6, + "bootstrapStackVersionSsmParameter": "/cdk-bootstrap/hnb659fds/version" + } + }, + "awsgluejobscalasparketlintegtestDefaultTestDeployAssertCA9A8121": { + "type": "aws:cloudformation:stack", + "environment": "aws://unknown-account/unknown-region", + "properties": { + "templateFile": "awsgluejobscalasparketlintegtestDefaultTestDeployAssertCA9A8121.template.json", + "terminationProtection": false, + "validateOnSynth": false, + "assumeRoleArn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-deploy-role-${AWS::AccountId}-${AWS::Region}", + "cloudFormationExecutionRoleArn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-cfn-exec-role-${AWS::AccountId}-${AWS::Region}", + "stackTemplateAssetObjectUrl": "s3://cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}/21fbb51d7b23f6a6c262b46a9caee79d744a3ac019fd45422d988b96d44b2a22.json", + "requiresBootstrapStackVersion": 6, + "bootstrapStackVersionSsmParameter": "/cdk-bootstrap/hnb659fds/version", + "additionalDependencies": [ + "awsgluejobscalasparketlintegtestDefaultTestDeployAssertCA9A8121.assets" + ], + "lookupRole": { + "arn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-lookup-role-${AWS::AccountId}-${AWS::Region}", + "requiresBootstrapStackVersion": 8, + "bootstrapStackVersionSsmParameter": "/cdk-bootstrap/hnb659fds/version" + } + }, + "dependencies": [ + "awsgluejobscalasparketlintegtestDefaultTestDeployAssertCA9A8121.assets" + ], + "metadata": { + "/aws-glue-job-scalaspark-etl-integ-test/DefaultTest/DeployAssert/BootstrapVersion": [ + { + "type": "aws:cdk:logicalId", + "data": "BootstrapVersion" + } + ], + 
"/aws-glue-job-scalaspark-etl-integ-test/DefaultTest/DeployAssert/CheckBootstrapVersion": [ + { + "type": "aws:cdk:logicalId", + "data": "CheckBootstrapVersion" + } + ] + }, + "displayName": "aws-glue-job-scalaspark-etl-integ-test/DefaultTest/DeployAssert" + }, + "Tree": { + "type": "cdk:tree", + "properties": { + "file": "tree.json" + } + } + } +} \ No newline at end of file diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-etl.js.snapshot/tree.json b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-etl.js.snapshot/tree.json new file mode 100644 index 0000000000000..eef4eeba9d084 --- /dev/null +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-etl.js.snapshot/tree.json @@ -0,0 +1,371 @@ +{ + "version": "tree-0.1", + "tree": { + "id": "App", + "path": "", + "children": { + "aws-glue-job-scalaspark-etl": { + "id": "aws-glue-job-scalaspark-etl", + "path": "aws-glue-job-scalaspark-etl", + "children": { + "IAMServiceRole": { + "id": "IAMServiceRole", + "path": "aws-glue-job-scalaspark-etl/IAMServiceRole", + "children": { + "ImportIAMServiceRole": { + "id": "ImportIAMServiceRole", + "path": "aws-glue-job-scalaspark-etl/IAMServiceRole/ImportIAMServiceRole", + "constructInfo": { + "fqn": "aws-cdk-lib.Resource", + "version": "0.0.0" + } + }, + "Resource": { + "id": "Resource", + "path": "aws-glue-job-scalaspark-etl/IAMServiceRole/Resource", + "attributes": { + "aws:cdk:cloudformation:type": "AWS::IAM::Role", + "aws:cdk:cloudformation:props": { + "assumeRolePolicyDocument": { + "Statement": [ + { + "Action": "sts:AssumeRole", + "Effect": "Allow", + "Principal": { + "Service": "glue.amazonaws.com" + } + } + ], + "Version": "2012-10-17" + }, + "managedPolicyArns": [ + { + "Fn::Join": [ + "", + [ + "arn:", + { + "Ref": "AWS::Partition" + }, + ":iam::aws:policy/service-role/AWSGlueServiceRole" + ] + ] + } + ] + } + }, + "constructInfo": { + "fqn": "aws-cdk-lib.aws_iam.CfnRole", + "version": "0.0.0" + } + }, + "DefaultPolicy": { + 
"id": "DefaultPolicy", + "path": "aws-glue-job-scalaspark-etl/IAMServiceRole/DefaultPolicy", + "children": { + "Resource": { + "id": "Resource", + "path": "aws-glue-job-scalaspark-etl/IAMServiceRole/DefaultPolicy/Resource", + "attributes": { + "aws:cdk:cloudformation:type": "AWS::IAM::Policy", + "aws:cdk:cloudformation:props": { + "policyDocument": { + "Statement": [ + { + "Action": [ + "s3:GetBucket*", + "s3:GetObject*", + "s3:List*" + ], + "Effect": "Allow", + "Resource": [ + { + "Fn::Join": [ + "", + [ + "arn:", + { + "Ref": "AWS::Partition" + }, + ":s3:::", + { + "Fn::Sub": "cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}" + }, + "/*" + ] + ] + }, + { + "Fn::Join": [ + "", + [ + "arn:", + { + "Ref": "AWS::Partition" + }, + ":s3:::", + { + "Fn::Sub": "cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}" + } + ] + ] + } + ] + } + ], + "Version": "2012-10-17" + }, + "policyName": "IAMServiceRoleDefaultPolicy379D1A0E", + "roles": [ + { + "Ref": "IAMServiceRole61C662C4" + } + ] + } + }, + "constructInfo": { + "fqn": "aws-cdk-lib.aws_iam.CfnPolicy", + "version": "0.0.0" + } + } + }, + "constructInfo": { + "fqn": "aws-cdk-lib.aws_iam.Policy", + "version": "0.0.0" + } + } + }, + "constructInfo": { + "fqn": "aws-cdk-lib.aws_iam.Role", + "version": "0.0.0" + } + }, + "BasicScalaSparkETLJob": { + "id": "BasicScalaSparkETLJob", + "path": "aws-glue-job-scalaspark-etl/BasicScalaSparkETLJob", + "children": { + "Codeb58a68516710fd95a65c427a7e567405": { + "id": "Codeb58a68516710fd95a65c427a7e567405", + "path": "aws-glue-job-scalaspark-etl/BasicScalaSparkETLJob/Codeb58a68516710fd95a65c427a7e567405", + "children": { + "Stage": { + "id": "Stage", + "path": "aws-glue-job-scalaspark-etl/BasicScalaSparkETLJob/Codeb58a68516710fd95a65c427a7e567405/Stage", + "constructInfo": { + "fqn": "aws-cdk-lib.AssetStaging", + "version": "0.0.0" + } + }, + "AssetBucket": { + "id": "AssetBucket", + "path": 
"aws-glue-job-scalaspark-etl/BasicScalaSparkETLJob/Codeb58a68516710fd95a65c427a7e567405/AssetBucket", + "constructInfo": { + "fqn": "aws-cdk-lib.aws_s3.BucketBase", + "version": "0.0.0" + } + } + }, + "constructInfo": { + "fqn": "aws-cdk-lib.aws_s3_assets.Asset", + "version": "0.0.0" + } + }, + "Resource": { + "id": "Resource", + "path": "aws-glue-job-scalaspark-etl/BasicScalaSparkETLJob/Resource", + "attributes": { + "aws:cdk:cloudformation:type": "AWS::Glue::Job", + "aws:cdk:cloudformation:props": { + "command": { + "name": "glueetl", + "scriptLocation": { + "Fn::Join": [ + "", + [ + "s3://", + { + "Fn::Sub": "cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}" + }, + "/e305655b966b957f91fcec580e3f8703573eb6b69528c5d52190d72579c91602.jar" + ] + ] + } + }, + "defaultArguments": { + "--job-language": "scala", + "--class": "com.example.HelloWorld", + "--enable-metrics": "" + }, + "glueVersion": "4.0", + "numberOfWorkers": 10, + "role": { + "Fn::GetAtt": [ + "IAMServiceRole61C662C4", + "Arn" + ] + }, + "workerType": "G.2X" + } + }, + "constructInfo": { + "fqn": "aws-cdk-lib.aws_glue.CfnJob", + "version": "0.0.0" + } + } + }, + "constructInfo": { + "fqn": "@aws-cdk/aws-glue-alpha.ScalaSparkEtlJob", + "version": "0.0.0" + } + }, + "OverrideScalaSparkETLJob": { + "id": "OverrideScalaSparkETLJob", + "path": "aws-glue-job-scalaspark-etl/OverrideScalaSparkETLJob", + "children": { + "Resource": { + "id": "Resource", + "path": "aws-glue-job-scalaspark-etl/OverrideScalaSparkETLJob/Resource", + "attributes": { + "aws:cdk:cloudformation:type": "AWS::Glue::Job", + "aws:cdk:cloudformation:props": { + "command": { + "name": "glueetl", + "scriptLocation": { + "Fn::Join": [ + "", + [ + "s3://", + { + "Fn::Sub": "cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}" + }, + "/e305655b966b957f91fcec580e3f8703573eb6b69528c5d52190d72579c91602.jar" + ] + ] + } + }, + "defaultArguments": { + "--job-language": "scala", + "--class": "com.example.HelloWorld", + "--enable-metrics": "", + 
"arg1": "value1", + "arg2": "value2" + }, + "description": "Optional Override ScalaSpark ETL Job", + "glueVersion": "3.0", + "name": "Optional Override ScalaSpark ETL Job", + "numberOfWorkers": 20, + "role": { + "Fn::GetAtt": [ + "IAMServiceRole61C662C4", + "Arn" + ] + }, + "tags": { + "key": "value" + }, + "timeout": 15, + "workerType": "G.2X" + } + }, + "constructInfo": { + "fqn": "aws-cdk-lib.aws_glue.CfnJob", + "version": "0.0.0" + } + } + }, + "constructInfo": { + "fqn": "@aws-cdk/aws-glue-alpha.ScalaSparkEtlJob", + "version": "0.0.0" + } + }, + "BootstrapVersion": { + "id": "BootstrapVersion", + "path": "aws-glue-job-scalaspark-etl/BootstrapVersion", + "constructInfo": { + "fqn": "aws-cdk-lib.CfnParameter", + "version": "0.0.0" + } + }, + "CheckBootstrapVersion": { + "id": "CheckBootstrapVersion", + "path": "aws-glue-job-scalaspark-etl/CheckBootstrapVersion", + "constructInfo": { + "fqn": "aws-cdk-lib.CfnRule", + "version": "0.0.0" + } + } + }, + "constructInfo": { + "fqn": "aws-cdk-lib.Stack", + "version": "0.0.0" + } + }, + "aws-glue-job-scalaspark-etl-integ-test": { + "id": "aws-glue-job-scalaspark-etl-integ-test", + "path": "aws-glue-job-scalaspark-etl-integ-test", + "children": { + "DefaultTest": { + "id": "DefaultTest", + "path": "aws-glue-job-scalaspark-etl-integ-test/DefaultTest", + "children": { + "Default": { + "id": "Default", + "path": "aws-glue-job-scalaspark-etl-integ-test/DefaultTest/Default", + "constructInfo": { + "fqn": "constructs.Construct", + "version": "10.3.0" + } + }, + "DeployAssert": { + "id": "DeployAssert", + "path": "aws-glue-job-scalaspark-etl-integ-test/DefaultTest/DeployAssert", + "children": { + "BootstrapVersion": { + "id": "BootstrapVersion", + "path": "aws-glue-job-scalaspark-etl-integ-test/DefaultTest/DeployAssert/BootstrapVersion", + "constructInfo": { + "fqn": "aws-cdk-lib.CfnParameter", + "version": "0.0.0" + } + }, + "CheckBootstrapVersion": { + "id": "CheckBootstrapVersion", + "path": 
"aws-glue-job-scalaspark-etl-integ-test/DefaultTest/DeployAssert/CheckBootstrapVersion", + "constructInfo": { + "fqn": "aws-cdk-lib.CfnRule", + "version": "0.0.0" + } + } + }, + "constructInfo": { + "fqn": "aws-cdk-lib.Stack", + "version": "0.0.0" + } + } + }, + "constructInfo": { + "fqn": "@aws-cdk/integ-tests-alpha.IntegTestCase", + "version": "0.0.0" + } + } + }, + "constructInfo": { + "fqn": "@aws-cdk/integ-tests-alpha.IntegTest", + "version": "0.0.0" + } + }, + "Tree": { + "id": "Tree", + "path": "Tree", + "constructInfo": { + "fqn": "constructs.Construct", + "version": "10.3.0" + } + } + }, + "constructInfo": { + "fqn": "aws-cdk-lib.App", + "version": "0.0.0" + } + } +} \ No newline at end of file From 5fd85691cb94f62d9d9f86fb2031c6222ad536a4 Mon Sep 17 00:00:00 2001 From: "Janardhan (Janny) Molumuri" Date: Tue, 25 Jun 2024 15:52:42 +0000 Subject: [PATCH 28/51] Updates based on glue team feedback --- packages/@aws-cdk/aws-glue-alpha/awslint.json | 3 +- .../@aws-cdk/aws-glue-alpha/lib/constants.ts | 11 ++++-- .../aws-glue-alpha/lib/jobs/flex-job.ts | 13 ------- .../@aws-cdk/aws-glue-alpha/lib/jobs/job.ts | 2 +- .../lib/jobs/pyspark-etl-job.ts | 31 +++++++++------- .../lib/jobs/pyspark-streaming-job.ts | 37 ++++++++++--------- .../lib/jobs/pysparkflex-etl-job.ts | 4 +- .../aws-glue-alpha/lib/jobs/ray-job.ts | 2 + .../lib/jobs/scala-spark-etl-job.ts | 4 +- .../lib/jobs/scala-spark-flex-etl-job.ts | 4 +- .../lib/jobs/scala-spark-streaming-job.ts | 15 ++------ .../aws-glue-alpha/lib/jobs/spark-etl-job.ts | 10 ----- .../aws-glue-alpha/lib/jobs/streaming-job.ts | 13 ------- 13 files changed, 61 insertions(+), 88 deletions(-) delete mode 100644 packages/@aws-cdk/aws-glue-alpha/lib/jobs/flex-job.ts delete mode 100644 packages/@aws-cdk/aws-glue-alpha/lib/jobs/spark-etl-job.ts delete mode 100644 packages/@aws-cdk/aws-glue-alpha/lib/jobs/streaming-job.ts diff --git a/packages/@aws-cdk/aws-glue-alpha/awslint.json b/packages/@aws-cdk/aws-glue-alpha/awslint.json index 
9555a7aea8590..b42bc9d01c6c8 100644 --- a/packages/@aws-cdk/aws-glue-alpha/awslint.json +++ b/packages/@aws-cdk/aws-glue-alpha/awslint.json @@ -60,6 +60,7 @@ "docs-public-apis:@aws-cdk/aws-glue-alpha.TableAttributes.tableArn", "docs-public-apis:@aws-cdk/aws-glue-alpha.TableAttributes.tableName", "docs-public-apis:@aws-cdk/aws-glue-alpha.TableBaseProps", - "docs-public-apis:@aws-cdk/aws-glue-alpha.TableProps" + "docs-public-apis:@aws-cdk/aws-glue-alpha.TableProps", + "docs-public-apis:@aws-cdk/aws-glue-alpha.PredicateLogical" ] } diff --git a/packages/@aws-cdk/aws-glue-alpha/lib/constants.ts b/packages/@aws-cdk/aws-glue-alpha/lib/constants.ts index 67edec52c6273..efeb5d5f09001 100644 --- a/packages/@aws-cdk/aws-glue-alpha/lib/constants.ts +++ b/packages/@aws-cdk/aws-glue-alpha/lib/constants.ts @@ -261,13 +261,18 @@ export enum MaxCapacity { DPU_1 = 1, } +/* * Represents the logical operator for combining multiple conditions in the Glue Trigger API. */ export enum PredicateLogical { - /** All conditions must be true for the predicate to be true. */ + /** + * All conditions must be true for the predicate to be true. + */ AND = 'AND', - /** At least one condition must be true for the predicate to be true. */ + /** + * At least one condition must be true for the predicate to be true. + */ ANY = 'ANY', } @@ -300,4 +305,4 @@ export enum CrawlerState { /** The crawler encountered an error during its operation. */ ERROR = 'ERROR', -} +} \ No newline at end of file diff --git a/packages/@aws-cdk/aws-glue-alpha/lib/jobs/flex-job.ts b/packages/@aws-cdk/aws-glue-alpha/lib/jobs/flex-job.ts deleted file mode 100644 index 04cc4c7c2403f..0000000000000 --- a/packages/@aws-cdk/aws-glue-alpha/lib/jobs/flex-job.ts +++ /dev/null @@ -1,13 +0,0 @@ -/** - * Flex Jobs class - * - * Flex jobs supports Python and Scala language. - * The flexible execution class is appropriate for non-urgent jobs such as - * pre-production jobs, testing, and one-time data loads. 
- * Flexible job runs are supported for jobs using AWS Glue version 3.0 or later and G.1X or - * G.2X worker types but will default to the latest version of Glue (currently Glue 3.0.) - * - * Similar to ETL, we’ll enable these features: —enable-metrics, —enable-spark-ui, - * —enable-continuous-cloudwatch-log - * - */ diff --git a/packages/@aws-cdk/aws-glue-alpha/lib/jobs/job.ts b/packages/@aws-cdk/aws-glue-alpha/lib/jobs/job.ts index 8dbe99ea6d916..9892af15ea037 100644 --- a/packages/@aws-cdk/aws-glue-alpha/lib/jobs/job.ts +++ b/packages/@aws-cdk/aws-glue-alpha/lib/jobs/job.ts @@ -354,7 +354,7 @@ export interface JobProperties { * Worker Type (optional) * Type of Worker for Glue to use during job execution * Enum options: Standard, G_1X, G_2X, G_025X. G_4X, G_8X, Z_2X - * @default G_2X + * @default G_1X **/ readonly workerType?: WorkerType; diff --git a/packages/@aws-cdk/aws-glue-alpha/lib/jobs/pyspark-etl-job.ts b/packages/@aws-cdk/aws-glue-alpha/lib/jobs/pyspark-etl-job.ts index d6130549883c7..90573d5cae909 100644 --- a/packages/@aws-cdk/aws-glue-alpha/lib/jobs/pyspark-etl-job.ts +++ b/packages/@aws-cdk/aws-glue-alpha/lib/jobs/pyspark-etl-job.ts @@ -5,6 +5,7 @@ import { Job, JobProperties } from './job'; import { Construct } from 'constructs'; import { JobType, GlueVersion, JobLanguage, PythonVersion, WorkerType } from '../constants'; import { SparkUIProps, SparkUILoggingLocation, validateSparkUiPrefix, cleanSparkUiPrefixForGrant } from './spark-ui-utils'; +import { Code } from '../code'; /** * Spark ETL Jobs class @@ -38,8 +39,16 @@ export interface PySparkEtlJobProps extends JobProperties { * S3 URL where additional python dependencies are located * @default - no extra files */ - readonly extraPythonFiles?: string[]; + readonly extraPythonFiles?: Code[]; + /** + * Additional files, such as configuration files that AWS Glue copies to the working directory of your script before executing it. + * + * @default - no extra files specified. 
+ * + * @see `--extra-files` in https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-etl-glue-arguments.html + */ + readonly extraFiles?: Code[]; } /** @@ -87,6 +96,7 @@ export class PySparkEtlJob extends Job { // Enable CloudWatch metrics and continuous logging by default as a best practice const continuousLoggingArgs = props.continuousLogging?.enabled ? this.setupContinuousLogging(this.role, props.continuousLogging) : {}; const profilingMetricsArgs = { '--enable-metrics': '' }; + const observabilityMetricsArgs = { '--enable-observability-metrics': 'true' }; // Gather executable arguments const execuatbleArgs = this.executableArguments(props); @@ -96,6 +106,7 @@ export class PySparkEtlJob extends Job { ...execuatbleArgs, ...continuousLoggingArgs, ...profilingMetricsArgs, + ...observabilityMetricsArgs, ...sparkUIArgs?.args, ...this.checkNoReservedArgs(props.defaultArguments), }; @@ -114,7 +125,7 @@ export class PySparkEtlJob extends Job { pythonVersion: PythonVersion.THREE, }, glueVersion: props.glueVersion ? props.glueVersion : GlueVersion.V4_0, - workerType: props.workerType ? props.workerType : WorkerType.G_2X, + workerType: props.workerType ? props.workerType : WorkerType.G_1X, numberOfWorkers: props.numberOrWorkers ? props.numberOrWorkers : 10, maxRetries: props.maxRetries, executionProperty: props.maxConcurrentRuns ? 
{ maxConcurrentRuns: props.maxConcurrentRuns } : undefined, @@ -141,20 +152,12 @@ export class PySparkEtlJob extends Job { const args: { [key: string]: string } = {}; args['--job-language'] = JobLanguage.PYTHON; - // TODO: Confirm with Glue service team what the mapping is from extra-x to job language, if any if (props.extraPythonFiles && props.extraPythonFiles.length > 0) { - //args['--extra-py-files'] = props.extraPythonFiles.map(code => this.codeS3ObjectUrl(code)).join(','); + args['--extra-py-files'] = props.extraPythonFiles.map(code => this.codeS3ObjectUrl(code)).join(','); + } + if (props.extraFiles && props.extraFiles.length > 0) { + args['--extra-files'] = props.extraFiles.map(code => this.codeS3ObjectUrl(code)).join(','); } - - // if (props.extraJars && props.extraJars?.length > 0) { - // args['--extra-jars'] = props.extraJars.map(code => this.codeS3ObjectUrl(code)).join(','); - // } - // if (props.extraFiles && props.extraFiles.length > 0) { - // args['--extra-files'] = props.extraFiles.map(code => this.codeS3ObjectUrl(code)).join(','); - // } - // if (props.extraJarsFirst) { - // args['--user-jars-first'] = 'true'; - // } return args; } diff --git a/packages/@aws-cdk/aws-glue-alpha/lib/jobs/pyspark-streaming-job.ts b/packages/@aws-cdk/aws-glue-alpha/lib/jobs/pyspark-streaming-job.ts index f09d2038e40fc..c13f10e8fbb0f 100644 --- a/packages/@aws-cdk/aws-glue-alpha/lib/jobs/pyspark-streaming-job.ts +++ b/packages/@aws-cdk/aws-glue-alpha/lib/jobs/pyspark-streaming-job.ts @@ -20,6 +20,7 @@ import { Job, JobProperties } from './job'; import { Construct } from 'constructs'; import { JobType, GlueVersion, JobLanguage, PythonVersion, WorkerType } from '../constants'; import { SparkUIProps, SparkUILoggingLocation, validateSparkUiPrefix, cleanSparkUiPrefixForGrant } from './spark-ui-utils'; +import { Code } from '../code'; /** * Properties for creating a Python Spark ETL job @@ -41,7 +42,17 @@ export interface PySparkStreamingJobProps extends JobProperties { * S3 
URL where additional python dependencies are located * @default - no extra files */ - readonly extraPythonFiles?: string[]; + + readonly extraPythonFiles?: Code[]; + /** + * Additional files, such as configuration files that AWS Glue copies to the working directory of your script before executing it. + * + * @default - no extra files specified. + * + * @see `--extra-files` in https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-etl-glue-arguments.html + */ + readonly extraFiles?: Code[]; + } /** @@ -89,6 +100,7 @@ export class PySparkStreamingJob extends Job { // Enable CloudWatch metrics and continuous logging by default as a best practice const continuousLoggingArgs = props.continuousLogging?.enabled ? this.setupContinuousLogging(this.role, props.continuousLogging) : {}; const profilingMetricsArgs = { '--enable-metrics': '' }; + const observabilityMetricsArgs = { '--enable-observability-metrics': 'true' }; // Gather executable arguments const executableArgs = this.executableArguments(props); @@ -98,14 +110,11 @@ export class PySparkStreamingJob extends Job { ...executableArgs, ...continuousLoggingArgs, ...profilingMetricsArgs, + ...observabilityMetricsArgs, ...sparkUIArgs?.args, ...this.checkNoReservedArgs(props.defaultArguments), }; - // if ((!props.workerType && props.numberOrWorkers !== undefined) || (props.workerType && props.numberOrWorkers === undefined)) { - // throw new Error('Both workerType and numberOrWorkers must be set'); - // } - const jobResource = new CfnJob(this, 'Resource', { name: props.jobName, description: props.description, @@ -116,7 +125,7 @@ export class PySparkStreamingJob extends Job { pythonVersion: PythonVersion.THREE, }, glueVersion: props.glueVersion ? props.glueVersion : GlueVersion.V4_0, - workerType: props.workerType ? props.workerType : WorkerType.G_2X, + workerType: props.workerType ? props.workerType : WorkerType.G_1X, numberOfWorkers: props.numberOrWorkers ? 
props.numberOrWorkers : 10, maxRetries: props.maxRetries, executionProperty: props.maxConcurrentRuns ? { maxConcurrentRuns: props.maxConcurrentRuns } : undefined, @@ -142,20 +151,12 @@ export class PySparkStreamingJob extends Job { const args: { [key: string]: string } = {}; args['--job-language'] = JobLanguage.PYTHON; - // TODO: Confirm with Glue service team what the mapping is from extra-x to job language, if any if (props.extraPythonFiles && props.extraPythonFiles.length > 0) { - //args['--extra-py-files'] = props.extraPythonFiles.map(code => this.codeS3ObjectUrl(code)).join(','); + args['--extra-py-files'] = props.extraPythonFiles.map(code => this.codeS3ObjectUrl(code)).join(','); + } + if (props.extraFiles && props.extraFiles.length > 0) { + args['--extra-files'] = props.extraFiles.map(code => this.codeS3ObjectUrl(code)).join(','); } - - // if (props.extraJars && props.extraJars?.length > 0) { - // args['--extra-jars'] = props.extraJars.map(code => this.codeS3ObjectUrl(code)).join(','); - // } - // if (props.extraFiles && props.extraFiles.length > 0) { - // args['--extra-files'] = props.extraFiles.map(code => this.codeS3ObjectUrl(code)).join(','); - // } - // if (props.extraJarsFirst) { - // args['--user-jars-first'] = 'true'; - // } return args; } diff --git a/packages/@aws-cdk/aws-glue-alpha/lib/jobs/pysparkflex-etl-job.ts b/packages/@aws-cdk/aws-glue-alpha/lib/jobs/pysparkflex-etl-job.ts index c3ae840c46ca8..8559073016f95 100644 --- a/packages/@aws-cdk/aws-glue-alpha/lib/jobs/pysparkflex-etl-job.ts +++ b/packages/@aws-cdk/aws-glue-alpha/lib/jobs/pysparkflex-etl-job.ts @@ -106,6 +106,7 @@ export class PySparkFlexEtlJob extends Job { // Enable CloudWatch metrics and continuous logging by default as a best practice const continuousLoggingArgs = props.continuousLogging?.enabled ? 
this.setupContinuousLogging(this.role, props.continuousLogging) : {}; const profilingMetricsArgs = { '--enable-metrics': '' }; + const observabilityMetricsArgs = { '--enable-observability-metrics': 'true' }; // Gather executable arguments const execuatbleArgs = this.executableArguments(props); @@ -115,6 +116,7 @@ export class PySparkFlexEtlJob extends Job { ...execuatbleArgs, ...continuousLoggingArgs, ...profilingMetricsArgs, + ...observabilityMetricsArgs, ...sparkUIArgs?.args, ...this.checkNoReservedArgs(props.defaultArguments), }; @@ -129,7 +131,7 @@ export class PySparkFlexEtlJob extends Job { pythonVersion: PythonVersion.THREE, }, glueVersion: props.glueVersion ? props.glueVersion : GlueVersion.V3_0, - workerType: props.workerType ? props.workerType : WorkerType.G_2X, + workerType: props.workerType ? props.workerType : WorkerType.G_1X, numberOfWorkers: props.numberOrWorkers ? props.numberOrWorkers : 10, maxRetries: props.maxRetries, executionProperty: props.maxConcurrentRuns ? { maxConcurrentRuns: props.maxConcurrentRuns } : undefined, diff --git a/packages/@aws-cdk/aws-glue-alpha/lib/jobs/ray-job.ts b/packages/@aws-cdk/aws-glue-alpha/lib/jobs/ray-job.ts index a7a3d0a4bbdc1..86d658d70ba66 100644 --- a/packages/@aws-cdk/aws-glue-alpha/lib/jobs/ray-job.ts +++ b/packages/@aws-cdk/aws-glue-alpha/lib/jobs/ray-job.ts @@ -63,12 +63,14 @@ export class RayJob extends Job { // Enable CloudWatch metrics and continuous logging by default as a best practice const continuousLoggingArgs = props.continuousLogging?.enabled ? 
this.setupContinuousLogging(this.role, props.continuousLogging) : {}; const profilingMetricsArgs = { '--enable-metrics': '' }; + const observabilityMetricsArgs = { '--enable-observability-metrics': 'true' }; // Combine command line arguments into a single line item const defaultArguments = { ...this.checkNoReservedArgs(props.defaultArguments), ...continuousLoggingArgs, ...profilingMetricsArgs, + ...observabilityMetricsArgs, }; if (props.workerType && props.workerType !== WorkerType.Z_2X) { diff --git a/packages/@aws-cdk/aws-glue-alpha/lib/jobs/scala-spark-etl-job.ts b/packages/@aws-cdk/aws-glue-alpha/lib/jobs/scala-spark-etl-job.ts index 8448da9801b43..e9f4991592524 100644 --- a/packages/@aws-cdk/aws-glue-alpha/lib/jobs/scala-spark-etl-job.ts +++ b/packages/@aws-cdk/aws-glue-alpha/lib/jobs/scala-spark-etl-job.ts @@ -97,6 +97,7 @@ export class ScalaSparkEtlJob extends Job { // Enable CloudWatch metrics and continuous logging by default as a best practice const continuousLoggingArgs = props.continuousLogging?.enabled ? this.setupContinuousLogging(this.role, props.continuousLogging) : {}; const profilingMetricsArgs = { '--enable-metrics': '' }; + const observabilityMetricsArgs = { '--enable-observability-metrics': 'true' }; // Gather executable arguments const execuatbleArgs = this.executableArguments(props); @@ -111,6 +112,7 @@ export class ScalaSparkEtlJob extends Job { ...execuatbleArgs, ...continuousLoggingArgs, ...profilingMetricsArgs, + ...observabilityMetricsArgs, ...sparkUIArgs?.args, ...this.checkNoReservedArgs(props.defaultArguments), }; @@ -128,7 +130,7 @@ export class ScalaSparkEtlJob extends Job { scriptLocation: this.codeS3ObjectUrl(props.script), }, glueVersion: props.glueVersion ? props.glueVersion : GlueVersion.V4_0, - workerType: props.workerType ? props.workerType : WorkerType.G_2X, + workerType: props.workerType ? props.workerType : WorkerType.G_1X, numberOfWorkers: props.numberOrWorkers ? 
props.numberOrWorkers : 10, maxRetries: props.maxRetries, executionProperty: props.maxConcurrentRuns ? { maxConcurrentRuns: props.maxConcurrentRuns } : undefined, diff --git a/packages/@aws-cdk/aws-glue-alpha/lib/jobs/scala-spark-flex-etl-job.ts b/packages/@aws-cdk/aws-glue-alpha/lib/jobs/scala-spark-flex-etl-job.ts index 75a1272dcc548..7cb22c9264a18 100644 --- a/packages/@aws-cdk/aws-glue-alpha/lib/jobs/scala-spark-flex-etl-job.ts +++ b/packages/@aws-cdk/aws-glue-alpha/lib/jobs/scala-spark-flex-etl-job.ts @@ -136,6 +136,7 @@ export class ScalaSparkFlexEtlJob extends Job { // Enable CloudWatch metrics and continuous logging by default as a best practice const continuousLoggingArgs = props.continuousLogging?.enabled ? this.setupContinuousLogging(this.role, props.continuousLogging) : {}; const profilingMetricsArgs = { '--enable-metrics': '' }; + const observabilityMetricsArgs = { '--enable-observability-metrics': 'true' }; // Gather executable arguments const execuatbleArgs = this.executableArguments(props); @@ -149,6 +150,7 @@ export class ScalaSparkFlexEtlJob extends Job { ...execuatbleArgs, ...continuousLoggingArgs, ...profilingMetricsArgs, + ...observabilityMetricsArgs, ...sparkUIArgs?.args, ...this.checkNoReservedArgs(props.defaultArguments), }; @@ -162,7 +164,7 @@ export class ScalaSparkFlexEtlJob extends Job { scriptLocation: this.codeS3ObjectUrl(props.script), }, glueVersion: props.glueVersion ? props.glueVersion : GlueVersion.V3_0, - workerType: props.workerType ? props.workerType : WorkerType.G_2X, + workerType: props.workerType ? props.workerType : WorkerType.G_1X, numberOfWorkers: props.numberOrWorkers ? props.numberOrWorkers : 10, maxRetries: props.maxRetries, executionProperty: props.maxConcurrentRuns ? 
{ maxConcurrentRuns: props.maxConcurrentRuns } : undefined, diff --git a/packages/@aws-cdk/aws-glue-alpha/lib/jobs/scala-spark-streaming-job.ts b/packages/@aws-cdk/aws-glue-alpha/lib/jobs/scala-spark-streaming-job.ts index 487c8f146ebd6..f085242ff01af 100644 --- a/packages/@aws-cdk/aws-glue-alpha/lib/jobs/scala-spark-streaming-job.ts +++ b/packages/@aws-cdk/aws-glue-alpha/lib/jobs/scala-spark-streaming-job.ts @@ -96,6 +96,7 @@ export class ScalaSparkStreamingJob extends Job { // Enable CloudWatch metrics and continuous logging by default as a best practice const continuousLoggingArgs = props.continuousLogging?.enabled ? this.setupContinuousLogging(this.role, props.continuousLogging) : {}; const profilingMetricsArgs = { '--enable-metrics': '' }; + const observabilityMetricsArgs = { '--enable-observability-metrics': 'true' }; // Gather executable arguments const executableArgs = this.executableArguments(props); @@ -110,6 +111,7 @@ export class ScalaSparkStreamingJob extends Job { ...executableArgs, ...continuousLoggingArgs, ...profilingMetricsArgs, + ...observabilityMetricsArgs, ...sparkUIArgs?.args, ...this.checkNoReservedArgs(props.defaultArguments), }; @@ -127,7 +129,7 @@ export class ScalaSparkStreamingJob extends Job { scriptLocation: this.codeS3ObjectUrl(props.script), }, glueVersion: props.glueVersion ? props.glueVersion : GlueVersion.V4_0, - workerType: props.workerType ? props.workerType : WorkerType.G_2X, + workerType: props.workerType ? props.workerType : WorkerType.G_1X, numberOfWorkers: props.numberOrWorkers ? props.numberOrWorkers : 10, maxRetries: props.maxRetries, executionProperty: props.maxConcurrentRuns ? 
{ maxConcurrentRuns: props.maxConcurrentRuns } : undefined, @@ -154,17 +156,6 @@ export class ScalaSparkStreamingJob extends Job { args['--job-language'] = JobLanguage.SCALA; args['--class'] = props.className!; - // TODO: Confirm with Glue service team what the mapping is from extra-x to job language, if any - if (props.extraJars && props.extraJars?.length > 0) { - // args['--extra-jars'] = props.extraJars.map(code => this.codeS3ObjectUrl(code)).join(','); - } - // if (props.extraFiles && props.extraFiles.length > 0) { - // args['--extra-files'] = props.extraFiles.map(code => this.codeS3ObjectUrl(code)).join(','); - // } - // if (props.extraJarsFirst) { - // args['--user-jars-first'] = 'true'; - // } - return args; } diff --git a/packages/@aws-cdk/aws-glue-alpha/lib/jobs/spark-etl-job.ts b/packages/@aws-cdk/aws-glue-alpha/lib/jobs/spark-etl-job.ts deleted file mode 100644 index 52a13d3e48ca1..0000000000000 --- a/packages/@aws-cdk/aws-glue-alpha/lib/jobs/spark-etl-job.ts +++ /dev/null @@ -1,10 +0,0 @@ -/** - * Spark ETL Jobs class - * - * ETL jobs supports Python and Scala language. - * ETL job type supports G1, G2, G4 and G8 worker type default as G2, which customer can override. - * It wil default to the best practice version of ETL 4.0, but allow developers to override to 3.0. - * We will also default to best practice enablement the following ETL features: - * —enable-metrics, —enable-spark-ui, —enable-continuous-cloudwatch-log. - * - */ \ No newline at end of file diff --git a/packages/@aws-cdk/aws-glue-alpha/lib/jobs/streaming-job.ts b/packages/@aws-cdk/aws-glue-alpha/lib/jobs/streaming-job.ts deleted file mode 100644 index 54a2ac9150336..0000000000000 --- a/packages/@aws-cdk/aws-glue-alpha/lib/jobs/streaming-job.ts +++ /dev/null @@ -1,13 +0,0 @@ -/** - * Streaming Jobs class - * - * A Streaming job is similar to an ETL job, except that it performs ETL on data streams - * using the Apache Spark Structured Streaming framework. 
- * These jobs will default to use Python 3.9. - * - * Similar to ETL jobs, streaming job supports Scala and Python languages. Similar to ETL, - * it supports G1 and G2 worker type and 2.0, 3.0 and 4.0 version. We’ll default to G2 worker - * and 4.0 version for streaming jobs which developers can override. - * We will enable —enable-metrics, —enable-spark-ui, —enable-continuous-cloudwatch-log. - * - */ From c33b0b92577b45dbe0874c9d5bb40513e628adca Mon Sep 17 00:00:00 2001 From: "Janardhan (Janny) Molumuri" Date: Tue, 25 Jun 2024 16:23:23 +0000 Subject: [PATCH 29/51] Updated tests and results --- .../aws-glue-job-pyspark-etl.assets.json | 4 ++-- .../aws-glue-job-pyspark-etl.template.json | 6 ++++-- .../test/integ.job-pyspark-etl.js.snapshot/manifest.json | 2 +- .../test/integ.job-pyspark-etl.js.snapshot/tree.json | 6 ++++-- .../aws-glue-job-pyspark-streaming.assets.json | 4 ++-- .../aws-glue-job-pyspark-streaming.template.json | 6 ++++-- .../integ.job-pyspark-streaming.js.snapshot/manifest.json | 2 +- .../test/integ.job-pyspark-streaming.js.snapshot/tree.json | 6 ++++-- .../aws-glue-job-pysparkflex-etl.assets.json | 4 ++-- .../aws-glue-job-pysparkflex-etl.template.json | 6 ++++-- .../integ.job-pysparkflex-etl.js.snapshot/manifest.json | 2 +- .../test/integ.job-pysparkflex-etl.js.snapshot/tree.json | 6 ++++-- .../aws-glue-job-scalaspark-etl.assets.json | 4 ++-- .../aws-glue-job-scalaspark-etl.template.json | 6 ++++-- .../test/integ.job-scalaspark-etl.js.snapshot/manifest.json | 2 +- .../test/integ.job-scalaspark-etl.js.snapshot/tree.json | 6 ++++-- .../aws-glue-job-scalaspark-streaming.assets.json | 4 ++-- .../aws-glue-job-scalaspark-streaming.template.json | 6 ++++-- .../manifest.json | 2 +- .../integ.job-scalaspark-streaming.js.snapshot/tree.json | 6 ++++-- .../aws-glue-job-scalasparkflex-etl.assets.json | 4 ++-- .../aws-glue-job-scalasparkflex-etl.template.json | 6 ++++-- .../integ.job-scalasparkflex-etl.js.snapshot/manifest.json | 2 +- 
.../test/integ.job-scalasparkflex-etl.js.snapshot/tree.json | 6 ++++-- .../integ.ray-job.js.snapshot/aws-glue-ray-job.assets.json | 4 ++-- .../aws-glue-ray-job.template.json | 6 ++++-- .../test/integ.ray-job.js.snapshot/manifest.json | 2 +- .../aws-glue-alpha/test/integ.ray-job.js.snapshot/tree.json | 6 ++++-- .../@aws-cdk/aws-glue-alpha/test/pyspark-etl-jobs.test.ts | 4 ++-- .../aws-glue-alpha/test/pyspark-streaming-jobs.test.ts | 4 ++-- .../aws-glue-alpha/test/pysparkflex-etl-jobs.test.ts | 4 ++-- .../aws-glue-alpha/test/scalaspark-etl-jobs.test.ts | 4 ++-- .../aws-glue-alpha/test/scalaspark-streaming-jobs.test.ts | 4 ++-- .../aws-glue-alpha/test/scalasparkflex-etl-jobs.test.ts | 4 ++-- 34 files changed, 89 insertions(+), 61 deletions(-) diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-etl.js.snapshot/aws-glue-job-pyspark-etl.assets.json b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-etl.js.snapshot/aws-glue-job-pyspark-etl.assets.json index 9ae715372c7a4..4f84798bcf09f 100644 --- a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-etl.js.snapshot/aws-glue-job-pyspark-etl.assets.json +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-etl.js.snapshot/aws-glue-job-pyspark-etl.assets.json @@ -14,7 +14,7 @@ } } }, - "31c4e4edcf1bc8d265a2b100770a5f7b9fd8f4043b0cd4e3427418100d230420": { + "93158bcd276e06327430a2314576a92d70f19939481431744c4712f04232a338": { "source": { "path": "aws-glue-job-pyspark-etl.template.json", "packaging": "file" @@ -22,7 +22,7 @@ "destinations": { "current_account-current_region": { "bucketName": "cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}", - "objectKey": "31c4e4edcf1bc8d265a2b100770a5f7b9fd8f4043b0cd4e3427418100d230420.json", + "objectKey": "93158bcd276e06327430a2314576a92d70f19939481431744c4712f04232a338.json", "assumeRoleArn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-file-publishing-role-${AWS::AccountId}-${AWS::Region}" } } diff --git 
a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-etl.js.snapshot/aws-glue-job-pyspark-etl.template.json b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-etl.js.snapshot/aws-glue-job-pyspark-etl.template.json index a363e79085528..28261083fef1e 100644 --- a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-etl.js.snapshot/aws-glue-job-pyspark-etl.template.json +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-etl.js.snapshot/aws-glue-job-pyspark-etl.template.json @@ -109,7 +109,8 @@ }, "DefaultArguments": { "--job-language": "python", - "--enable-metrics": "" + "--enable-metrics": "", + "--enable-observability-metrics": "true" }, "GlueVersion": "4.0", "NumberOfWorkers": 10, @@ -119,7 +120,7 @@ "Arn" ] }, - "WorkerType": "G.2X" + "WorkerType": "G.1X" } }, "OverridePySparkETLJob85E17065": { @@ -144,6 +145,7 @@ "DefaultArguments": { "--job-language": "python", "--enable-metrics": "", + "--enable-observability-metrics": "true", "arg1": "value1", "arg2": "value2" }, diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-etl.js.snapshot/manifest.json b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-etl.js.snapshot/manifest.json index d089cc44922d2..788027abd0280 100644 --- a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-etl.js.snapshot/manifest.json +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-etl.js.snapshot/manifest.json @@ -18,7 +18,7 @@ "validateOnSynth": false, "assumeRoleArn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-deploy-role-${AWS::AccountId}-${AWS::Region}", "cloudFormationExecutionRoleArn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-cfn-exec-role-${AWS::AccountId}-${AWS::Region}", - "stackTemplateAssetObjectUrl": "s3://cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}/31c4e4edcf1bc8d265a2b100770a5f7b9fd8f4043b0cd4e3427418100d230420.json", + "stackTemplateAssetObjectUrl": 
"s3://cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}/93158bcd276e06327430a2314576a92d70f19939481431744c4712f04232a338.json", "requiresBootstrapStackVersion": 6, "bootstrapStackVersionSsmParameter": "/cdk-bootstrap/hnb659fds/version", "additionalDependencies": [ diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-etl.js.snapshot/tree.json b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-etl.js.snapshot/tree.json index 33509bc2ac971..45fa2d748766e 100644 --- a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-etl.js.snapshot/tree.json +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-etl.js.snapshot/tree.json @@ -194,7 +194,8 @@ }, "defaultArguments": { "--job-language": "python", - "--enable-metrics": "" + "--enable-metrics": "", + "--enable-observability-metrics": "true" }, "glueVersion": "4.0", "numberOfWorkers": 10, @@ -204,7 +205,7 @@ "Arn" ] }, - "workerType": "G.2X" + "workerType": "G.1X" } }, "constructInfo": { @@ -247,6 +248,7 @@ "defaultArguments": { "--job-language": "python", "--enable-metrics": "", + "--enable-observability-metrics": "true", "arg1": "value1", "arg2": "value2" }, diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-streaming.js.snapshot/aws-glue-job-pyspark-streaming.assets.json b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-streaming.js.snapshot/aws-glue-job-pyspark-streaming.assets.json index 3cef0dba9ddd4..3d198dc8e0e4c 100644 --- a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-streaming.js.snapshot/aws-glue-job-pyspark-streaming.assets.json +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-streaming.js.snapshot/aws-glue-job-pyspark-streaming.assets.json @@ -14,7 +14,7 @@ } } }, - "32dbc2000ae8315bdff7bd4dd248e2d28c945a6879dfe6be766b33f41734f2a3": { + "c90f1c4b88575cc10fa94f4f42c02a4525156fd7e9f0b97f27b8911edea00200": { "source": { "path": "aws-glue-job-pyspark-streaming.template.json", "packaging": "file" @@ -22,7 +22,7 @@ 
"destinations": { "current_account-current_region": { "bucketName": "cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}", - "objectKey": "32dbc2000ae8315bdff7bd4dd248e2d28c945a6879dfe6be766b33f41734f2a3.json", + "objectKey": "c90f1c4b88575cc10fa94f4f42c02a4525156fd7e9f0b97f27b8911edea00200.json", "assumeRoleArn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-file-publishing-role-${AWS::AccountId}-${AWS::Region}" } } diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-streaming.js.snapshot/aws-glue-job-pyspark-streaming.template.json b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-streaming.js.snapshot/aws-glue-job-pyspark-streaming.template.json index f85276688bcdd..b9a0cdf8f9d76 100644 --- a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-streaming.js.snapshot/aws-glue-job-pyspark-streaming.template.json +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-streaming.js.snapshot/aws-glue-job-pyspark-streaming.template.json @@ -109,7 +109,8 @@ }, "DefaultArguments": { "--job-language": "python", - "--enable-metrics": "" + "--enable-metrics": "", + "--enable-observability-metrics": "true" }, "GlueVersion": "4.0", "NumberOfWorkers": 10, @@ -119,7 +120,7 @@ "Arn" ] }, - "WorkerType": "G.2X" + "WorkerType": "G.1X" } }, "OverridePySparkStreamingJob58DE176A": { @@ -144,6 +145,7 @@ "DefaultArguments": { "--job-language": "python", "--enable-metrics": "", + "--enable-observability-metrics": "true", "arg1": "value1", "arg2": "value2" }, diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-streaming.js.snapshot/manifest.json b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-streaming.js.snapshot/manifest.json index 77681a4e3bb1d..204ff49c3bf11 100644 --- a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-streaming.js.snapshot/manifest.json +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-streaming.js.snapshot/manifest.json @@ -18,7 +18,7 @@ "validateOnSynth": false, 
"assumeRoleArn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-deploy-role-${AWS::AccountId}-${AWS::Region}", "cloudFormationExecutionRoleArn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-cfn-exec-role-${AWS::AccountId}-${AWS::Region}", - "stackTemplateAssetObjectUrl": "s3://cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}/32dbc2000ae8315bdff7bd4dd248e2d28c945a6879dfe6be766b33f41734f2a3.json", + "stackTemplateAssetObjectUrl": "s3://cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}/c90f1c4b88575cc10fa94f4f42c02a4525156fd7e9f0b97f27b8911edea00200.json", "requiresBootstrapStackVersion": 6, "bootstrapStackVersionSsmParameter": "/cdk-bootstrap/hnb659fds/version", "additionalDependencies": [ diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-streaming.js.snapshot/tree.json b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-streaming.js.snapshot/tree.json index 8bcdcde9aee03..1978333231852 100644 --- a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-streaming.js.snapshot/tree.json +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-streaming.js.snapshot/tree.json @@ -194,7 +194,8 @@ }, "defaultArguments": { "--job-language": "python", - "--enable-metrics": "" + "--enable-metrics": "", + "--enable-observability-metrics": "true" }, "glueVersion": "4.0", "numberOfWorkers": 10, @@ -204,7 +205,7 @@ "Arn" ] }, - "workerType": "G.2X" + "workerType": "G.1X" } }, "constructInfo": { @@ -247,6 +248,7 @@ "defaultArguments": { "--job-language": "python", "--enable-metrics": "", + "--enable-observability-metrics": "true", "arg1": "value1", "arg2": "value2" }, diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pysparkflex-etl.js.snapshot/aws-glue-job-pysparkflex-etl.assets.json b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pysparkflex-etl.js.snapshot/aws-glue-job-pysparkflex-etl.assets.json index b5ad08acc9ab4..69ac81cf61856 100644 --- 
a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pysparkflex-etl.js.snapshot/aws-glue-job-pysparkflex-etl.assets.json +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pysparkflex-etl.js.snapshot/aws-glue-job-pysparkflex-etl.assets.json @@ -14,7 +14,7 @@ } } }, - "6f1761f37e0e58957866339a668ae140999f5800a1285a839297c3593438c1ea": { + "526212322ba7ab66ae5aab010091baff528275b87d212460e3ecff513c0b2eff": { "source": { "path": "aws-glue-job-pysparkflex-etl.template.json", "packaging": "file" @@ -22,7 +22,7 @@ "destinations": { "current_account-current_region": { "bucketName": "cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}", - "objectKey": "6f1761f37e0e58957866339a668ae140999f5800a1285a839297c3593438c1ea.json", + "objectKey": "526212322ba7ab66ae5aab010091baff528275b87d212460e3ecff513c0b2eff.json", "assumeRoleArn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-file-publishing-role-${AWS::AccountId}-${AWS::Region}" } } diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pysparkflex-etl.js.snapshot/aws-glue-job-pysparkflex-etl.template.json b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pysparkflex-etl.js.snapshot/aws-glue-job-pysparkflex-etl.template.json index af52f7c3eca39..971a1cb357e36 100644 --- a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pysparkflex-etl.js.snapshot/aws-glue-job-pysparkflex-etl.template.json +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pysparkflex-etl.js.snapshot/aws-glue-job-pysparkflex-etl.template.json @@ -109,7 +109,8 @@ }, "DefaultArguments": { "--job-language": "python", - "--enable-metrics": "" + "--enable-metrics": "", + "--enable-observability-metrics": "true" }, "ExecutionClass": "FLEX", "GlueVersion": "3.0", @@ -120,7 +121,7 @@ "Arn" ] }, - "WorkerType": "G.2X" + "WorkerType": "G.1X" } }, "OverridePySparkFlexEtlJob8EE4CFA1": { @@ -145,6 +146,7 @@ "DefaultArguments": { "--job-language": "python", "--enable-metrics": "", + "--enable-observability-metrics": "true", "arg1": "value1", 
"arg2": "value2" }, diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pysparkflex-etl.js.snapshot/manifest.json b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pysparkflex-etl.js.snapshot/manifest.json index 56ea621a7e015..8fb3110de8188 100644 --- a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pysparkflex-etl.js.snapshot/manifest.json +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pysparkflex-etl.js.snapshot/manifest.json @@ -18,7 +18,7 @@ "validateOnSynth": false, "assumeRoleArn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-deploy-role-${AWS::AccountId}-${AWS::Region}", "cloudFormationExecutionRoleArn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-cfn-exec-role-${AWS::AccountId}-${AWS::Region}", - "stackTemplateAssetObjectUrl": "s3://cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}/6f1761f37e0e58957866339a668ae140999f5800a1285a839297c3593438c1ea.json", + "stackTemplateAssetObjectUrl": "s3://cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}/526212322ba7ab66ae5aab010091baff528275b87d212460e3ecff513c0b2eff.json", "requiresBootstrapStackVersion": 6, "bootstrapStackVersionSsmParameter": "/cdk-bootstrap/hnb659fds/version", "additionalDependencies": [ diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pysparkflex-etl.js.snapshot/tree.json b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pysparkflex-etl.js.snapshot/tree.json index c28d10218218d..d3ebe4e6fb7ec 100644 --- a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pysparkflex-etl.js.snapshot/tree.json +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pysparkflex-etl.js.snapshot/tree.json @@ -194,7 +194,8 @@ }, "defaultArguments": { "--job-language": "python", - "--enable-metrics": "" + "--enable-metrics": "", + "--enable-observability-metrics": "true" }, "executionClass": "FLEX", "glueVersion": "3.0", @@ -205,7 +206,7 @@ "Arn" ] }, - "workerType": "G.2X" + "workerType": "G.1X" } }, "constructInfo": { @@ -248,6 +249,7 @@ 
"defaultArguments": { "--job-language": "python", "--enable-metrics": "", + "--enable-observability-metrics": "true", "arg1": "value1", "arg2": "value2" }, diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-etl.js.snapshot/aws-glue-job-scalaspark-etl.assets.json b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-etl.js.snapshot/aws-glue-job-scalaspark-etl.assets.json index 4e91d4d1c7dce..bb01f581282b2 100644 --- a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-etl.js.snapshot/aws-glue-job-scalaspark-etl.assets.json +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-etl.js.snapshot/aws-glue-job-scalaspark-etl.assets.json @@ -14,7 +14,7 @@ } } }, - "cd441b1457833c6e143bf2319249b20c17a6d32d800107d402d4128c52139d87": { + "1b363a750832e7361aa9ffe1e7a97587a65d5ea174e5a00960868a0e56967189": { "source": { "path": "aws-glue-job-scalaspark-etl.template.json", "packaging": "file" @@ -22,7 +22,7 @@ "destinations": { "current_account-current_region": { "bucketName": "cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}", - "objectKey": "cd441b1457833c6e143bf2319249b20c17a6d32d800107d402d4128c52139d87.json", + "objectKey": "1b363a750832e7361aa9ffe1e7a97587a65d5ea174e5a00960868a0e56967189.json", "assumeRoleArn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-file-publishing-role-${AWS::AccountId}-${AWS::Region}" } } diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-etl.js.snapshot/aws-glue-job-scalaspark-etl.template.json b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-etl.js.snapshot/aws-glue-job-scalaspark-etl.template.json index 92a529b6c224e..e475d0368c047 100644 --- a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-etl.js.snapshot/aws-glue-job-scalaspark-etl.template.json +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-etl.js.snapshot/aws-glue-job-scalaspark-etl.template.json @@ -109,7 +109,8 @@ "DefaultArguments": { "--job-language": 
"scala", "--class": "com.example.HelloWorld", - "--enable-metrics": "" + "--enable-metrics": "", + "--enable-observability-metrics": "true" }, "GlueVersion": "4.0", "NumberOfWorkers": 10, @@ -119,7 +120,7 @@ "Arn" ] }, - "WorkerType": "G.2X" + "WorkerType": "G.1X" } }, "OverrideScalaSparkETLJobC019089C": { @@ -144,6 +145,7 @@ "--job-language": "scala", "--class": "com.example.HelloWorld", "--enable-metrics": "", + "--enable-observability-metrics": "true", "arg1": "value1", "arg2": "value2" }, diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-etl.js.snapshot/manifest.json b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-etl.js.snapshot/manifest.json index df57172ee83af..e3e06c10a6609 100644 --- a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-etl.js.snapshot/manifest.json +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-etl.js.snapshot/manifest.json @@ -18,7 +18,7 @@ "validateOnSynth": false, "assumeRoleArn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-deploy-role-${AWS::AccountId}-${AWS::Region}", "cloudFormationExecutionRoleArn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-cfn-exec-role-${AWS::AccountId}-${AWS::Region}", - "stackTemplateAssetObjectUrl": "s3://cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}/cd441b1457833c6e143bf2319249b20c17a6d32d800107d402d4128c52139d87.json", + "stackTemplateAssetObjectUrl": "s3://cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}/1b363a750832e7361aa9ffe1e7a97587a65d5ea174e5a00960868a0e56967189.json", "requiresBootstrapStackVersion": 6, "bootstrapStackVersionSsmParameter": "/cdk-bootstrap/hnb659fds/version", "additionalDependencies": [ diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-etl.js.snapshot/tree.json b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-etl.js.snapshot/tree.json index eef4eeba9d084..473ba648d15be 100644 --- 
a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-etl.js.snapshot/tree.json +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-etl.js.snapshot/tree.json @@ -194,7 +194,8 @@ "defaultArguments": { "--job-language": "scala", "--class": "com.example.HelloWorld", - "--enable-metrics": "" + "--enable-metrics": "", + "--enable-observability-metrics": "true" }, "glueVersion": "4.0", "numberOfWorkers": 10, @@ -204,7 +205,7 @@ "Arn" ] }, - "workerType": "G.2X" + "workerType": "G.1X" } }, "constructInfo": { @@ -247,6 +248,7 @@ "--job-language": "scala", "--class": "com.example.HelloWorld", "--enable-metrics": "", + "--enable-observability-metrics": "true", "arg1": "value1", "arg2": "value2" }, diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-streaming.js.snapshot/aws-glue-job-scalaspark-streaming.assets.json b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-streaming.js.snapshot/aws-glue-job-scalaspark-streaming.assets.json index 83c688970dfe0..96c25f918d60e 100644 --- a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-streaming.js.snapshot/aws-glue-job-scalaspark-streaming.assets.json +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-streaming.js.snapshot/aws-glue-job-scalaspark-streaming.assets.json @@ -14,7 +14,7 @@ } } }, - "c7b60611a3ef84ec57bf6098baf969990922367a029d69e2aecede0c306a66cc": { + "36eef53bd55456183ff275e1ea5ce97d1544aee6a149d80bff0864e7a3f995bc": { "source": { "path": "aws-glue-job-scalaspark-streaming.template.json", "packaging": "file" @@ -22,7 +22,7 @@ "destinations": { "current_account-current_region": { "bucketName": "cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}", - "objectKey": "c7b60611a3ef84ec57bf6098baf969990922367a029d69e2aecede0c306a66cc.json", + "objectKey": "36eef53bd55456183ff275e1ea5ce97d1544aee6a149d80bff0864e7a3f995bc.json", "assumeRoleArn": 
"arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-file-publishing-role-${AWS::AccountId}-${AWS::Region}" } } diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-streaming.js.snapshot/aws-glue-job-scalaspark-streaming.template.json b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-streaming.js.snapshot/aws-glue-job-scalaspark-streaming.template.json index b9210a71faa1d..a0ef35b3188fd 100644 --- a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-streaming.js.snapshot/aws-glue-job-scalaspark-streaming.template.json +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-streaming.js.snapshot/aws-glue-job-scalaspark-streaming.template.json @@ -109,7 +109,8 @@ "DefaultArguments": { "--job-language": "scala", "--class": "com.example.HelloWorld", - "--enable-metrics": "" + "--enable-metrics": "", + "--enable-observability-metrics": "true" }, "GlueVersion": "4.0", "NumberOfWorkers": 10, @@ -119,7 +120,7 @@ "Arn" ] }, - "WorkerType": "G.2X" + "WorkerType": "G.1X" } }, "OverrideScalaSparkStreamingJob598931ED": { @@ -144,6 +145,7 @@ "--job-language": "scala", "--class": "com.example.HelloWorld", "--enable-metrics": "", + "--enable-observability-metrics": "true", "arg1": "value1", "arg2": "value2" }, diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-streaming.js.snapshot/manifest.json b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-streaming.js.snapshot/manifest.json index 0c5491440ec91..f6830e1172c1d 100644 --- a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-streaming.js.snapshot/manifest.json +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-streaming.js.snapshot/manifest.json @@ -18,7 +18,7 @@ "validateOnSynth": false, "assumeRoleArn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-deploy-role-${AWS::AccountId}-${AWS::Region}", "cloudFormationExecutionRoleArn": 
"arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-cfn-exec-role-${AWS::AccountId}-${AWS::Region}", - "stackTemplateAssetObjectUrl": "s3://cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}/c7b60611a3ef84ec57bf6098baf969990922367a029d69e2aecede0c306a66cc.json", + "stackTemplateAssetObjectUrl": "s3://cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}/36eef53bd55456183ff275e1ea5ce97d1544aee6a149d80bff0864e7a3f995bc.json", "requiresBootstrapStackVersion": 6, "bootstrapStackVersionSsmParameter": "/cdk-bootstrap/hnb659fds/version", "additionalDependencies": [ diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-streaming.js.snapshot/tree.json b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-streaming.js.snapshot/tree.json index 031e0e13c6ee8..5b27549cab6e6 100644 --- a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-streaming.js.snapshot/tree.json +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-streaming.js.snapshot/tree.json @@ -194,7 +194,8 @@ "defaultArguments": { "--job-language": "scala", "--class": "com.example.HelloWorld", - "--enable-metrics": "" + "--enable-metrics": "", + "--enable-observability-metrics": "true" }, "glueVersion": "4.0", "numberOfWorkers": 10, @@ -204,7 +205,7 @@ "Arn" ] }, - "workerType": "G.2X" + "workerType": "G.1X" } }, "constructInfo": { @@ -247,6 +248,7 @@ "--job-language": "scala", "--class": "com.example.HelloWorld", "--enable-metrics": "", + "--enable-observability-metrics": "true", "arg1": "value1", "arg2": "value2" }, diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalasparkflex-etl.js.snapshot/aws-glue-job-scalasparkflex-etl.assets.json b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalasparkflex-etl.js.snapshot/aws-glue-job-scalasparkflex-etl.assets.json index a01a92191d2dd..246e028be6d6f 100644 --- a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalasparkflex-etl.js.snapshot/aws-glue-job-scalasparkflex-etl.assets.json +++ 
b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalasparkflex-etl.js.snapshot/aws-glue-job-scalasparkflex-etl.assets.json @@ -14,7 +14,7 @@ } } }, - "4e827540a0ec17542c449dee42f9232d04eedd33c52dc7d704053aa70635b75d": { + "ff1b08d04e7d65e42ead8e33a88a380c6678218b733d0b350cd0bea32ec2944f": { "source": { "path": "aws-glue-job-scalasparkflex-etl.template.json", "packaging": "file" @@ -22,7 +22,7 @@ "destinations": { "current_account-current_region": { "bucketName": "cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}", - "objectKey": "4e827540a0ec17542c449dee42f9232d04eedd33c52dc7d704053aa70635b75d.json", + "objectKey": "ff1b08d04e7d65e42ead8e33a88a380c6678218b733d0b350cd0bea32ec2944f.json", "assumeRoleArn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-file-publishing-role-${AWS::AccountId}-${AWS::Region}" } } diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalasparkflex-etl.js.snapshot/aws-glue-job-scalasparkflex-etl.template.json b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalasparkflex-etl.js.snapshot/aws-glue-job-scalasparkflex-etl.template.json index 9bbff43bace40..f046068d73e7c 100644 --- a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalasparkflex-etl.js.snapshot/aws-glue-job-scalasparkflex-etl.template.json +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalasparkflex-etl.js.snapshot/aws-glue-job-scalasparkflex-etl.template.json @@ -109,7 +109,8 @@ "DefaultArguments": { "--job-language": "scala", "--class": "com.example.HelloWorld", - "--enable-metrics": "" + "--enable-metrics": "", + "--enable-observability-metrics": "true" }, "ExecutionClass": "FLEX", "GlueVersion": "3.0", @@ -120,7 +121,7 @@ "Arn" ] }, - "WorkerType": "G.2X" + "WorkerType": "G.1X" } }, "OverrideScalaSparkFlexEtlJob843D93B4": { @@ -145,6 +146,7 @@ "--job-language": "scala", "--class": "com.example.HelloWorld", "--enable-metrics": "", + "--enable-observability-metrics": "true", "arg1": "value1", "arg2": "value2" }, diff --git 
a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalasparkflex-etl.js.snapshot/manifest.json b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalasparkflex-etl.js.snapshot/manifest.json index 4342aeb236671..76778efc60610 100644 --- a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalasparkflex-etl.js.snapshot/manifest.json +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalasparkflex-etl.js.snapshot/manifest.json @@ -18,7 +18,7 @@ "validateOnSynth": false, "assumeRoleArn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-deploy-role-${AWS::AccountId}-${AWS::Region}", "cloudFormationExecutionRoleArn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-cfn-exec-role-${AWS::AccountId}-${AWS::Region}", - "stackTemplateAssetObjectUrl": "s3://cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}/4e827540a0ec17542c449dee42f9232d04eedd33c52dc7d704053aa70635b75d.json", + "stackTemplateAssetObjectUrl": "s3://cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}/ff1b08d04e7d65e42ead8e33a88a380c6678218b733d0b350cd0bea32ec2944f.json", "requiresBootstrapStackVersion": 6, "bootstrapStackVersionSsmParameter": "/cdk-bootstrap/hnb659fds/version", "additionalDependencies": [ diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalasparkflex-etl.js.snapshot/tree.json b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalasparkflex-etl.js.snapshot/tree.json index 1e83043814cff..c8dc5ada88490 100644 --- a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalasparkflex-etl.js.snapshot/tree.json +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalasparkflex-etl.js.snapshot/tree.json @@ -194,7 +194,8 @@ "defaultArguments": { "--job-language": "scala", "--class": "com.example.HelloWorld", - "--enable-metrics": "" + "--enable-metrics": "", + "--enable-observability-metrics": "true" }, "executionClass": "FLEX", "glueVersion": "3.0", @@ -205,7 +206,7 @@ "Arn" ] }, - "workerType": "G.2X" + "workerType": "G.1X" } }, "constructInfo": { @@ 
-248,6 +249,7 @@ "--job-language": "scala", "--class": "com.example.HelloWorld", "--enable-metrics": "", + "--enable-observability-metrics": "true", "arg1": "value1", "arg2": "value2" }, diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.ray-job.js.snapshot/aws-glue-ray-job.assets.json b/packages/@aws-cdk/aws-glue-alpha/test/integ.ray-job.js.snapshot/aws-glue-ray-job.assets.json index 02da523f19100..263566b22517d 100644 --- a/packages/@aws-cdk/aws-glue-alpha/test/integ.ray-job.js.snapshot/aws-glue-ray-job.assets.json +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.ray-job.js.snapshot/aws-glue-ray-job.assets.json @@ -14,7 +14,7 @@ } } }, - "a8fda7378f7edbed186ceae92474d861037062e15e578b62a4ba9cfdb80a57b8": { + "19150b29a8e6a63aa62cb365cef33a8598dd853733842bde8c714f2193ca3e64": { "source": { "path": "aws-glue-ray-job.template.json", "packaging": "file" @@ -22,7 +22,7 @@ "destinations": { "current_account-current_region": { "bucketName": "cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}", - "objectKey": "a8fda7378f7edbed186ceae92474d861037062e15e578b62a4ba9cfdb80a57b8.json", + "objectKey": "19150b29a8e6a63aa62cb365cef33a8598dd853733842bde8c714f2193ca3e64.json", "assumeRoleArn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-file-publishing-role-${AWS::AccountId}-${AWS::Region}" } } diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.ray-job.js.snapshot/aws-glue-ray-job.template.json b/packages/@aws-cdk/aws-glue-alpha/test/integ.ray-job.js.snapshot/aws-glue-ray-job.template.json index 806a589c73b7c..39957f8f30da6 100644 --- a/packages/@aws-cdk/aws-glue-alpha/test/integ.ray-job.js.snapshot/aws-glue-ray-job.template.json +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.ray-job.js.snapshot/aws-glue-ray-job.template.json @@ -108,7 +108,8 @@ } }, "DefaultArguments": { - "--enable-metrics": "" + "--enable-metrics": "", + "--enable-observability-metrics": "true" }, "GlueVersion": "4.0", "NumberOfWorkers": 3, @@ -143,7 +144,8 @@ 
"DefaultArguments": { "arg1": "value1", "arg2": "value2", - "--enable-metrics": "" + "--enable-metrics": "", + "--enable-observability-metrics": "true" }, "GlueVersion": "4.0", "Name": "RayJobWith5Workers", diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.ray-job.js.snapshot/manifest.json b/packages/@aws-cdk/aws-glue-alpha/test/integ.ray-job.js.snapshot/manifest.json index aa92e327c484f..b840069f6db88 100644 --- a/packages/@aws-cdk/aws-glue-alpha/test/integ.ray-job.js.snapshot/manifest.json +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.ray-job.js.snapshot/manifest.json @@ -18,7 +18,7 @@ "validateOnSynth": false, "assumeRoleArn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-deploy-role-${AWS::AccountId}-${AWS::Region}", "cloudFormationExecutionRoleArn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-cfn-exec-role-${AWS::AccountId}-${AWS::Region}", - "stackTemplateAssetObjectUrl": "s3://cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}/a8fda7378f7edbed186ceae92474d861037062e15e578b62a4ba9cfdb80a57b8.json", + "stackTemplateAssetObjectUrl": "s3://cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}/19150b29a8e6a63aa62cb365cef33a8598dd853733842bde8c714f2193ca3e64.json", "requiresBootstrapStackVersion": 6, "bootstrapStackVersionSsmParameter": "/cdk-bootstrap/hnb659fds/version", "additionalDependencies": [ diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.ray-job.js.snapshot/tree.json b/packages/@aws-cdk/aws-glue-alpha/test/integ.ray-job.js.snapshot/tree.json index 22326c8095593..29df1252455be 100644 --- a/packages/@aws-cdk/aws-glue-alpha/test/integ.ray-job.js.snapshot/tree.json +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.ray-job.js.snapshot/tree.json @@ -193,7 +193,8 @@ "runtime": "Ray2.4" }, "defaultArguments": { - "--enable-metrics": "" + "--enable-metrics": "", + "--enable-observability-metrics": "true" }, "glueVersion": "4.0", "numberOfWorkers": 3, @@ -246,7 +247,8 @@ "defaultArguments": { "arg1": 
"value1", "arg2": "value2", - "--enable-metrics": "" + "--enable-metrics": "", + "--enable-observability-metrics": "true" }, "glueVersion": "4.0", "name": "RayJobWith5Workers", diff --git a/packages/@aws-cdk/aws-glue-alpha/test/pyspark-etl-jobs.test.ts b/packages/@aws-cdk/aws-glue-alpha/test/pyspark-etl-jobs.test.ts index f1628c83f70b6..2f3443be45adb 100644 --- a/packages/@aws-cdk/aws-glue-alpha/test/pyspark-etl-jobs.test.ts +++ b/packages/@aws-cdk/aws-glue-alpha/test/pyspark-etl-jobs.test.ts @@ -45,9 +45,9 @@ describe('Job', () => { }); }); - test('Default WorkerType should be G.2X', () => { + test('Default WorkerType should be G.1X', () => { Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { - WorkerType: 'G.2X', + WorkerType: 'G.1X', }); }); }); diff --git a/packages/@aws-cdk/aws-glue-alpha/test/pyspark-streaming-jobs.test.ts b/packages/@aws-cdk/aws-glue-alpha/test/pyspark-streaming-jobs.test.ts index a6ca7f2e9f472..3320ffd288298 100644 --- a/packages/@aws-cdk/aws-glue-alpha/test/pyspark-streaming-jobs.test.ts +++ b/packages/@aws-cdk/aws-glue-alpha/test/pyspark-streaming-jobs.test.ts @@ -45,9 +45,9 @@ describe('Job', () => { }); }); - test('Default WorkerType should be G.2X', () => { + test('Default WorkerType should be G.1X', () => { Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { - WorkerType: 'G.2X', + WorkerType: 'G.1X', }); }); }); diff --git a/packages/@aws-cdk/aws-glue-alpha/test/pysparkflex-etl-jobs.test.ts b/packages/@aws-cdk/aws-glue-alpha/test/pysparkflex-etl-jobs.test.ts index 77390ab39dc2a..099a81bf4f2c5 100644 --- a/packages/@aws-cdk/aws-glue-alpha/test/pysparkflex-etl-jobs.test.ts +++ b/packages/@aws-cdk/aws-glue-alpha/test/pysparkflex-etl-jobs.test.ts @@ -39,9 +39,9 @@ describe('Job', () => { }); }); - test('Default WorkerType should be G.2X', () => { + test('Default WorkerType should be G.1X', () => { Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { - WorkerType: 'G.2X', + WorkerType: 
'G.1X', }); }); diff --git a/packages/@aws-cdk/aws-glue-alpha/test/scalaspark-etl-jobs.test.ts b/packages/@aws-cdk/aws-glue-alpha/test/scalaspark-etl-jobs.test.ts index 1e5957dce85e4..d718dfee4302f 100644 --- a/packages/@aws-cdk/aws-glue-alpha/test/scalaspark-etl-jobs.test.ts +++ b/packages/@aws-cdk/aws-glue-alpha/test/scalaspark-etl-jobs.test.ts @@ -47,9 +47,9 @@ describe('Job', () => { }); }); - test('Default WorkerType should be G.2X', () => { + test('Default WorkerType should be G.1X', () => { Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { - WorkerType: 'G.2X', + WorkerType: 'G.1X', }); }); }); diff --git a/packages/@aws-cdk/aws-glue-alpha/test/scalaspark-streaming-jobs.test.ts b/packages/@aws-cdk/aws-glue-alpha/test/scalaspark-streaming-jobs.test.ts index 853e28dfa4224..b6c44f3f0a154 100644 --- a/packages/@aws-cdk/aws-glue-alpha/test/scalaspark-streaming-jobs.test.ts +++ b/packages/@aws-cdk/aws-glue-alpha/test/scalaspark-streaming-jobs.test.ts @@ -47,9 +47,9 @@ describe('Job', () => { }); }); - test('Default WorkerType should be G.2X', () => { + test('Default WorkerType should be G.1X', () => { Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { - WorkerType: 'G.2X', + WorkerType: 'G.1X', }); }); }); diff --git a/packages/@aws-cdk/aws-glue-alpha/test/scalasparkflex-etl-jobs.test.ts b/packages/@aws-cdk/aws-glue-alpha/test/scalasparkflex-etl-jobs.test.ts index f9e85e9b72e4b..e34afefea0b10 100644 --- a/packages/@aws-cdk/aws-glue-alpha/test/scalasparkflex-etl-jobs.test.ts +++ b/packages/@aws-cdk/aws-glue-alpha/test/scalasparkflex-etl-jobs.test.ts @@ -41,9 +41,9 @@ describe('Job', () => { }); }); - test('Default WorkerType should be G.2X', () => { + test('Default WorkerType should be G.1X', () => { Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { - WorkerType: 'G.2X', + WorkerType: 'G.1X', }); }); From 029c52afd22235e61befea81c1a0d311dcc782bb Mon Sep 17 00:00:00 2001 From: "Janardhan (Janny) Molumuri" Date: 
Tue, 25 Jun 2024 16:44:44 +0000 Subject: [PATCH 30/51] Rename flex jobs per naming convention --- ...lex-etl-job.ts => pyspark-flex-etl-job.ts} | 0 ...9be7858a12b228a2ae6e5c10faccd9097b1e855.py | 1 + .../aws-glue-job-pysparkflex-etl.assets.json | 32 ++ ...aws-glue-job-pysparkflex-etl.template.json | 206 ++++++++++ ...efaultTestDeployAssert3F3EC951.assets.json | 19 + ...aultTestDeployAssert3F3EC951.template.json | 36 ++ .../cdk.out | 1 + .../integ.json | 12 + .../manifest.json | 131 ++++++ .../tree.json | 375 ++++++++++++++++++ ...x-etl.ts => integ.job-pyspark-flex-etl.ts} | 0 ...3f8703573eb6b69528c5d52190d72579c91602.jar | Bin 0 -> 782 bytes ...ws-glue-job-scalasparkflex-etl.assets.json | 32 ++ ...-glue-job-scalasparkflex-etl.template.json | 206 ++++++++++ ...efaultTestDeployAssert8009E6FC.assets.json | 19 + ...aultTestDeployAssert8009E6FC.template.json | 36 ++ .../cdk.out | 1 + .../integ.json | 12 + .../manifest.json | 131 ++++++ .../tree.json | 375 ++++++++++++++++++ ...tl.ts => integ.job-scalaspark-flex-etl.ts} | 0 ....test.ts => pyspark-flex-etl-jobs.test.ts} | 0 ...st.ts => scalaspark-flex-etl-jobs.test.ts} | 0 23 files changed, 1625 insertions(+) rename packages/@aws-cdk/aws-glue-alpha/lib/jobs/{pysparkflex-etl-job.ts => pyspark-flex-etl-job.ts} (100%) create mode 100644 packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-flex-etl.js.snapshot/asset.432033e3218068a915d2532fa9be7858a12b228a2ae6e5c10faccd9097b1e855.py create mode 100644 packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-flex-etl.js.snapshot/aws-glue-job-pysparkflex-etl.assets.json create mode 100644 packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-flex-etl.js.snapshot/aws-glue-job-pysparkflex-etl.template.json create mode 100644 packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-flex-etl.js.snapshot/awsgluejobpysparkflexetlintegtestDefaultTestDeployAssert3F3EC951.assets.json create mode 100644 
packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-flex-etl.js.snapshot/awsgluejobpysparkflexetlintegtestDefaultTestDeployAssert3F3EC951.template.json create mode 100644 packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-flex-etl.js.snapshot/cdk.out create mode 100644 packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-flex-etl.js.snapshot/integ.json create mode 100644 packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-flex-etl.js.snapshot/manifest.json create mode 100644 packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-flex-etl.js.snapshot/tree.json rename packages/@aws-cdk/aws-glue-alpha/test/{integ.job-pysparkflex-etl.ts => integ.job-pyspark-flex-etl.ts} (100%) create mode 100644 packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-flex-etl.js.snapshot/asset.e305655b966b957f91fcec580e3f8703573eb6b69528c5d52190d72579c91602.jar create mode 100644 packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-flex-etl.js.snapshot/aws-glue-job-scalasparkflex-etl.assets.json create mode 100644 packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-flex-etl.js.snapshot/aws-glue-job-scalasparkflex-etl.template.json create mode 100644 packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-flex-etl.js.snapshot/awsgluejobscalasparkflexetlintegtestDefaultTestDeployAssert8009E6FC.assets.json create mode 100644 packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-flex-etl.js.snapshot/awsgluejobscalasparkflexetlintegtestDefaultTestDeployAssert8009E6FC.template.json create mode 100644 packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-flex-etl.js.snapshot/cdk.out create mode 100644 packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-flex-etl.js.snapshot/integ.json create mode 100644 packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-flex-etl.js.snapshot/manifest.json create mode 100644 packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-flex-etl.js.snapshot/tree.json rename 
packages/@aws-cdk/aws-glue-alpha/test/{integ.job-scalasparkflex-etl.ts => integ.job-scalaspark-flex-etl.ts} (100%) rename packages/@aws-cdk/aws-glue-alpha/test/{pysparkflex-etl-jobs.test.ts => pyspark-flex-etl-jobs.test.ts} (100%) rename packages/@aws-cdk/aws-glue-alpha/test/{scalasparkflex-etl-jobs.test.ts => scalaspark-flex-etl-jobs.test.ts} (100%) diff --git a/packages/@aws-cdk/aws-glue-alpha/lib/jobs/pysparkflex-etl-job.ts b/packages/@aws-cdk/aws-glue-alpha/lib/jobs/pyspark-flex-etl-job.ts similarity index 100% rename from packages/@aws-cdk/aws-glue-alpha/lib/jobs/pysparkflex-etl-job.ts rename to packages/@aws-cdk/aws-glue-alpha/lib/jobs/pyspark-flex-etl-job.ts diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-flex-etl.js.snapshot/asset.432033e3218068a915d2532fa9be7858a12b228a2ae6e5c10faccd9097b1e855.py b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-flex-etl.js.snapshot/asset.432033e3218068a915d2532fa9be7858a12b228a2ae6e5c10faccd9097b1e855.py new file mode 100644 index 0000000000000..e75154b7c390f --- /dev/null +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-flex-etl.js.snapshot/asset.432033e3218068a915d2532fa9be7858a12b228a2ae6e5c10faccd9097b1e855.py @@ -0,0 +1 @@ +print("hello world") \ No newline at end of file diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-flex-etl.js.snapshot/aws-glue-job-pysparkflex-etl.assets.json b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-flex-etl.js.snapshot/aws-glue-job-pysparkflex-etl.assets.json new file mode 100644 index 0000000000000..69ac81cf61856 --- /dev/null +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-flex-etl.js.snapshot/aws-glue-job-pysparkflex-etl.assets.json @@ -0,0 +1,32 @@ +{ + "version": "36.0.0", + "files": { + "432033e3218068a915d2532fa9be7858a12b228a2ae6e5c10faccd9097b1e855": { + "source": { + "path": "asset.432033e3218068a915d2532fa9be7858a12b228a2ae6e5c10faccd9097b1e855.py", + "packaging": "file" + }, + 
"destinations": { + "current_account-current_region": { + "bucketName": "cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}", + "objectKey": "432033e3218068a915d2532fa9be7858a12b228a2ae6e5c10faccd9097b1e855.py", + "assumeRoleArn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-file-publishing-role-${AWS::AccountId}-${AWS::Region}" + } + } + }, + "526212322ba7ab66ae5aab010091baff528275b87d212460e3ecff513c0b2eff": { + "source": { + "path": "aws-glue-job-pysparkflex-etl.template.json", + "packaging": "file" + }, + "destinations": { + "current_account-current_region": { + "bucketName": "cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}", + "objectKey": "526212322ba7ab66ae5aab010091baff528275b87d212460e3ecff513c0b2eff.json", + "assumeRoleArn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-file-publishing-role-${AWS::AccountId}-${AWS::Region}" + } + } + } + }, + "dockerImages": {} +} \ No newline at end of file diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-flex-etl.js.snapshot/aws-glue-job-pysparkflex-etl.template.json b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-flex-etl.js.snapshot/aws-glue-job-pysparkflex-etl.template.json new file mode 100644 index 0000000000000..971a1cb357e36 --- /dev/null +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-flex-etl.js.snapshot/aws-glue-job-pysparkflex-etl.template.json @@ -0,0 +1,206 @@ +{ + "Resources": { + "IAMServiceRole61C662C4": { + "Type": "AWS::IAM::Role", + "Properties": { + "AssumeRolePolicyDocument": { + "Statement": [ + { + "Action": "sts:AssumeRole", + "Effect": "Allow", + "Principal": { + "Service": "glue.amazonaws.com" + } + } + ], + "Version": "2012-10-17" + }, + "ManagedPolicyArns": [ + { + "Fn::Join": [ + "", + [ + "arn:", + { + "Ref": "AWS::Partition" + }, + ":iam::aws:policy/service-role/AWSGlueServiceRole" + ] + ] + } + ] + } + }, + "IAMServiceRoleDefaultPolicy379D1A0E": { + "Type": "AWS::IAM::Policy", + "Properties": { + 
"PolicyDocument": { + "Statement": [ + { + "Action": [ + "s3:GetBucket*", + "s3:GetObject*", + "s3:List*" + ], + "Effect": "Allow", + "Resource": [ + { + "Fn::Join": [ + "", + [ + "arn:", + { + "Ref": "AWS::Partition" + }, + ":s3:::", + { + "Fn::Sub": "cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}" + }, + "/*" + ] + ] + }, + { + "Fn::Join": [ + "", + [ + "arn:", + { + "Ref": "AWS::Partition" + }, + ":s3:::", + { + "Fn::Sub": "cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}" + } + ] + ] + } + ] + } + ], + "Version": "2012-10-17" + }, + "PolicyName": "IAMServiceRoleDefaultPolicy379D1A0E", + "Roles": [ + { + "Ref": "IAMServiceRole61C662C4" + } + ] + } + }, + "BasicPySparkFlexEtlJobC50DC250": { + "Type": "AWS::Glue::Job", + "Properties": { + "Command": { + "Name": "glueetl", + "PythonVersion": "3", + "ScriptLocation": { + "Fn::Join": [ + "", + [ + "s3://", + { + "Fn::Sub": "cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}" + }, + "/432033e3218068a915d2532fa9be7858a12b228a2ae6e5c10faccd9097b1e855.py" + ] + ] + } + }, + "DefaultArguments": { + "--job-language": "python", + "--enable-metrics": "", + "--enable-observability-metrics": "true" + }, + "ExecutionClass": "FLEX", + "GlueVersion": "3.0", + "NumberOfWorkers": 10, + "Role": { + "Fn::GetAtt": [ + "IAMServiceRole61C662C4", + "Arn" + ] + }, + "WorkerType": "G.1X" + } + }, + "OverridePySparkFlexEtlJob8EE4CFA1": { + "Type": "AWS::Glue::Job", + "Properties": { + "Command": { + "Name": "glueetl", + "PythonVersion": "3", + "ScriptLocation": { + "Fn::Join": [ + "", + [ + "s3://", + { + "Fn::Sub": "cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}" + }, + "/432033e3218068a915d2532fa9be7858a12b228a2ae6e5c10faccd9097b1e855.py" + ] + ] + } + }, + "DefaultArguments": { + "--job-language": "python", + "--enable-metrics": "", + "--enable-observability-metrics": "true", + "arg1": "value1", + "arg2": "value2" + }, + "Description": "Optional Override PySpark Flex Etl Job", + "ExecutionClass": "FLEX", + 
"GlueVersion": "3.0", + "Name": "Optional Override PySpark Flex Etl Job", + "NumberOfWorkers": 20, + "Role": { + "Fn::GetAtt": [ + "IAMServiceRole61C662C4", + "Arn" + ] + }, + "Tags": { + "key": "value" + }, + "Timeout": 15, + "WorkerType": "G.1X" + } + } + }, + "Parameters": { + "BootstrapVersion": { + "Type": "AWS::SSM::Parameter::Value", + "Default": "/cdk-bootstrap/hnb659fds/version", + "Description": "Version of the CDK Bootstrap resources in this environment, automatically retrieved from SSM Parameter Store. [cdk:skip]" + } + }, + "Rules": { + "CheckBootstrapVersion": { + "Assertions": [ + { + "Assert": { + "Fn::Not": [ + { + "Fn::Contains": [ + [ + "1", + "2", + "3", + "4", + "5" + ], + { + "Ref": "BootstrapVersion" + } + ] + } + ] + }, + "AssertDescription": "CDK bootstrap stack version 6 required. Please run 'cdk bootstrap' with a recent version of the CDK CLI." + } + ] + } + } +} \ No newline at end of file diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-flex-etl.js.snapshot/awsgluejobpysparkflexetlintegtestDefaultTestDeployAssert3F3EC951.assets.json b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-flex-etl.js.snapshot/awsgluejobpysparkflexetlintegtestDefaultTestDeployAssert3F3EC951.assets.json new file mode 100644 index 0000000000000..d77fab393274a --- /dev/null +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-flex-etl.js.snapshot/awsgluejobpysparkflexetlintegtestDefaultTestDeployAssert3F3EC951.assets.json @@ -0,0 +1,19 @@ +{ + "version": "36.0.0", + "files": { + "21fbb51d7b23f6a6c262b46a9caee79d744a3ac019fd45422d988b96d44b2a22": { + "source": { + "path": "awsgluejobpysparkflexetlintegtestDefaultTestDeployAssert3F3EC951.template.json", + "packaging": "file" + }, + "destinations": { + "current_account-current_region": { + "bucketName": "cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}", + "objectKey": "21fbb51d7b23f6a6c262b46a9caee79d744a3ac019fd45422d988b96d44b2a22.json", + "assumeRoleArn": 
"arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-file-publishing-role-${AWS::AccountId}-${AWS::Region}" + } + } + } + }, + "dockerImages": {} +} \ No newline at end of file diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-flex-etl.js.snapshot/awsgluejobpysparkflexetlintegtestDefaultTestDeployAssert3F3EC951.template.json b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-flex-etl.js.snapshot/awsgluejobpysparkflexetlintegtestDefaultTestDeployAssert3F3EC951.template.json new file mode 100644 index 0000000000000..ad9d0fb73d1dd --- /dev/null +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-flex-etl.js.snapshot/awsgluejobpysparkflexetlintegtestDefaultTestDeployAssert3F3EC951.template.json @@ -0,0 +1,36 @@ +{ + "Parameters": { + "BootstrapVersion": { + "Type": "AWS::SSM::Parameter::Value", + "Default": "/cdk-bootstrap/hnb659fds/version", + "Description": "Version of the CDK Bootstrap resources in this environment, automatically retrieved from SSM Parameter Store. [cdk:skip]" + } + }, + "Rules": { + "CheckBootstrapVersion": { + "Assertions": [ + { + "Assert": { + "Fn::Not": [ + { + "Fn::Contains": [ + [ + "1", + "2", + "3", + "4", + "5" + ], + { + "Ref": "BootstrapVersion" + } + ] + } + ] + }, + "AssertDescription": "CDK bootstrap stack version 6 required. Please run 'cdk bootstrap' with a recent version of the CDK CLI." 
+ } + ] + } + } +} \ No newline at end of file diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-flex-etl.js.snapshot/cdk.out b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-flex-etl.js.snapshot/cdk.out new file mode 100644 index 0000000000000..1f0068d32659a --- /dev/null +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-flex-etl.js.snapshot/cdk.out @@ -0,0 +1 @@ +{"version":"36.0.0"} \ No newline at end of file diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-flex-etl.js.snapshot/integ.json b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-flex-etl.js.snapshot/integ.json new file mode 100644 index 0000000000000..b837700f2ba0b --- /dev/null +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-flex-etl.js.snapshot/integ.json @@ -0,0 +1,12 @@ +{ + "version": "36.0.0", + "testCases": { + "aws-glue-job-pysparkflex-etl-integ-test/DefaultTest": { + "stacks": [ + "aws-glue-job-pysparkflex-etl" + ], + "assertionStack": "aws-glue-job-pysparkflex-etl-integ-test/DefaultTest/DeployAssert", + "assertionStackName": "awsgluejobpysparkflexetlintegtestDefaultTestDeployAssert3F3EC951" + } + } +} \ No newline at end of file diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-flex-etl.js.snapshot/manifest.json b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-flex-etl.js.snapshot/manifest.json new file mode 100644 index 0000000000000..8fb3110de8188 --- /dev/null +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-flex-etl.js.snapshot/manifest.json @@ -0,0 +1,131 @@ +{ + "version": "36.0.0", + "artifacts": { + "aws-glue-job-pysparkflex-etl.assets": { + "type": "cdk:asset-manifest", + "properties": { + "file": "aws-glue-job-pysparkflex-etl.assets.json", + "requiresBootstrapStackVersion": 6, + "bootstrapStackVersionSsmParameter": "/cdk-bootstrap/hnb659fds/version" + } + }, + "aws-glue-job-pysparkflex-etl": { + "type": "aws:cloudformation:stack", + "environment": 
"aws://unknown-account/unknown-region", + "properties": { + "templateFile": "aws-glue-job-pysparkflex-etl.template.json", + "terminationProtection": false, + "validateOnSynth": false, + "assumeRoleArn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-deploy-role-${AWS::AccountId}-${AWS::Region}", + "cloudFormationExecutionRoleArn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-cfn-exec-role-${AWS::AccountId}-${AWS::Region}", + "stackTemplateAssetObjectUrl": "s3://cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}/526212322ba7ab66ae5aab010091baff528275b87d212460e3ecff513c0b2eff.json", + "requiresBootstrapStackVersion": 6, + "bootstrapStackVersionSsmParameter": "/cdk-bootstrap/hnb659fds/version", + "additionalDependencies": [ + "aws-glue-job-pysparkflex-etl.assets" + ], + "lookupRole": { + "arn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-lookup-role-${AWS::AccountId}-${AWS::Region}", + "requiresBootstrapStackVersion": 8, + "bootstrapStackVersionSsmParameter": "/cdk-bootstrap/hnb659fds/version" + } + }, + "dependencies": [ + "aws-glue-job-pysparkflex-etl.assets" + ], + "metadata": { + "/aws-glue-job-pysparkflex-etl/IAMServiceRole/Resource": [ + { + "type": "aws:cdk:logicalId", + "data": "IAMServiceRole61C662C4" + } + ], + "/aws-glue-job-pysparkflex-etl/IAMServiceRole/DefaultPolicy/Resource": [ + { + "type": "aws:cdk:logicalId", + "data": "IAMServiceRoleDefaultPolicy379D1A0E" + } + ], + "/aws-glue-job-pysparkflex-etl/BasicPySparkFlexEtlJob/Resource": [ + { + "type": "aws:cdk:logicalId", + "data": "BasicPySparkFlexEtlJobC50DC250" + } + ], + "/aws-glue-job-pysparkflex-etl/OverridePySparkFlexEtlJob/Resource": [ + { + "type": "aws:cdk:logicalId", + "data": "OverridePySparkFlexEtlJob8EE4CFA1" + } + ], + "/aws-glue-job-pysparkflex-etl/BootstrapVersion": [ + { + "type": "aws:cdk:logicalId", + "data": "BootstrapVersion" + } + ], + "/aws-glue-job-pysparkflex-etl/CheckBootstrapVersion": [ + { + "type": 
"aws:cdk:logicalId", + "data": "CheckBootstrapVersion" + } + ] + }, + "displayName": "aws-glue-job-pysparkflex-etl" + }, + "awsgluejobpysparkflexetlintegtestDefaultTestDeployAssert3F3EC951.assets": { + "type": "cdk:asset-manifest", + "properties": { + "file": "awsgluejobpysparkflexetlintegtestDefaultTestDeployAssert3F3EC951.assets.json", + "requiresBootstrapStackVersion": 6, + "bootstrapStackVersionSsmParameter": "/cdk-bootstrap/hnb659fds/version" + } + }, + "awsgluejobpysparkflexetlintegtestDefaultTestDeployAssert3F3EC951": { + "type": "aws:cloudformation:stack", + "environment": "aws://unknown-account/unknown-region", + "properties": { + "templateFile": "awsgluejobpysparkflexetlintegtestDefaultTestDeployAssert3F3EC951.template.json", + "terminationProtection": false, + "validateOnSynth": false, + "assumeRoleArn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-deploy-role-${AWS::AccountId}-${AWS::Region}", + "cloudFormationExecutionRoleArn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-cfn-exec-role-${AWS::AccountId}-${AWS::Region}", + "stackTemplateAssetObjectUrl": "s3://cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}/21fbb51d7b23f6a6c262b46a9caee79d744a3ac019fd45422d988b96d44b2a22.json", + "requiresBootstrapStackVersion": 6, + "bootstrapStackVersionSsmParameter": "/cdk-bootstrap/hnb659fds/version", + "additionalDependencies": [ + "awsgluejobpysparkflexetlintegtestDefaultTestDeployAssert3F3EC951.assets" + ], + "lookupRole": { + "arn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-lookup-role-${AWS::AccountId}-${AWS::Region}", + "requiresBootstrapStackVersion": 8, + "bootstrapStackVersionSsmParameter": "/cdk-bootstrap/hnb659fds/version" + } + }, + "dependencies": [ + "awsgluejobpysparkflexetlintegtestDefaultTestDeployAssert3F3EC951.assets" + ], + "metadata": { + "/aws-glue-job-pysparkflex-etl-integ-test/DefaultTest/DeployAssert/BootstrapVersion": [ + { + "type": "aws:cdk:logicalId", + "data": 
"BootstrapVersion" + } + ], + "/aws-glue-job-pysparkflex-etl-integ-test/DefaultTest/DeployAssert/CheckBootstrapVersion": [ + { + "type": "aws:cdk:logicalId", + "data": "CheckBootstrapVersion" + } + ] + }, + "displayName": "aws-glue-job-pysparkflex-etl-integ-test/DefaultTest/DeployAssert" + }, + "Tree": { + "type": "cdk:tree", + "properties": { + "file": "tree.json" + } + } + } +} \ No newline at end of file diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-flex-etl.js.snapshot/tree.json b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-flex-etl.js.snapshot/tree.json new file mode 100644 index 0000000000000..d3ebe4e6fb7ec --- /dev/null +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-flex-etl.js.snapshot/tree.json @@ -0,0 +1,375 @@ +{ + "version": "tree-0.1", + "tree": { + "id": "App", + "path": "", + "children": { + "aws-glue-job-pysparkflex-etl": { + "id": "aws-glue-job-pysparkflex-etl", + "path": "aws-glue-job-pysparkflex-etl", + "children": { + "IAMServiceRole": { + "id": "IAMServiceRole", + "path": "aws-glue-job-pysparkflex-etl/IAMServiceRole", + "children": { + "ImportIAMServiceRole": { + "id": "ImportIAMServiceRole", + "path": "aws-glue-job-pysparkflex-etl/IAMServiceRole/ImportIAMServiceRole", + "constructInfo": { + "fqn": "aws-cdk-lib.Resource", + "version": "0.0.0" + } + }, + "Resource": { + "id": "Resource", + "path": "aws-glue-job-pysparkflex-etl/IAMServiceRole/Resource", + "attributes": { + "aws:cdk:cloudformation:type": "AWS::IAM::Role", + "aws:cdk:cloudformation:props": { + "assumeRolePolicyDocument": { + "Statement": [ + { + "Action": "sts:AssumeRole", + "Effect": "Allow", + "Principal": { + "Service": "glue.amazonaws.com" + } + } + ], + "Version": "2012-10-17" + }, + "managedPolicyArns": [ + { + "Fn::Join": [ + "", + [ + "arn:", + { + "Ref": "AWS::Partition" + }, + ":iam::aws:policy/service-role/AWSGlueServiceRole" + ] + ] + } + ] + } + }, + "constructInfo": { + "fqn": "aws-cdk-lib.aws_iam.CfnRole", + 
"version": "0.0.0" + } + }, + "DefaultPolicy": { + "id": "DefaultPolicy", + "path": "aws-glue-job-pysparkflex-etl/IAMServiceRole/DefaultPolicy", + "children": { + "Resource": { + "id": "Resource", + "path": "aws-glue-job-pysparkflex-etl/IAMServiceRole/DefaultPolicy/Resource", + "attributes": { + "aws:cdk:cloudformation:type": "AWS::IAM::Policy", + "aws:cdk:cloudformation:props": { + "policyDocument": { + "Statement": [ + { + "Action": [ + "s3:GetBucket*", + "s3:GetObject*", + "s3:List*" + ], + "Effect": "Allow", + "Resource": [ + { + "Fn::Join": [ + "", + [ + "arn:", + { + "Ref": "AWS::Partition" + }, + ":s3:::", + { + "Fn::Sub": "cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}" + }, + "/*" + ] + ] + }, + { + "Fn::Join": [ + "", + [ + "arn:", + { + "Ref": "AWS::Partition" + }, + ":s3:::", + { + "Fn::Sub": "cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}" + } + ] + ] + } + ] + } + ], + "Version": "2012-10-17" + }, + "policyName": "IAMServiceRoleDefaultPolicy379D1A0E", + "roles": [ + { + "Ref": "IAMServiceRole61C662C4" + } + ] + } + }, + "constructInfo": { + "fqn": "aws-cdk-lib.aws_iam.CfnPolicy", + "version": "0.0.0" + } + } + }, + "constructInfo": { + "fqn": "aws-cdk-lib.aws_iam.Policy", + "version": "0.0.0" + } + } + }, + "constructInfo": { + "fqn": "aws-cdk-lib.aws_iam.Role", + "version": "0.0.0" + } + }, + "BasicPySparkFlexEtlJob": { + "id": "BasicPySparkFlexEtlJob", + "path": "aws-glue-job-pysparkflex-etl/BasicPySparkFlexEtlJob", + "children": { + "Code2907ea7be4a583708cfffc21b3df1dfa": { + "id": "Code2907ea7be4a583708cfffc21b3df1dfa", + "path": "aws-glue-job-pysparkflex-etl/BasicPySparkFlexEtlJob/Code2907ea7be4a583708cfffc21b3df1dfa", + "children": { + "Stage": { + "id": "Stage", + "path": "aws-glue-job-pysparkflex-etl/BasicPySparkFlexEtlJob/Code2907ea7be4a583708cfffc21b3df1dfa/Stage", + "constructInfo": { + "fqn": "aws-cdk-lib.AssetStaging", + "version": "0.0.0" + } + }, + "AssetBucket": { + "id": "AssetBucket", + "path": 
"aws-glue-job-pysparkflex-etl/BasicPySparkFlexEtlJob/Code2907ea7be4a583708cfffc21b3df1dfa/AssetBucket", + "constructInfo": { + "fqn": "aws-cdk-lib.aws_s3.BucketBase", + "version": "0.0.0" + } + } + }, + "constructInfo": { + "fqn": "aws-cdk-lib.aws_s3_assets.Asset", + "version": "0.0.0" + } + }, + "Resource": { + "id": "Resource", + "path": "aws-glue-job-pysparkflex-etl/BasicPySparkFlexEtlJob/Resource", + "attributes": { + "aws:cdk:cloudformation:type": "AWS::Glue::Job", + "aws:cdk:cloudformation:props": { + "command": { + "name": "glueetl", + "scriptLocation": { + "Fn::Join": [ + "", + [ + "s3://", + { + "Fn::Sub": "cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}" + }, + "/432033e3218068a915d2532fa9be7858a12b228a2ae6e5c10faccd9097b1e855.py" + ] + ] + }, + "pythonVersion": "3" + }, + "defaultArguments": { + "--job-language": "python", + "--enable-metrics": "", + "--enable-observability-metrics": "true" + }, + "executionClass": "FLEX", + "glueVersion": "3.0", + "numberOfWorkers": 10, + "role": { + "Fn::GetAtt": [ + "IAMServiceRole61C662C4", + "Arn" + ] + }, + "workerType": "G.1X" + } + }, + "constructInfo": { + "fqn": "aws-cdk-lib.aws_glue.CfnJob", + "version": "0.0.0" + } + } + }, + "constructInfo": { + "fqn": "@aws-cdk/aws-glue-alpha.PySparkFlexEtlJob", + "version": "0.0.0" + } + }, + "OverridePySparkFlexEtlJob": { + "id": "OverridePySparkFlexEtlJob", + "path": "aws-glue-job-pysparkflex-etl/OverridePySparkFlexEtlJob", + "children": { + "Resource": { + "id": "Resource", + "path": "aws-glue-job-pysparkflex-etl/OverridePySparkFlexEtlJob/Resource", + "attributes": { + "aws:cdk:cloudformation:type": "AWS::Glue::Job", + "aws:cdk:cloudformation:props": { + "command": { + "name": "glueetl", + "scriptLocation": { + "Fn::Join": [ + "", + [ + "s3://", + { + "Fn::Sub": "cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}" + }, + "/432033e3218068a915d2532fa9be7858a12b228a2ae6e5c10faccd9097b1e855.py" + ] + ] + }, + "pythonVersion": "3" + }, + "defaultArguments": { + 
"--job-language": "python", + "--enable-metrics": "", + "--enable-observability-metrics": "true", + "arg1": "value1", + "arg2": "value2" + }, + "description": "Optional Override PySpark Flex Etl Job", + "executionClass": "FLEX", + "glueVersion": "3.0", + "name": "Optional Override PySpark Flex Etl Job", + "numberOfWorkers": 20, + "role": { + "Fn::GetAtt": [ + "IAMServiceRole61C662C4", + "Arn" + ] + }, + "tags": { + "key": "value" + }, + "timeout": 15, + "workerType": "G.1X" + } + }, + "constructInfo": { + "fqn": "aws-cdk-lib.aws_glue.CfnJob", + "version": "0.0.0" + } + } + }, + "constructInfo": { + "fqn": "@aws-cdk/aws-glue-alpha.PySparkFlexEtlJob", + "version": "0.0.0" + } + }, + "BootstrapVersion": { + "id": "BootstrapVersion", + "path": "aws-glue-job-pysparkflex-etl/BootstrapVersion", + "constructInfo": { + "fqn": "aws-cdk-lib.CfnParameter", + "version": "0.0.0" + } + }, + "CheckBootstrapVersion": { + "id": "CheckBootstrapVersion", + "path": "aws-glue-job-pysparkflex-etl/CheckBootstrapVersion", + "constructInfo": { + "fqn": "aws-cdk-lib.CfnRule", + "version": "0.0.0" + } + } + }, + "constructInfo": { + "fqn": "aws-cdk-lib.Stack", + "version": "0.0.0" + } + }, + "aws-glue-job-pysparkflex-etl-integ-test": { + "id": "aws-glue-job-pysparkflex-etl-integ-test", + "path": "aws-glue-job-pysparkflex-etl-integ-test", + "children": { + "DefaultTest": { + "id": "DefaultTest", + "path": "aws-glue-job-pysparkflex-etl-integ-test/DefaultTest", + "children": { + "Default": { + "id": "Default", + "path": "aws-glue-job-pysparkflex-etl-integ-test/DefaultTest/Default", + "constructInfo": { + "fqn": "constructs.Construct", + "version": "10.3.0" + } + }, + "DeployAssert": { + "id": "DeployAssert", + "path": "aws-glue-job-pysparkflex-etl-integ-test/DefaultTest/DeployAssert", + "children": { + "BootstrapVersion": { + "id": "BootstrapVersion", + "path": "aws-glue-job-pysparkflex-etl-integ-test/DefaultTest/DeployAssert/BootstrapVersion", + "constructInfo": { + "fqn": 
"aws-cdk-lib.CfnParameter", + "version": "0.0.0" + } + }, + "CheckBootstrapVersion": { + "id": "CheckBootstrapVersion", + "path": "aws-glue-job-pysparkflex-etl-integ-test/DefaultTest/DeployAssert/CheckBootstrapVersion", + "constructInfo": { + "fqn": "aws-cdk-lib.CfnRule", + "version": "0.0.0" + } + } + }, + "constructInfo": { + "fqn": "aws-cdk-lib.Stack", + "version": "0.0.0" + } + } + }, + "constructInfo": { + "fqn": "@aws-cdk/integ-tests-alpha.IntegTestCase", + "version": "0.0.0" + } + } + }, + "constructInfo": { + "fqn": "@aws-cdk/integ-tests-alpha.IntegTest", + "version": "0.0.0" + } + }, + "Tree": { + "id": "Tree", + "path": "Tree", + "constructInfo": { + "fqn": "constructs.Construct", + "version": "10.3.0" + } + } + }, + "constructInfo": { + "fqn": "aws-cdk-lib.App", + "version": "0.0.0" + } + } +} \ No newline at end of file diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pysparkflex-etl.ts b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-flex-etl.ts similarity index 100% rename from packages/@aws-cdk/aws-glue-alpha/test/integ.job-pysparkflex-etl.ts rename to packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-flex-etl.ts diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-flex-etl.js.snapshot/asset.e305655b966b957f91fcec580e3f8703573eb6b69528c5d52190d72579c91602.jar b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-flex-etl.js.snapshot/asset.e305655b966b957f91fcec580e3f8703573eb6b69528c5d52190d72579c91602.jar new file mode 100644 index 0000000000000000000000000000000000000000..41a6aa95d5aff514ba19b9a9b4c8bfff3ec123a0 GIT binary patch literal 782 zcmWIWW@Zs#;Nak3*vZlp!GHuf8CV#6T|*poJ^kGD|D9rBU}gyLX6FE@V1gMK3ugvA8%lz}Ne*gFx-=CBBp0qNAhjxvK;gu@`>~ z`>52>zR1ayf1cUPplvyuRMi&Qd-HS2{bBs4QMj$k@x`HcXZBSZe@?U8f6tylX5ssV zZOgjF!d_)mzeu?1c-^soVVO?w%=rcp2mkRl*RDF=w9s zc%joEIcMfi%~`9Zb)>khS6q5_X61#YiU%G{x~_iuL>T|Q1M^NUWW5kQyICwy>O=j? 
z>M2", + "Default": "/cdk-bootstrap/hnb659fds/version", + "Description": "Version of the CDK Bootstrap resources in this environment, automatically retrieved from SSM Parameter Store. [cdk:skip]" + } + }, + "Rules": { + "CheckBootstrapVersion": { + "Assertions": [ + { + "Assert": { + "Fn::Not": [ + { + "Fn::Contains": [ + [ + "1", + "2", + "3", + "4", + "5" + ], + { + "Ref": "BootstrapVersion" + } + ] + } + ] + }, + "AssertDescription": "CDK bootstrap stack version 6 required. Please run 'cdk bootstrap' with a recent version of the CDK CLI." + } + ] + } + } +} \ No newline at end of file diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-flex-etl.js.snapshot/awsgluejobscalasparkflexetlintegtestDefaultTestDeployAssert8009E6FC.assets.json b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-flex-etl.js.snapshot/awsgluejobscalasparkflexetlintegtestDefaultTestDeployAssert8009E6FC.assets.json new file mode 100644 index 0000000000000..22bd76fefdc70 --- /dev/null +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-flex-etl.js.snapshot/awsgluejobscalasparkflexetlintegtestDefaultTestDeployAssert8009E6FC.assets.json @@ -0,0 +1,19 @@ +{ + "version": "36.0.0", + "files": { + "21fbb51d7b23f6a6c262b46a9caee79d744a3ac019fd45422d988b96d44b2a22": { + "source": { + "path": "awsgluejobscalasparkflexetlintegtestDefaultTestDeployAssert8009E6FC.template.json", + "packaging": "file" + }, + "destinations": { + "current_account-current_region": { + "bucketName": "cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}", + "objectKey": "21fbb51d7b23f6a6c262b46a9caee79d744a3ac019fd45422d988b96d44b2a22.json", + "assumeRoleArn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-file-publishing-role-${AWS::AccountId}-${AWS::Region}" + } + } + } + }, + "dockerImages": {} +} \ No newline at end of file diff --git 
a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-flex-etl.js.snapshot/awsgluejobscalasparkflexetlintegtestDefaultTestDeployAssert8009E6FC.template.json b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-flex-etl.js.snapshot/awsgluejobscalasparkflexetlintegtestDefaultTestDeployAssert8009E6FC.template.json new file mode 100644 index 0000000000000..ad9d0fb73d1dd --- /dev/null +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-flex-etl.js.snapshot/awsgluejobscalasparkflexetlintegtestDefaultTestDeployAssert8009E6FC.template.json @@ -0,0 +1,36 @@ +{ + "Parameters": { + "BootstrapVersion": { + "Type": "AWS::SSM::Parameter::Value", + "Default": "/cdk-bootstrap/hnb659fds/version", + "Description": "Version of the CDK Bootstrap resources in this environment, automatically retrieved from SSM Parameter Store. [cdk:skip]" + } + }, + "Rules": { + "CheckBootstrapVersion": { + "Assertions": [ + { + "Assert": { + "Fn::Not": [ + { + "Fn::Contains": [ + [ + "1", + "2", + "3", + "4", + "5" + ], + { + "Ref": "BootstrapVersion" + } + ] + } + ] + }, + "AssertDescription": "CDK bootstrap stack version 6 required. Please run 'cdk bootstrap' with a recent version of the CDK CLI." 
+ } + ] + } + } +} \ No newline at end of file diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-flex-etl.js.snapshot/cdk.out b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-flex-etl.js.snapshot/cdk.out new file mode 100644 index 0000000000000..1f0068d32659a --- /dev/null +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-flex-etl.js.snapshot/cdk.out @@ -0,0 +1 @@ +{"version":"36.0.0"} \ No newline at end of file diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-flex-etl.js.snapshot/integ.json b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-flex-etl.js.snapshot/integ.json new file mode 100644 index 0000000000000..694662c13ef3a --- /dev/null +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-flex-etl.js.snapshot/integ.json @@ -0,0 +1,12 @@ +{ + "version": "36.0.0", + "testCases": { + "aws-glue-job-scalasparkflex-etl-integ-test/DefaultTest": { + "stacks": [ + "aws-glue-job-scalasparkflex-etl" + ], + "assertionStack": "aws-glue-job-scalasparkflex-etl-integ-test/DefaultTest/DeployAssert", + "assertionStackName": "awsgluejobscalasparkflexetlintegtestDefaultTestDeployAssert8009E6FC" + } + } +} \ No newline at end of file diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-flex-etl.js.snapshot/manifest.json b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-flex-etl.js.snapshot/manifest.json new file mode 100644 index 0000000000000..76778efc60610 --- /dev/null +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-flex-etl.js.snapshot/manifest.json @@ -0,0 +1,131 @@ +{ + "version": "36.0.0", + "artifacts": { + "aws-glue-job-scalasparkflex-etl.assets": { + "type": "cdk:asset-manifest", + "properties": { + "file": "aws-glue-job-scalasparkflex-etl.assets.json", + "requiresBootstrapStackVersion": 6, + "bootstrapStackVersionSsmParameter": "/cdk-bootstrap/hnb659fds/version" + } + }, + "aws-glue-job-scalasparkflex-etl": { + "type": 
"aws:cloudformation:stack", + "environment": "aws://unknown-account/unknown-region", + "properties": { + "templateFile": "aws-glue-job-scalasparkflex-etl.template.json", + "terminationProtection": false, + "validateOnSynth": false, + "assumeRoleArn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-deploy-role-${AWS::AccountId}-${AWS::Region}", + "cloudFormationExecutionRoleArn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-cfn-exec-role-${AWS::AccountId}-${AWS::Region}", + "stackTemplateAssetObjectUrl": "s3://cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}/ff1b08d04e7d65e42ead8e33a88a380c6678218b733d0b350cd0bea32ec2944f.json", + "requiresBootstrapStackVersion": 6, + "bootstrapStackVersionSsmParameter": "/cdk-bootstrap/hnb659fds/version", + "additionalDependencies": [ + "aws-glue-job-scalasparkflex-etl.assets" + ], + "lookupRole": { + "arn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-lookup-role-${AWS::AccountId}-${AWS::Region}", + "requiresBootstrapStackVersion": 8, + "bootstrapStackVersionSsmParameter": "/cdk-bootstrap/hnb659fds/version" + } + }, + "dependencies": [ + "aws-glue-job-scalasparkflex-etl.assets" + ], + "metadata": { + "/aws-glue-job-scalasparkflex-etl/IAMServiceRole/Resource": [ + { + "type": "aws:cdk:logicalId", + "data": "IAMServiceRole61C662C4" + } + ], + "/aws-glue-job-scalasparkflex-etl/IAMServiceRole/DefaultPolicy/Resource": [ + { + "type": "aws:cdk:logicalId", + "data": "IAMServiceRoleDefaultPolicy379D1A0E" + } + ], + "/aws-glue-job-scalasparkflex-etl/BasicScalaSparkFlexEtlJob/Resource": [ + { + "type": "aws:cdk:logicalId", + "data": "BasicScalaSparkFlexEtlJobF8FD9EFB" + } + ], + "/aws-glue-job-scalasparkflex-etl/OverrideScalaSparkFlexEtlJob/Resource": [ + { + "type": "aws:cdk:logicalId", + "data": "OverrideScalaSparkFlexEtlJob843D93B4" + } + ], + "/aws-glue-job-scalasparkflex-etl/BootstrapVersion": [ + { + "type": "aws:cdk:logicalId", + "data": "BootstrapVersion" + } + ], + 
"/aws-glue-job-scalasparkflex-etl/CheckBootstrapVersion": [ + { + "type": "aws:cdk:logicalId", + "data": "CheckBootstrapVersion" + } + ] + }, + "displayName": "aws-glue-job-scalasparkflex-etl" + }, + "awsgluejobscalasparkflexetlintegtestDefaultTestDeployAssert8009E6FC.assets": { + "type": "cdk:asset-manifest", + "properties": { + "file": "awsgluejobscalasparkflexetlintegtestDefaultTestDeployAssert8009E6FC.assets.json", + "requiresBootstrapStackVersion": 6, + "bootstrapStackVersionSsmParameter": "/cdk-bootstrap/hnb659fds/version" + } + }, + "awsgluejobscalasparkflexetlintegtestDefaultTestDeployAssert8009E6FC": { + "type": "aws:cloudformation:stack", + "environment": "aws://unknown-account/unknown-region", + "properties": { + "templateFile": "awsgluejobscalasparkflexetlintegtestDefaultTestDeployAssert8009E6FC.template.json", + "terminationProtection": false, + "validateOnSynth": false, + "assumeRoleArn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-deploy-role-${AWS::AccountId}-${AWS::Region}", + "cloudFormationExecutionRoleArn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-cfn-exec-role-${AWS::AccountId}-${AWS::Region}", + "stackTemplateAssetObjectUrl": "s3://cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}/21fbb51d7b23f6a6c262b46a9caee79d744a3ac019fd45422d988b96d44b2a22.json", + "requiresBootstrapStackVersion": 6, + "bootstrapStackVersionSsmParameter": "/cdk-bootstrap/hnb659fds/version", + "additionalDependencies": [ + "awsgluejobscalasparkflexetlintegtestDefaultTestDeployAssert8009E6FC.assets" + ], + "lookupRole": { + "arn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-lookup-role-${AWS::AccountId}-${AWS::Region}", + "requiresBootstrapStackVersion": 8, + "bootstrapStackVersionSsmParameter": "/cdk-bootstrap/hnb659fds/version" + } + }, + "dependencies": [ + "awsgluejobscalasparkflexetlintegtestDefaultTestDeployAssert8009E6FC.assets" + ], + "metadata": { + 
"/aws-glue-job-scalasparkflex-etl-integ-test/DefaultTest/DeployAssert/BootstrapVersion": [ + { + "type": "aws:cdk:logicalId", + "data": "BootstrapVersion" + } + ], + "/aws-glue-job-scalasparkflex-etl-integ-test/DefaultTest/DeployAssert/CheckBootstrapVersion": [ + { + "type": "aws:cdk:logicalId", + "data": "CheckBootstrapVersion" + } + ] + }, + "displayName": "aws-glue-job-scalasparkflex-etl-integ-test/DefaultTest/DeployAssert" + }, + "Tree": { + "type": "cdk:tree", + "properties": { + "file": "tree.json" + } + } + } +} \ No newline at end of file diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-flex-etl.js.snapshot/tree.json b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-flex-etl.js.snapshot/tree.json new file mode 100644 index 0000000000000..c8dc5ada88490 --- /dev/null +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-flex-etl.js.snapshot/tree.json @@ -0,0 +1,375 @@ +{ + "version": "tree-0.1", + "tree": { + "id": "App", + "path": "", + "children": { + "aws-glue-job-scalasparkflex-etl": { + "id": "aws-glue-job-scalasparkflex-etl", + "path": "aws-glue-job-scalasparkflex-etl", + "children": { + "IAMServiceRole": { + "id": "IAMServiceRole", + "path": "aws-glue-job-scalasparkflex-etl/IAMServiceRole", + "children": { + "ImportIAMServiceRole": { + "id": "ImportIAMServiceRole", + "path": "aws-glue-job-scalasparkflex-etl/IAMServiceRole/ImportIAMServiceRole", + "constructInfo": { + "fqn": "aws-cdk-lib.Resource", + "version": "0.0.0" + } + }, + "Resource": { + "id": "Resource", + "path": "aws-glue-job-scalasparkflex-etl/IAMServiceRole/Resource", + "attributes": { + "aws:cdk:cloudformation:type": "AWS::IAM::Role", + "aws:cdk:cloudformation:props": { + "assumeRolePolicyDocument": { + "Statement": [ + { + "Action": "sts:AssumeRole", + "Effect": "Allow", + "Principal": { + "Service": "glue.amazonaws.com" + } + } + ], + "Version": "2012-10-17" + }, + "managedPolicyArns": [ + { + "Fn::Join": [ + "", + [ + "arn:", + { + "Ref": 
"AWS::Partition" + }, + ":iam::aws:policy/service-role/AWSGlueServiceRole" + ] + ] + } + ] + } + }, + "constructInfo": { + "fqn": "aws-cdk-lib.aws_iam.CfnRole", + "version": "0.0.0" + } + }, + "DefaultPolicy": { + "id": "DefaultPolicy", + "path": "aws-glue-job-scalasparkflex-etl/IAMServiceRole/DefaultPolicy", + "children": { + "Resource": { + "id": "Resource", + "path": "aws-glue-job-scalasparkflex-etl/IAMServiceRole/DefaultPolicy/Resource", + "attributes": { + "aws:cdk:cloudformation:type": "AWS::IAM::Policy", + "aws:cdk:cloudformation:props": { + "policyDocument": { + "Statement": [ + { + "Action": [ + "s3:GetBucket*", + "s3:GetObject*", + "s3:List*" + ], + "Effect": "Allow", + "Resource": [ + { + "Fn::Join": [ + "", + [ + "arn:", + { + "Ref": "AWS::Partition" + }, + ":s3:::", + { + "Fn::Sub": "cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}" + }, + "/*" + ] + ] + }, + { + "Fn::Join": [ + "", + [ + "arn:", + { + "Ref": "AWS::Partition" + }, + ":s3:::", + { + "Fn::Sub": "cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}" + } + ] + ] + } + ] + } + ], + "Version": "2012-10-17" + }, + "policyName": "IAMServiceRoleDefaultPolicy379D1A0E", + "roles": [ + { + "Ref": "IAMServiceRole61C662C4" + } + ] + } + }, + "constructInfo": { + "fqn": "aws-cdk-lib.aws_iam.CfnPolicy", + "version": "0.0.0" + } + } + }, + "constructInfo": { + "fqn": "aws-cdk-lib.aws_iam.Policy", + "version": "0.0.0" + } + } + }, + "constructInfo": { + "fqn": "aws-cdk-lib.aws_iam.Role", + "version": "0.0.0" + } + }, + "BasicScalaSparkFlexEtlJob": { + "id": "BasicScalaSparkFlexEtlJob", + "path": "aws-glue-job-scalasparkflex-etl/BasicScalaSparkFlexEtlJob", + "children": { + "Codeb58a68516710fd95a65c427a7e567405": { + "id": "Codeb58a68516710fd95a65c427a7e567405", + "path": "aws-glue-job-scalasparkflex-etl/BasicScalaSparkFlexEtlJob/Codeb58a68516710fd95a65c427a7e567405", + "children": { + "Stage": { + "id": "Stage", + "path": 
"aws-glue-job-scalasparkflex-etl/BasicScalaSparkFlexEtlJob/Codeb58a68516710fd95a65c427a7e567405/Stage", + "constructInfo": { + "fqn": "aws-cdk-lib.AssetStaging", + "version": "0.0.0" + } + }, + "AssetBucket": { + "id": "AssetBucket", + "path": "aws-glue-job-scalasparkflex-etl/BasicScalaSparkFlexEtlJob/Codeb58a68516710fd95a65c427a7e567405/AssetBucket", + "constructInfo": { + "fqn": "aws-cdk-lib.aws_s3.BucketBase", + "version": "0.0.0" + } + } + }, + "constructInfo": { + "fqn": "aws-cdk-lib.aws_s3_assets.Asset", + "version": "0.0.0" + } + }, + "Resource": { + "id": "Resource", + "path": "aws-glue-job-scalasparkflex-etl/BasicScalaSparkFlexEtlJob/Resource", + "attributes": { + "aws:cdk:cloudformation:type": "AWS::Glue::Job", + "aws:cdk:cloudformation:props": { + "command": { + "name": "glueetl", + "scriptLocation": { + "Fn::Join": [ + "", + [ + "s3://", + { + "Fn::Sub": "cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}" + }, + "/e305655b966b957f91fcec580e3f8703573eb6b69528c5d52190d72579c91602.jar" + ] + ] + } + }, + "defaultArguments": { + "--job-language": "scala", + "--class": "com.example.HelloWorld", + "--enable-metrics": "", + "--enable-observability-metrics": "true" + }, + "executionClass": "FLEX", + "glueVersion": "3.0", + "numberOfWorkers": 10, + "role": { + "Fn::GetAtt": [ + "IAMServiceRole61C662C4", + "Arn" + ] + }, + "workerType": "G.1X" + } + }, + "constructInfo": { + "fqn": "aws-cdk-lib.aws_glue.CfnJob", + "version": "0.0.0" + } + } + }, + "constructInfo": { + "fqn": "@aws-cdk/aws-glue-alpha.ScalaSparkFlexEtlJob", + "version": "0.0.0" + } + }, + "OverrideScalaSparkFlexEtlJob": { + "id": "OverrideScalaSparkFlexEtlJob", + "path": "aws-glue-job-scalasparkflex-etl/OverrideScalaSparkFlexEtlJob", + "children": { + "Resource": { + "id": "Resource", + "path": "aws-glue-job-scalasparkflex-etl/OverrideScalaSparkFlexEtlJob/Resource", + "attributes": { + "aws:cdk:cloudformation:type": "AWS::Glue::Job", + "aws:cdk:cloudformation:props": { + "command": { + "name": 
"glueetl", + "scriptLocation": { + "Fn::Join": [ + "", + [ + "s3://", + { + "Fn::Sub": "cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}" + }, + "/e305655b966b957f91fcec580e3f8703573eb6b69528c5d52190d72579c91602.jar" + ] + ] + } + }, + "defaultArguments": { + "--job-language": "scala", + "--class": "com.example.HelloWorld", + "--enable-metrics": "", + "--enable-observability-metrics": "true", + "arg1": "value1", + "arg2": "value2" + }, + "description": "Optional Override ScalaSpark Flex Etl Job", + "executionClass": "FLEX", + "glueVersion": "3.0", + "name": "Optional Override ScalaSpark Flex Etl Job", + "numberOfWorkers": 20, + "role": { + "Fn::GetAtt": [ + "IAMServiceRole61C662C4", + "Arn" + ] + }, + "tags": { + "key": "value" + }, + "timeout": 15, + "workerType": "G.1X" + } + }, + "constructInfo": { + "fqn": "aws-cdk-lib.aws_glue.CfnJob", + "version": "0.0.0" + } + } + }, + "constructInfo": { + "fqn": "@aws-cdk/aws-glue-alpha.ScalaSparkFlexEtlJob", + "version": "0.0.0" + } + }, + "BootstrapVersion": { + "id": "BootstrapVersion", + "path": "aws-glue-job-scalasparkflex-etl/BootstrapVersion", + "constructInfo": { + "fqn": "aws-cdk-lib.CfnParameter", + "version": "0.0.0" + } + }, + "CheckBootstrapVersion": { + "id": "CheckBootstrapVersion", + "path": "aws-glue-job-scalasparkflex-etl/CheckBootstrapVersion", + "constructInfo": { + "fqn": "aws-cdk-lib.CfnRule", + "version": "0.0.0" + } + } + }, + "constructInfo": { + "fqn": "aws-cdk-lib.Stack", + "version": "0.0.0" + } + }, + "aws-glue-job-scalasparkflex-etl-integ-test": { + "id": "aws-glue-job-scalasparkflex-etl-integ-test", + "path": "aws-glue-job-scalasparkflex-etl-integ-test", + "children": { + "DefaultTest": { + "id": "DefaultTest", + "path": "aws-glue-job-scalasparkflex-etl-integ-test/DefaultTest", + "children": { + "Default": { + "id": "Default", + "path": "aws-glue-job-scalasparkflex-etl-integ-test/DefaultTest/Default", + "constructInfo": { + "fqn": "constructs.Construct", + "version": "10.3.0" + } + }, + 
"DeployAssert": { + "id": "DeployAssert", + "path": "aws-glue-job-scalasparkflex-etl-integ-test/DefaultTest/DeployAssert", + "children": { + "BootstrapVersion": { + "id": "BootstrapVersion", + "path": "aws-glue-job-scalasparkflex-etl-integ-test/DefaultTest/DeployAssert/BootstrapVersion", + "constructInfo": { + "fqn": "aws-cdk-lib.CfnParameter", + "version": "0.0.0" + } + }, + "CheckBootstrapVersion": { + "id": "CheckBootstrapVersion", + "path": "aws-glue-job-scalasparkflex-etl-integ-test/DefaultTest/DeployAssert/CheckBootstrapVersion", + "constructInfo": { + "fqn": "aws-cdk-lib.CfnRule", + "version": "0.0.0" + } + } + }, + "constructInfo": { + "fqn": "aws-cdk-lib.Stack", + "version": "0.0.0" + } + } + }, + "constructInfo": { + "fqn": "@aws-cdk/integ-tests-alpha.IntegTestCase", + "version": "0.0.0" + } + } + }, + "constructInfo": { + "fqn": "@aws-cdk/integ-tests-alpha.IntegTest", + "version": "0.0.0" + } + }, + "Tree": { + "id": "Tree", + "path": "Tree", + "constructInfo": { + "fqn": "constructs.Construct", + "version": "10.3.0" + } + } + }, + "constructInfo": { + "fqn": "aws-cdk-lib.App", + "version": "0.0.0" + } + } +} \ No newline at end of file diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalasparkflex-etl.ts b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-flex-etl.ts similarity index 100% rename from packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalasparkflex-etl.ts rename to packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-flex-etl.ts diff --git a/packages/@aws-cdk/aws-glue-alpha/test/pysparkflex-etl-jobs.test.ts b/packages/@aws-cdk/aws-glue-alpha/test/pyspark-flex-etl-jobs.test.ts similarity index 100% rename from packages/@aws-cdk/aws-glue-alpha/test/pysparkflex-etl-jobs.test.ts rename to packages/@aws-cdk/aws-glue-alpha/test/pyspark-flex-etl-jobs.test.ts diff --git a/packages/@aws-cdk/aws-glue-alpha/test/scalasparkflex-etl-jobs.test.ts b/packages/@aws-cdk/aws-glue-alpha/test/scalaspark-flex-etl-jobs.test.ts 
similarity index 100% rename from packages/@aws-cdk/aws-glue-alpha/test/scalasparkflex-etl-jobs.test.ts rename to packages/@aws-cdk/aws-glue-alpha/test/scalaspark-flex-etl-jobs.test.ts From 75a4e6e03d85f358899b07410a03888a10c77a18 Mon Sep 17 00:00:00 2001 From: "Janardhan (Janny) Molumuri" Date: Wed, 26 Jun 2024 22:35:35 +0000 Subject: [PATCH 31/51] Fix s3 path specified in --spark-event-logs-path and update glue version --- packages/@aws-cdk/aws-glue-alpha/lib/jobs/pyspark-etl-job.ts | 4 ++-- .../@aws-cdk/aws-glue-alpha/lib/jobs/pyspark-flex-etl-job.ts | 4 ++-- .../@aws-cdk/aws-glue-alpha/lib/jobs/pyspark-streaming-job.ts | 4 ++-- .../@aws-cdk/aws-glue-alpha/lib/jobs/scala-spark-etl-job.ts | 4 ++-- .../aws-glue-alpha/lib/jobs/scala-spark-flex-etl-job.ts | 4 ++-- .../aws-glue-alpha/lib/jobs/scala-spark-streaming-job.ts | 4 ++-- .../aws-glue-job-pyspark-etl.assets.json | 4 ++-- .../aws-glue-job-pyspark-etl.template.json | 2 +- .../test/integ.job-pyspark-etl.js.snapshot/manifest.json | 2 +- .../test/integ.job-pyspark-etl.js.snapshot/tree.json | 2 +- .../aws-glue-job-pyspark-streaming.assets.json | 4 ++-- .../aws-glue-job-pyspark-streaming.template.json | 2 +- .../integ.job-pyspark-streaming.js.snapshot/manifest.json | 2 +- .../test/integ.job-pyspark-streaming.js.snapshot/tree.json | 2 +- .../aws-glue-job-scalaspark-etl.assets.json | 4 ++-- .../aws-glue-job-scalaspark-etl.template.json | 2 +- .../test/integ.job-scalaspark-etl.js.snapshot/manifest.json | 2 +- .../test/integ.job-scalaspark-etl.js.snapshot/tree.json | 2 +- .../aws-glue-job-scalasparkflex-etl.assets.json | 4 ++-- .../aws-glue-job-scalasparkflex-etl.template.json | 2 +- .../integ.job-scalaspark-flex-etl.js.snapshot/manifest.json | 2 +- .../test/integ.job-scalaspark-flex-etl.js.snapshot/tree.json | 2 +- .../aws-glue-job-scalaspark-streaming.assets.json | 4 ++-- .../aws-glue-job-scalaspark-streaming.template.json | 2 +- .../integ.job-scalaspark-streaming.js.snapshot/manifest.json | 2 +- 
.../test/integ.job-scalaspark-streaming.js.snapshot/tree.json | 2 +- .../@aws-cdk/aws-glue-alpha/test/pyspark-etl-jobs.test.ts | 4 ++-- .../aws-glue-alpha/test/pyspark-flex-etl-jobs.test.ts | 4 ++-- .../aws-glue-alpha/test/pyspark-streaming-jobs.test.ts | 4 ++-- .../@aws-cdk/aws-glue-alpha/test/scalaspark-etl-jobs.test.ts | 4 ++-- .../aws-glue-alpha/test/scalaspark-flex-etl-jobs.test.ts | 4 ++-- .../aws-glue-alpha/test/scalaspark-streaming-jobs.test.ts | 4 ++-- 32 files changed, 49 insertions(+), 49 deletions(-) diff --git a/packages/@aws-cdk/aws-glue-alpha/lib/jobs/pyspark-etl-job.ts b/packages/@aws-cdk/aws-glue-alpha/lib/jobs/pyspark-etl-job.ts index 90573d5cae909..ea4c1a6cca858 100644 --- a/packages/@aws-cdk/aws-glue-alpha/lib/jobs/pyspark-etl-job.ts +++ b/packages/@aws-cdk/aws-glue-alpha/lib/jobs/pyspark-etl-job.ts @@ -125,7 +125,7 @@ export class PySparkEtlJob extends Job { pythonVersion: PythonVersion.THREE, }, glueVersion: props.glueVersion ? props.glueVersion : GlueVersion.V4_0, - workerType: props.workerType ? props.workerType : WorkerType.G_1X, + workerType: props.workerType ? props.workerType : WorkerType.G_2X, numberOfWorkers: props.numberOrWorkers ? props.numberOrWorkers : 10, maxRetries: props.maxRetries, executionProperty: props.maxConcurrentRuns ? 
{ maxConcurrentRuns: props.maxConcurrentRuns } : undefined, @@ -169,7 +169,7 @@ export class PySparkEtlJob extends Job { bucket.grantReadWrite(role, cleanSparkUiPrefixForGrant(sparkUiProps.prefix)); const args = { '--enable-spark-ui': 'true', - '--spark-event-logs-path': bucket.s3UrlForObject(sparkUiProps.prefix), + '--spark-event-logs-path': bucket.s3UrlForObject(sparkUiProps.prefix).replace(/\/?$/, '/'), // path will always end with a slash }; return { diff --git a/packages/@aws-cdk/aws-glue-alpha/lib/jobs/pyspark-flex-etl-job.ts b/packages/@aws-cdk/aws-glue-alpha/lib/jobs/pyspark-flex-etl-job.ts index 8559073016f95..f22ec6656956d 100644 --- a/packages/@aws-cdk/aws-glue-alpha/lib/jobs/pyspark-flex-etl-job.ts +++ b/packages/@aws-cdk/aws-glue-alpha/lib/jobs/pyspark-flex-etl-job.ts @@ -131,7 +131,7 @@ export class PySparkFlexEtlJob extends Job { pythonVersion: PythonVersion.THREE, }, glueVersion: props.glueVersion ? props.glueVersion : GlueVersion.V3_0, - workerType: props.workerType ? props.workerType : WorkerType.G_1X, + workerType: props.workerType ? props.workerType : WorkerType.G_2X, numberOfWorkers: props.numberOrWorkers ? props.numberOrWorkers : 10, maxRetries: props.maxRetries, executionProperty: props.maxConcurrentRuns ? 
{ maxConcurrentRuns: props.maxConcurrentRuns } : undefined, @@ -183,7 +183,7 @@ export class PySparkFlexEtlJob extends Job { bucket.grantReadWrite(role, cleanSparkUiPrefixForGrant(sparkUiProps.prefix)); const args = { '--enable-spark-ui': 'true', - '--spark-event-logs-path': bucket.s3UrlForObject(sparkUiProps.prefix), + '--spark-event-logs-path': bucket.s3UrlForObject(sparkUiProps.prefix).replace(/\/?$/, '/'), // path will always end with a slash }; return { diff --git a/packages/@aws-cdk/aws-glue-alpha/lib/jobs/pyspark-streaming-job.ts b/packages/@aws-cdk/aws-glue-alpha/lib/jobs/pyspark-streaming-job.ts index c13f10e8fbb0f..f07d9abfc0342 100644 --- a/packages/@aws-cdk/aws-glue-alpha/lib/jobs/pyspark-streaming-job.ts +++ b/packages/@aws-cdk/aws-glue-alpha/lib/jobs/pyspark-streaming-job.ts @@ -125,7 +125,7 @@ export class PySparkStreamingJob extends Job { pythonVersion: PythonVersion.THREE, }, glueVersion: props.glueVersion ? props.glueVersion : GlueVersion.V4_0, - workerType: props.workerType ? props.workerType : WorkerType.G_1X, + workerType: props.workerType ? props.workerType : WorkerType.G_2X, numberOfWorkers: props.numberOrWorkers ? props.numberOrWorkers : 10, maxRetries: props.maxRetries, executionProperty: props.maxConcurrentRuns ? 
{ maxConcurrentRuns: props.maxConcurrentRuns } : undefined, @@ -168,7 +168,7 @@ export class PySparkStreamingJob extends Job { bucket.grantReadWrite(role, cleanSparkUiPrefixForGrant(sparkUiProps.prefix)); const args = { '--enable-spark-ui': 'true', - '--spark-event-logs-path': bucket.s3UrlForObject(sparkUiProps.prefix), + '--spark-event-logs-path': bucket.s3UrlForObject(sparkUiProps.prefix).replace(/\/?$/, '/'), // path will always end with a slash }; return { diff --git a/packages/@aws-cdk/aws-glue-alpha/lib/jobs/scala-spark-etl-job.ts b/packages/@aws-cdk/aws-glue-alpha/lib/jobs/scala-spark-etl-job.ts index e9f4991592524..2c508202207f7 100644 --- a/packages/@aws-cdk/aws-glue-alpha/lib/jobs/scala-spark-etl-job.ts +++ b/packages/@aws-cdk/aws-glue-alpha/lib/jobs/scala-spark-etl-job.ts @@ -130,7 +130,7 @@ export class ScalaSparkEtlJob extends Job { scriptLocation: this.codeS3ObjectUrl(props.script), }, glueVersion: props.glueVersion ? props.glueVersion : GlueVersion.V4_0, - workerType: props.workerType ? props.workerType : WorkerType.G_1X, + workerType: props.workerType ? props.workerType : WorkerType.G_2X, numberOfWorkers: props.numberOrWorkers ? props.numberOrWorkers : 10, maxRetries: props.maxRetries, executionProperty: props.maxConcurrentRuns ? 
{ maxConcurrentRuns: props.maxConcurrentRuns } : undefined, @@ -171,7 +171,7 @@ export class ScalaSparkEtlJob extends Job { bucket.grantReadWrite(role, cleanSparkUiPrefixForGrant(sparkUiProps.prefix)); const args = { '--enable-spark-ui': 'true', - '--spark-event-logs-path': bucket.s3UrlForObject(sparkUiProps.prefix), + '--spark-event-logs-path': bucket.s3UrlForObject(sparkUiProps.prefix).replace(/\/?$/, '/'), // path will always end with a slash }; return { diff --git a/packages/@aws-cdk/aws-glue-alpha/lib/jobs/scala-spark-flex-etl-job.ts b/packages/@aws-cdk/aws-glue-alpha/lib/jobs/scala-spark-flex-etl-job.ts index 7cb22c9264a18..33928ac6d205d 100644 --- a/packages/@aws-cdk/aws-glue-alpha/lib/jobs/scala-spark-flex-etl-job.ts +++ b/packages/@aws-cdk/aws-glue-alpha/lib/jobs/scala-spark-flex-etl-job.ts @@ -164,7 +164,7 @@ export class ScalaSparkFlexEtlJob extends Job { scriptLocation: this.codeS3ObjectUrl(props.script), }, glueVersion: props.glueVersion ? props.glueVersion : GlueVersion.V3_0, - workerType: props.workerType ? props.workerType : WorkerType.G_1X, + workerType: props.workerType ? props.workerType : WorkerType.G_2X, numberOfWorkers: props.numberOrWorkers ? props.numberOrWorkers : 10, maxRetries: props.maxRetries, executionProperty: props.maxConcurrentRuns ? 
{ maxConcurrentRuns: props.maxConcurrentRuns } : undefined, @@ -220,7 +220,7 @@ export class ScalaSparkFlexEtlJob extends Job { bucket.grantReadWrite(role, cleanSparkUiPrefixForGrant(sparkUiProps.prefix)); const args = { '--enable-spark-ui': 'true', - '--spark-event-logs-path': bucket.s3UrlForObject(sparkUiProps.prefix), + '--spark-event-logs-path': bucket.s3UrlForObject(sparkUiProps.prefix).replace(/\/?$/, '/'), // path will always end with a slash }; return { diff --git a/packages/@aws-cdk/aws-glue-alpha/lib/jobs/scala-spark-streaming-job.ts b/packages/@aws-cdk/aws-glue-alpha/lib/jobs/scala-spark-streaming-job.ts index f085242ff01af..d89c15b46ef40 100644 --- a/packages/@aws-cdk/aws-glue-alpha/lib/jobs/scala-spark-streaming-job.ts +++ b/packages/@aws-cdk/aws-glue-alpha/lib/jobs/scala-spark-streaming-job.ts @@ -129,7 +129,7 @@ export class ScalaSparkStreamingJob extends Job { scriptLocation: this.codeS3ObjectUrl(props.script), }, glueVersion: props.glueVersion ? props.glueVersion : GlueVersion.V4_0, - workerType: props.workerType ? props.workerType : WorkerType.G_1X, + workerType: props.workerType ? props.workerType : WorkerType.G_2X, numberOfWorkers: props.numberOrWorkers ? props.numberOrWorkers : 10, maxRetries: props.maxRetries, executionProperty: props.maxConcurrentRuns ? 
{ maxConcurrentRuns: props.maxConcurrentRuns } : undefined, @@ -166,7 +166,7 @@ export class ScalaSparkStreamingJob extends Job { bucket.grantReadWrite(role, cleanSparkUiPrefixForGrant(sparkUiProps.prefix)); const args = { '--enable-spark-ui': 'true', - '--spark-event-logs-path': bucket.s3UrlForObject(sparkUiProps.prefix), + '--spark-event-logs-path': bucket.s3UrlForObject(sparkUiProps.prefix).replace(/\/?$/, '/'), // path will always end with a slash }; return { diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-etl.js.snapshot/aws-glue-job-pyspark-etl.assets.json b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-etl.js.snapshot/aws-glue-job-pyspark-etl.assets.json index 4f84798bcf09f..5b7eb192d328a 100644 --- a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-etl.js.snapshot/aws-glue-job-pyspark-etl.assets.json +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-etl.js.snapshot/aws-glue-job-pyspark-etl.assets.json @@ -14,7 +14,7 @@ } } }, - "93158bcd276e06327430a2314576a92d70f19939481431744c4712f04232a338": { + "4463547df769ed17f0742accca0ff3fafe85200595d5fc0e41bb662e61244f9b": { "source": { "path": "aws-glue-job-pyspark-etl.template.json", "packaging": "file" @@ -22,7 +22,7 @@ "destinations": { "current_account-current_region": { "bucketName": "cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}", - "objectKey": "93158bcd276e06327430a2314576a92d70f19939481431744c4712f04232a338.json", + "objectKey": "4463547df769ed17f0742accca0ff3fafe85200595d5fc0e41bb662e61244f9b.json", "assumeRoleArn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-file-publishing-role-${AWS::AccountId}-${AWS::Region}" } } diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-etl.js.snapshot/aws-glue-job-pyspark-etl.template.json b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-etl.js.snapshot/aws-glue-job-pyspark-etl.template.json index 28261083fef1e..b4f9e98f37c82 100644 --- 
a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-etl.js.snapshot/aws-glue-job-pyspark-etl.template.json +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-etl.js.snapshot/aws-glue-job-pyspark-etl.template.json @@ -120,7 +120,7 @@ "Arn" ] }, - "WorkerType": "G.1X" + "WorkerType": "G.2X" } }, "OverridePySparkETLJob85E17065": { diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-etl.js.snapshot/manifest.json b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-etl.js.snapshot/manifest.json index 788027abd0280..daac6d41a220f 100644 --- a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-etl.js.snapshot/manifest.json +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-etl.js.snapshot/manifest.json @@ -18,7 +18,7 @@ "validateOnSynth": false, "assumeRoleArn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-deploy-role-${AWS::AccountId}-${AWS::Region}", "cloudFormationExecutionRoleArn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-cfn-exec-role-${AWS::AccountId}-${AWS::Region}", - "stackTemplateAssetObjectUrl": "s3://cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}/93158bcd276e06327430a2314576a92d70f19939481431744c4712f04232a338.json", + "stackTemplateAssetObjectUrl": "s3://cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}/4463547df769ed17f0742accca0ff3fafe85200595d5fc0e41bb662e61244f9b.json", "requiresBootstrapStackVersion": 6, "bootstrapStackVersionSsmParameter": "/cdk-bootstrap/hnb659fds/version", "additionalDependencies": [ diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-etl.js.snapshot/tree.json b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-etl.js.snapshot/tree.json index 45fa2d748766e..4d8d80ee19641 100644 --- a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-etl.js.snapshot/tree.json +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-etl.js.snapshot/tree.json @@ -205,7 +205,7 @@ "Arn" ] }, - "workerType": "G.1X" 
+ "workerType": "G.2X" } }, "constructInfo": { diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-streaming.js.snapshot/aws-glue-job-pyspark-streaming.assets.json b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-streaming.js.snapshot/aws-glue-job-pyspark-streaming.assets.json index 3d198dc8e0e4c..fdf1a5470ac7d 100644 --- a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-streaming.js.snapshot/aws-glue-job-pyspark-streaming.assets.json +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-streaming.js.snapshot/aws-glue-job-pyspark-streaming.assets.json @@ -14,7 +14,7 @@ } } }, - "c90f1c4b88575cc10fa94f4f42c02a4525156fd7e9f0b97f27b8911edea00200": { + "71819bd0b99d0fc653e7361f6fe1a8a6c5fcadd0b52a1aeb4e49f7fa5d78115e": { "source": { "path": "aws-glue-job-pyspark-streaming.template.json", "packaging": "file" @@ -22,7 +22,7 @@ "destinations": { "current_account-current_region": { "bucketName": "cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}", - "objectKey": "c90f1c4b88575cc10fa94f4f42c02a4525156fd7e9f0b97f27b8911edea00200.json", + "objectKey": "71819bd0b99d0fc653e7361f6fe1a8a6c5fcadd0b52a1aeb4e49f7fa5d78115e.json", "assumeRoleArn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-file-publishing-role-${AWS::AccountId}-${AWS::Region}" } } diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-streaming.js.snapshot/aws-glue-job-pyspark-streaming.template.json b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-streaming.js.snapshot/aws-glue-job-pyspark-streaming.template.json index b9a0cdf8f9d76..3253ef37bca76 100644 --- a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-streaming.js.snapshot/aws-glue-job-pyspark-streaming.template.json +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-streaming.js.snapshot/aws-glue-job-pyspark-streaming.template.json @@ -120,7 +120,7 @@ "Arn" ] }, - "WorkerType": "G.1X" + "WorkerType": "G.2X" } }, "OverridePySparkStreamingJob58DE176A": 
{ diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-streaming.js.snapshot/manifest.json b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-streaming.js.snapshot/manifest.json index 204ff49c3bf11..141b55d62076c 100644 --- a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-streaming.js.snapshot/manifest.json +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-streaming.js.snapshot/manifest.json @@ -18,7 +18,7 @@ "validateOnSynth": false, "assumeRoleArn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-deploy-role-${AWS::AccountId}-${AWS::Region}", "cloudFormationExecutionRoleArn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-cfn-exec-role-${AWS::AccountId}-${AWS::Region}", - "stackTemplateAssetObjectUrl": "s3://cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}/c90f1c4b88575cc10fa94f4f42c02a4525156fd7e9f0b97f27b8911edea00200.json", + "stackTemplateAssetObjectUrl": "s3://cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}/71819bd0b99d0fc653e7361f6fe1a8a6c5fcadd0b52a1aeb4e49f7fa5d78115e.json", "requiresBootstrapStackVersion": 6, "bootstrapStackVersionSsmParameter": "/cdk-bootstrap/hnb659fds/version", "additionalDependencies": [ diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-streaming.js.snapshot/tree.json b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-streaming.js.snapshot/tree.json index 1978333231852..f9acb28811cc2 100644 --- a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-streaming.js.snapshot/tree.json +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-streaming.js.snapshot/tree.json @@ -205,7 +205,7 @@ "Arn" ] }, - "workerType": "G.1X" + "workerType": "G.2X" } }, "constructInfo": { diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-etl.js.snapshot/aws-glue-job-scalaspark-etl.assets.json b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-etl.js.snapshot/aws-glue-job-scalaspark-etl.assets.json index 
bb01f581282b2..4ed64c3a1fa9e 100644 --- a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-etl.js.snapshot/aws-glue-job-scalaspark-etl.assets.json +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-etl.js.snapshot/aws-glue-job-scalaspark-etl.assets.json @@ -14,7 +14,7 @@ } } }, - "1b363a750832e7361aa9ffe1e7a97587a65d5ea174e5a00960868a0e56967189": { + "fe963e7893418cd93f28f2e8e97fe684357d3752eec25a95ff05d6ff51f1cc6a": { "source": { "path": "aws-glue-job-scalaspark-etl.template.json", "packaging": "file" @@ -22,7 +22,7 @@ "destinations": { "current_account-current_region": { "bucketName": "cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}", - "objectKey": "1b363a750832e7361aa9ffe1e7a97587a65d5ea174e5a00960868a0e56967189.json", + "objectKey": "fe963e7893418cd93f28f2e8e97fe684357d3752eec25a95ff05d6ff51f1cc6a.json", "assumeRoleArn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-file-publishing-role-${AWS::AccountId}-${AWS::Region}" } } diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-etl.js.snapshot/aws-glue-job-scalaspark-etl.template.json b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-etl.js.snapshot/aws-glue-job-scalaspark-etl.template.json index e475d0368c047..c8b6395f713bc 100644 --- a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-etl.js.snapshot/aws-glue-job-scalaspark-etl.template.json +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-etl.js.snapshot/aws-glue-job-scalaspark-etl.template.json @@ -120,7 +120,7 @@ "Arn" ] }, - "WorkerType": "G.1X" + "WorkerType": "G.2X" } }, "OverrideScalaSparkETLJobC019089C": { diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-etl.js.snapshot/manifest.json b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-etl.js.snapshot/manifest.json index e3e06c10a6609..c93c9444a7e62 100644 --- a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-etl.js.snapshot/manifest.json +++ 
b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-etl.js.snapshot/manifest.json @@ -18,7 +18,7 @@ "validateOnSynth": false, "assumeRoleArn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-deploy-role-${AWS::AccountId}-${AWS::Region}", "cloudFormationExecutionRoleArn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-cfn-exec-role-${AWS::AccountId}-${AWS::Region}", - "stackTemplateAssetObjectUrl": "s3://cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}/1b363a750832e7361aa9ffe1e7a97587a65d5ea174e5a00960868a0e56967189.json", + "stackTemplateAssetObjectUrl": "s3://cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}/fe963e7893418cd93f28f2e8e97fe684357d3752eec25a95ff05d6ff51f1cc6a.json", "requiresBootstrapStackVersion": 6, "bootstrapStackVersionSsmParameter": "/cdk-bootstrap/hnb659fds/version", "additionalDependencies": [ diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-etl.js.snapshot/tree.json b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-etl.js.snapshot/tree.json index 473ba648d15be..d35613ddc768d 100644 --- a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-etl.js.snapshot/tree.json +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-etl.js.snapshot/tree.json @@ -205,7 +205,7 @@ "Arn" ] }, - "workerType": "G.1X" + "workerType": "G.2X" } }, "constructInfo": { diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-flex-etl.js.snapshot/aws-glue-job-scalasparkflex-etl.assets.json b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-flex-etl.js.snapshot/aws-glue-job-scalasparkflex-etl.assets.json index 246e028be6d6f..ca63d7d53bea7 100644 --- a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-flex-etl.js.snapshot/aws-glue-job-scalasparkflex-etl.assets.json +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-flex-etl.js.snapshot/aws-glue-job-scalasparkflex-etl.assets.json @@ -14,7 +14,7 @@ } } }, - 
"ff1b08d04e7d65e42ead8e33a88a380c6678218b733d0b350cd0bea32ec2944f": { + "237dbe47db947248325fa078c4275b440ec5fbd928f898566437c7273c49487d": { "source": { "path": "aws-glue-job-scalasparkflex-etl.template.json", "packaging": "file" @@ -22,7 +22,7 @@ "destinations": { "current_account-current_region": { "bucketName": "cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}", - "objectKey": "ff1b08d04e7d65e42ead8e33a88a380c6678218b733d0b350cd0bea32ec2944f.json", + "objectKey": "237dbe47db947248325fa078c4275b440ec5fbd928f898566437c7273c49487d.json", "assumeRoleArn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-file-publishing-role-${AWS::AccountId}-${AWS::Region}" } } diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-flex-etl.js.snapshot/aws-glue-job-scalasparkflex-etl.template.json b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-flex-etl.js.snapshot/aws-glue-job-scalasparkflex-etl.template.json index f046068d73e7c..a7b39ad547b08 100644 --- a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-flex-etl.js.snapshot/aws-glue-job-scalasparkflex-etl.template.json +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-flex-etl.js.snapshot/aws-glue-job-scalasparkflex-etl.template.json @@ -121,7 +121,7 @@ "Arn" ] }, - "WorkerType": "G.1X" + "WorkerType": "G.2X" } }, "OverrideScalaSparkFlexEtlJob843D93B4": { diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-flex-etl.js.snapshot/manifest.json b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-flex-etl.js.snapshot/manifest.json index 76778efc60610..3e947e62d8097 100644 --- a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-flex-etl.js.snapshot/manifest.json +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-flex-etl.js.snapshot/manifest.json @@ -18,7 +18,7 @@ "validateOnSynth": false, "assumeRoleArn": 
"arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-deploy-role-${AWS::AccountId}-${AWS::Region}", "cloudFormationExecutionRoleArn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-cfn-exec-role-${AWS::AccountId}-${AWS::Region}", - "stackTemplateAssetObjectUrl": "s3://cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}/ff1b08d04e7d65e42ead8e33a88a380c6678218b733d0b350cd0bea32ec2944f.json", + "stackTemplateAssetObjectUrl": "s3://cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}/237dbe47db947248325fa078c4275b440ec5fbd928f898566437c7273c49487d.json", "requiresBootstrapStackVersion": 6, "bootstrapStackVersionSsmParameter": "/cdk-bootstrap/hnb659fds/version", "additionalDependencies": [ diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-flex-etl.js.snapshot/tree.json b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-flex-etl.js.snapshot/tree.json index c8dc5ada88490..dd37a0c8b14c4 100644 --- a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-flex-etl.js.snapshot/tree.json +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-flex-etl.js.snapshot/tree.json @@ -206,7 +206,7 @@ "Arn" ] }, - "workerType": "G.1X" + "workerType": "G.2X" } }, "constructInfo": { diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-streaming.js.snapshot/aws-glue-job-scalaspark-streaming.assets.json b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-streaming.js.snapshot/aws-glue-job-scalaspark-streaming.assets.json index 96c25f918d60e..203d49224d57a 100644 --- a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-streaming.js.snapshot/aws-glue-job-scalaspark-streaming.assets.json +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-streaming.js.snapshot/aws-glue-job-scalaspark-streaming.assets.json @@ -14,7 +14,7 @@ } } }, - "36eef53bd55456183ff275e1ea5ce97d1544aee6a149d80bff0864e7a3f995bc": { + 
"6912184a6fddb01d6471c0c01a1140018cfbb2317d9c43706f2baaf7ef71d8f2": { "source": { "path": "aws-glue-job-scalaspark-streaming.template.json", "packaging": "file" @@ -22,7 +22,7 @@ "destinations": { "current_account-current_region": { "bucketName": "cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}", - "objectKey": "36eef53bd55456183ff275e1ea5ce97d1544aee6a149d80bff0864e7a3f995bc.json", + "objectKey": "6912184a6fddb01d6471c0c01a1140018cfbb2317d9c43706f2baaf7ef71d8f2.json", "assumeRoleArn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-file-publishing-role-${AWS::AccountId}-${AWS::Region}" } } diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-streaming.js.snapshot/aws-glue-job-scalaspark-streaming.template.json b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-streaming.js.snapshot/aws-glue-job-scalaspark-streaming.template.json index a0ef35b3188fd..99ae06b7e8fba 100644 --- a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-streaming.js.snapshot/aws-glue-job-scalaspark-streaming.template.json +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-streaming.js.snapshot/aws-glue-job-scalaspark-streaming.template.json @@ -120,7 +120,7 @@ "Arn" ] }, - "WorkerType": "G.1X" + "WorkerType": "G.2X" } }, "OverrideScalaSparkStreamingJob598931ED": { diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-streaming.js.snapshot/manifest.json b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-streaming.js.snapshot/manifest.json index f6830e1172c1d..b8848ef046158 100644 --- a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-streaming.js.snapshot/manifest.json +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-streaming.js.snapshot/manifest.json @@ -18,7 +18,7 @@ "validateOnSynth": false, "assumeRoleArn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-deploy-role-${AWS::AccountId}-${AWS::Region}", "cloudFormationExecutionRoleArn": 
"arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-cfn-exec-role-${AWS::AccountId}-${AWS::Region}", - "stackTemplateAssetObjectUrl": "s3://cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}/36eef53bd55456183ff275e1ea5ce97d1544aee6a149d80bff0864e7a3f995bc.json", + "stackTemplateAssetObjectUrl": "s3://cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}/6912184a6fddb01d6471c0c01a1140018cfbb2317d9c43706f2baaf7ef71d8f2.json", "requiresBootstrapStackVersion": 6, "bootstrapStackVersionSsmParameter": "/cdk-bootstrap/hnb659fds/version", "additionalDependencies": [ diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-streaming.js.snapshot/tree.json b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-streaming.js.snapshot/tree.json index 5b27549cab6e6..7fe9ebbe1ff9d 100644 --- a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-streaming.js.snapshot/tree.json +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-streaming.js.snapshot/tree.json @@ -205,7 +205,7 @@ "Arn" ] }, - "workerType": "G.1X" + "workerType": "G.2X" } }, "constructInfo": { diff --git a/packages/@aws-cdk/aws-glue-alpha/test/pyspark-etl-jobs.test.ts b/packages/@aws-cdk/aws-glue-alpha/test/pyspark-etl-jobs.test.ts index 2f3443be45adb..f1628c83f70b6 100644 --- a/packages/@aws-cdk/aws-glue-alpha/test/pyspark-etl-jobs.test.ts +++ b/packages/@aws-cdk/aws-glue-alpha/test/pyspark-etl-jobs.test.ts @@ -45,9 +45,9 @@ describe('Job', () => { }); }); - test('Default WorkerType should be G.1X', () => { + test('Default WorkerType should be G.2X', () => { Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { - WorkerType: 'G.1X', + WorkerType: 'G.2X', }); }); }); diff --git a/packages/@aws-cdk/aws-glue-alpha/test/pyspark-flex-etl-jobs.test.ts b/packages/@aws-cdk/aws-glue-alpha/test/pyspark-flex-etl-jobs.test.ts index 099a81bf4f2c5..77390ab39dc2a 100644 --- a/packages/@aws-cdk/aws-glue-alpha/test/pyspark-flex-etl-jobs.test.ts +++ 
b/packages/@aws-cdk/aws-glue-alpha/test/pyspark-flex-etl-jobs.test.ts @@ -39,9 +39,9 @@ describe('Job', () => { }); }); - test('Default WorkerType should be G.1X', () => { + test('Default WorkerType should be G.2X', () => { Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { - WorkerType: 'G.1X', + WorkerType: 'G.2X', }); }); diff --git a/packages/@aws-cdk/aws-glue-alpha/test/pyspark-streaming-jobs.test.ts b/packages/@aws-cdk/aws-glue-alpha/test/pyspark-streaming-jobs.test.ts index 3320ffd288298..a6ca7f2e9f472 100644 --- a/packages/@aws-cdk/aws-glue-alpha/test/pyspark-streaming-jobs.test.ts +++ b/packages/@aws-cdk/aws-glue-alpha/test/pyspark-streaming-jobs.test.ts @@ -45,9 +45,9 @@ describe('Job', () => { }); }); - test('Default WorkerType should be G.1X', () => { + test('Default WorkerType should be G.2X', () => { Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { - WorkerType: 'G.1X', + WorkerType: 'G.2X', }); }); }); diff --git a/packages/@aws-cdk/aws-glue-alpha/test/scalaspark-etl-jobs.test.ts b/packages/@aws-cdk/aws-glue-alpha/test/scalaspark-etl-jobs.test.ts index d718dfee4302f..1e5957dce85e4 100644 --- a/packages/@aws-cdk/aws-glue-alpha/test/scalaspark-etl-jobs.test.ts +++ b/packages/@aws-cdk/aws-glue-alpha/test/scalaspark-etl-jobs.test.ts @@ -47,9 +47,9 @@ describe('Job', () => { }); }); - test('Default WorkerType should be G.1X', () => { + test('Default WorkerType should be G.2X', () => { Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { - WorkerType: 'G.1X', + WorkerType: 'G.2X', }); }); }); diff --git a/packages/@aws-cdk/aws-glue-alpha/test/scalaspark-flex-etl-jobs.test.ts b/packages/@aws-cdk/aws-glue-alpha/test/scalaspark-flex-etl-jobs.test.ts index e34afefea0b10..f9e85e9b72e4b 100644 --- a/packages/@aws-cdk/aws-glue-alpha/test/scalaspark-flex-etl-jobs.test.ts +++ b/packages/@aws-cdk/aws-glue-alpha/test/scalaspark-flex-etl-jobs.test.ts @@ -41,9 +41,9 @@ describe('Job', () => { }); }); - test('Default 
WorkerType should be G.1X', () => { + test('Default WorkerType should be G.2X', () => { Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { - WorkerType: 'G.1X', + WorkerType: 'G.2X', }); }); diff --git a/packages/@aws-cdk/aws-glue-alpha/test/scalaspark-streaming-jobs.test.ts b/packages/@aws-cdk/aws-glue-alpha/test/scalaspark-streaming-jobs.test.ts index b6c44f3f0a154..853e28dfa4224 100644 --- a/packages/@aws-cdk/aws-glue-alpha/test/scalaspark-streaming-jobs.test.ts +++ b/packages/@aws-cdk/aws-glue-alpha/test/scalaspark-streaming-jobs.test.ts @@ -47,9 +47,9 @@ describe('Job', () => { }); }); - test('Default WorkerType should be G.1X', () => { + test('Default WorkerType should be G.2X', () => { Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { - WorkerType: 'G.1X', + WorkerType: 'G.2X', }); }); }); From 1cf6d29172828bf7cf70de84344732568f29cfe6 Mon Sep 17 00:00:00 2001 From: "Janardhan (Janny) Molumuri" Date: Wed, 26 Jun 2024 22:38:34 +0000 Subject: [PATCH 32/51] Updated default glue version to v2 --- packages/@aws-cdk/aws-glue-alpha/lib/jobs/job.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/@aws-cdk/aws-glue-alpha/lib/jobs/job.ts b/packages/@aws-cdk/aws-glue-alpha/lib/jobs/job.ts index 9892af15ea037..8dbe99ea6d916 100644 --- a/packages/@aws-cdk/aws-glue-alpha/lib/jobs/job.ts +++ b/packages/@aws-cdk/aws-glue-alpha/lib/jobs/job.ts @@ -354,7 +354,7 @@ export interface JobProperties { * Worker Type (optional) * Type of Worker for Glue to use during job execution * Enum options: Standard, G_1X, G_2X, G_025X. 
G_4X, G_8X, Z_2X - * @default G_1X + * @default G_2X **/ readonly workerType?: WorkerType; From d24c24aff1f332a622540f2161a0c0e1942f142f Mon Sep 17 00:00:00 2001 From: Natalie White Date: Thu, 27 Jun 2024 00:28:01 +0000 Subject: [PATCH 33/51] Delete Job Legacy classes, change default WorkerType back to G_1X --- packages/@aws-cdk/aws-glue-alpha/awslint.json | 9 +- packages/@aws-cdk/aws-glue-alpha/lib/index.ts | 4 +- .../aws-glue-alpha/lib/job-executable.ts | 327 ------ packages/@aws-cdk/aws-glue-alpha/lib/job.ts | 727 ------------ .../@aws-cdk/aws-glue-alpha/lib/jobs/job.ts | 6 +- .../lib/jobs/pyspark-etl-job.ts | 11 +- .../lib/jobs/pyspark-flex-etl-job.ts | 4 +- .../lib/jobs/pyspark-streaming-job.ts | 4 +- .../aws-glue-alpha/lib/jobs/ray-job.ts | 2 +- .../lib/jobs/scala-spark-etl-job.ts | 8 +- .../lib/jobs/scala-spark-flex-etl-job.ts | 4 +- .../lib/jobs/scala-spark-streaming-job.ts | 8 +- .../@aws-cdk/aws-glue-alpha/test/code.test.ts | 56 +- .../test/integ.job-pyspark-etl.ts | 4 +- .../test/integ.job-pyspark-flex-etl.ts | 2 +- .../test/integ.job-pyspark-streaming.ts | 4 +- .../test/integ.job-scalaspark-etl.ts | 4 +- .../test/integ.job-scalaspark-flex-etl.ts | 2 +- .../test/integ.job-scalaspark-streaming.ts | 4 +- .../@aws-cdk/aws-glue-alpha/test/integ.job.ts | 141 --- .../aws-glue-alpha/test/integ.ray-job.ts | 2 +- .../aws-glue-alpha/test/integ.workflow.ts | 4 +- .../test/job-executable.test.ts | 215 ---- .../@aws-cdk/aws-glue-alpha/test/job.test.ts | 1029 ----------------- .../test/pyspark-etl-jobs.test.ts | 4 +- .../test/pyspark-flex-etl-jobs.test.ts | 4 +- .../test/pyspark-streaming-jobs.test.ts | 4 +- .../test/scalaspark-etl-jobs.test.ts | 4 +- .../test/scalaspark-flex-etl-jobs.test.ts | 4 +- .../test/scalaspark-streaming-jobs.test.ts | 4 +- .../test/workflow-triggers.test.ts | 4 +- 31 files changed, 83 insertions(+), 2526 deletions(-) delete mode 100644 packages/@aws-cdk/aws-glue-alpha/lib/job-executable.ts delete mode 100644 
packages/@aws-cdk/aws-glue-alpha/lib/job.ts delete mode 100644 packages/@aws-cdk/aws-glue-alpha/test/integ.job.ts delete mode 100644 packages/@aws-cdk/aws-glue-alpha/test/job-executable.test.ts delete mode 100644 packages/@aws-cdk/aws-glue-alpha/test/job.test.ts diff --git a/packages/@aws-cdk/aws-glue-alpha/awslint.json b/packages/@aws-cdk/aws-glue-alpha/awslint.json index b42bc9d01c6c8..10a65be504fe6 100644 --- a/packages/@aws-cdk/aws-glue-alpha/awslint.json +++ b/packages/@aws-cdk/aws-glue-alpha/awslint.json @@ -51,16 +51,15 @@ "docs-public-apis:@aws-cdk/aws-glue-alpha.ITable", "docs-public-apis:@aws-cdk/aws-glue-alpha.ITable.tableArn", "docs-public-apis:@aws-cdk/aws-glue-alpha.ITable.tableName", - "props-default-doc:@aws-cdk/aws-glue-alpha.PythonRayExecutableProps.runtime", - "props-default-doc:@aws-cdk/aws-glue-alpha.PythonShellExecutableProps.runtime", - "props-default-doc:@aws-cdk/aws-glue-alpha.PythonSparkJobExecutableProps.runtime", "docs-public-apis:@aws-cdk/aws-glue-alpha.S3TableProps", - "props-default-doc:@aws-cdk/aws-glue-alpha.ScalaJobExecutableProps.runtime", "docs-public-apis:@aws-cdk/aws-glue-alpha.TableAttributes", "docs-public-apis:@aws-cdk/aws-glue-alpha.TableAttributes.tableArn", "docs-public-apis:@aws-cdk/aws-glue-alpha.TableAttributes.tableName", "docs-public-apis:@aws-cdk/aws-glue-alpha.TableBaseProps", "docs-public-apis:@aws-cdk/aws-glue-alpha.TableProps", - "docs-public-apis:@aws-cdk/aws-glue-alpha.PredicateLogical" + "docs-public-apis:@aws-cdk/aws-glue-alpha.PredicateLogical", + "no-unused-type:@aws-cdk/aws-glue-alpha.ExecutionClass", + "no-unused-type:@aws-cdk/aws-glue-alpha.JobLanguage", + "no-unused-type:@aws-cdk/aws-glue-alpha.JobType" ] } diff --git a/packages/@aws-cdk/aws-glue-alpha/lib/index.ts b/packages/@aws-cdk/aws-glue-alpha/lib/index.ts index 7d828c71e6510..ec13e6d7d0697 100644 --- a/packages/@aws-cdk/aws-glue-alpha/lib/index.ts +++ b/packages/@aws-cdk/aws-glue-alpha/lib/index.ts @@ -6,8 +6,6 @@ export * from './data-format'; 
export * from './data-quality-ruleset'; export * from './database'; export * from './external-table'; -export * from './job'; -export * from './job-executable'; export * from './s3-table'; export * from './schema'; export * from './security-configuration'; @@ -15,7 +13,7 @@ export * from './storage-parameter'; export * from './constants'; export * from './jobs/job'; export * from './jobs/pyspark-etl-job'; -export * from './jobs/pysparkflex-etl-job'; +export * from './jobs/pyspark-flex-etl-job'; export * from './jobs/pyspark-streaming-job'; export * from './jobs/python-shell-job'; export * from './jobs/ray-job'; diff --git a/packages/@aws-cdk/aws-glue-alpha/lib/job-executable.ts b/packages/@aws-cdk/aws-glue-alpha/lib/job-executable.ts deleted file mode 100644 index da73b2e17136e..0000000000000 --- a/packages/@aws-cdk/aws-glue-alpha/lib/job-executable.ts +++ /dev/null @@ -1,327 +0,0 @@ -import { Code } from './code'; -import { GlueVersion, JobType, PythonVersion, Runtime, JobLanguage } from './constants'; - -interface PythonExecutableProps { - /** - * The Python version to use. - */ - readonly pythonVersion: PythonVersion; - - /** - * Additional Python files that AWS Glue adds to the Python path before executing your script. - * Only individual files are supported, directories are not supported. - * - * @default - no extra python files and argument is not set - * - * @see `--extra-py-files` in https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-etl-glue-arguments.html - */ - readonly extraPythonFiles?: Code[]; -} - -interface SharedJobExecutableProps { - /** - * Runtime. It is required for Ray jobs. - * - */ - readonly runtime?: Runtime; - - /** - * Glue version. - * - * @see https://docs.aws.amazon.com/glue/latest/dg/release-notes.html - */ - readonly glueVersion: GlueVersion; - - /** - * The script that executes a job. 
- */ - readonly script: Code; - - /** - * Additional files, such as configuration files that AWS Glue copies to the working directory of your script before executing it. - * Only individual files are supported, directories are not supported. - * - * @default [] - no extra files are copied to the working directory - * - * @see `--extra-files` in https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-etl-glue-arguments.html - */ - readonly extraFiles?: Code[]; -} - -interface SharedSparkJobExecutableProps extends SharedJobExecutableProps { - /** - * Additional Java .jar files that AWS Glue adds to the Java classpath before executing your script. - * Only individual files are supported, directories are not supported. - * - * @default [] - no extra jars are added to the classpath - * - * @see `--extra-jars` in https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-etl-glue-arguments.html - */ - readonly extraJars?: Code[]; - - /** - * Setting this value to true prioritizes the customer's extra JAR files in the classpath. - * - * @default false - priority is not given to user-provided jars - * - * @see `--user-jars-first` in https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-etl-glue-arguments.html - */ - readonly extraJarsFirst?: boolean; -} - -/** - * Props for creating a Scala Spark (ETL or Streaming) job executable - */ -export interface ScalaJobExecutableProps extends SharedSparkJobExecutableProps { - /** - * The fully qualified Scala class name that serves as the entry point for the job. 
- * - * @see `--class` in https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-etl-glue-arguments.html - */ - readonly className: string; -} - -/** - * Props for creating a Python Spark (ETL or Streaming) job executable - */ -export interface PythonSparkJobExecutableProps extends SharedSparkJobExecutableProps, PythonExecutableProps {} - -/** - * Props for creating a Python shell job executable - */ -export interface PythonShellExecutableProps extends SharedJobExecutableProps, PythonExecutableProps {} - -/** - * Props for creating a Python Ray job executable - */ -export interface PythonRayExecutableProps extends SharedJobExecutableProps, PythonExecutableProps {} - -/** - * The executable properties related to the Glue job's GlueVersion, JobType and code - */ -export class JobExecutable { - - /** - * Create Scala executable props for Apache Spark ETL job. - * - * @param props Scala Apache Spark Job props - */ - public static scalaEtl(props: ScalaJobExecutableProps): JobExecutable { - return new JobExecutable({ - ...props, - type: JobType.ETL, - language: JobLanguage.SCALA, - }); - } - - /** - * Create Scala executable props for Apache Spark Streaming job. - * - * @param props Scala Apache Spark Job props - */ - public static scalaStreaming(props: ScalaJobExecutableProps): JobExecutable { - return new JobExecutable({ - ...props, - type: JobType.STREAMING, - language: JobLanguage.SCALA, - }); - } - - /** - * Create Python executable props for Apache Spark ETL job. - * - * @param props Python Apache Spark Job props - */ - public static pythonEtl(props: PythonSparkJobExecutableProps): JobExecutable { - return new JobExecutable({ - ...props, - type: JobType.ETL, - language: JobLanguage.PYTHON, - }); - } - - /** - * Create Python executable props for Apache Spark Streaming job. 
- * - * @param props Python Apache Spark Job props - */ - public static pythonStreaming(props: PythonSparkJobExecutableProps): JobExecutable { - return new JobExecutable({ - ...props, - type: JobType.STREAMING, - language: JobLanguage.PYTHON, - }); - } - - /** - * Create Python executable props for python shell jobs. - * - * @param props Python Shell Job props. - */ - public static pythonShell(props: PythonShellExecutableProps): JobExecutable { - return new JobExecutable({ - ...props, - type: JobType.PYTHON_SHELL, - language: JobLanguage.PYTHON, - }); - } - - /** - * Create Python executable props for Ray jobs. - * - * @param props Ray Job props. - */ - public static pythonRay(props: PythonRayExecutableProps): JobExecutable { - return new JobExecutable({ - ...props, - type: JobType.RAY, - language: JobLanguage.PYTHON, - }); - } - - /** - * Create a custom JobExecutable. - * - * @param config custom job executable configuration. - */ - public static of(config: JobExecutableConfig): JobExecutable { - return new JobExecutable(config); - } - - private config: JobExecutableConfig; - - private constructor(config: JobExecutableConfig) { - const glueVersion = config.glueVersion; - const type = config.type; - if (JobType.PYTHON_SHELL === type) { - if (config.language !== JobLanguage.PYTHON) { - throw new Error('Python shell requires the language to be set to Python'); - } - if ([GlueVersion.V0_9, GlueVersion.V4_0].includes(glueVersion)) { - throw new Error(`Specified GlueVersion ${glueVersion} does not support Python Shell`); - } - } - if (JobType.RAY === type) { - if (config.language !== JobLanguage.PYTHON) { - throw new Error('Ray requires the language to be set to Python'); - } - if ([GlueVersion.V0_9, GlueVersion.V1_0, GlueVersion.V2_0, GlueVersion.V3_0].includes(glueVersion)) { - throw new Error(`Specified GlueVersion ${glueVersion} does not support Ray`); - } - } - if (config.extraJarsFirst && [GlueVersion.V0_9, GlueVersion.V1_0].includes(glueVersion)) { - throw new 
Error(`Specified GlueVersion ${glueVersion} does not support extraJarsFirst`); - } - if (config.pythonVersion === PythonVersion.TWO && ![GlueVersion.V0_9, GlueVersion.V1_0].includes(glueVersion)) { - throw new Error(`Specified GlueVersion ${glueVersion} does not support PythonVersion ${config.pythonVersion}`); - } - if (JobLanguage.PYTHON !== config.language && config.extraPythonFiles) { - throw new Error('extraPythonFiles is not supported for languages other than JobLanguage.PYTHON'); - } - if (config.pythonVersion === PythonVersion.THREE_NINE && type !== JobType.PYTHON_SHELL && type !== JobType.RAY) { - throw new Error('Specified PythonVersion PythonVersion.THREE_NINE is only supported for JobType Python Shell and Ray'); - } - if (config.pythonVersion === PythonVersion.THREE && type === JobType.RAY) { - throw new Error('Specified PythonVersion PythonVersion.THREE is not supported for Ray'); - } - if (config.runtime === undefined && type === JobType.RAY) { - throw new Error('Runtime is required for Ray jobs.'); - } - this.config = config; - } - - /** - * Called during Job initialization to get JobExecutableConfig. - */ - public bind(): JobExecutableConfig { - return this.config; - } -} - -/** - * Result of binding a `JobExecutable` into a `Job`. - */ -export interface JobExecutableConfig { - /** - * Glue version. - * - * @see https://docs.aws.amazon.com/glue/latest/dg/release-notes.html - */ - readonly glueVersion: GlueVersion; - - /** - * The language of the job (Scala or Python). - * - * @see `--job-language` in https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-etl-glue-arguments.html - */ - readonly language: JobLanguage; - - /** - * Specify the type of the job whether it's an Apache Spark ETL or streaming one or if it's a Python shell job. - */ - readonly type: JobType; - - /** - * The Python version to use. - * - * @default - no python version specified - */ - readonly pythonVersion?: PythonVersion; - - /** - * The Runtime to use. 
- * - * @default - no runtime specified - */ - readonly runtime?: Runtime; - - /** - * The script that is executed by a job. - */ - readonly script: Code; - - /** - * The Scala class that serves as the entry point for the job. This applies only if your the job langauage is Scala. - * - * @default - no scala className specified - * - * @see `--class` in https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-etl-glue-arguments.html - */ - readonly className?: string; - - /** - * Additional Java .jar files that AWS Glue adds to the Java classpath before executing your script. - * - * @default - no extra jars specified. - * - * @see `--extra-jars` in https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-etl-glue-arguments.html - */ - readonly extraJars?: Code[]; - - /** - * Additional Python files that AWS Glue adds to the Python path before executing your script. - * - * @default - no extra python files specified. - * - * @see `--extra-py-files` in https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-etl-glue-arguments.html - */ - readonly extraPythonFiles?: Code[]; - - /** - * Additional files, such as configuration files that AWS Glue copies to the working directory of your script before executing it. - * - * @default - no extra files specified. - * - * @see `--extra-files` in https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-etl-glue-arguments.html - */ - readonly extraFiles?: Code[]; - - /** - * Setting this value to true prioritizes the customer's extra JAR files in the classpath. - * - * @default - extra jars are not prioritized. 
- * - * @see `--user-jars-first` in https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-etl-glue-arguments.html - */ - readonly extraJarsFirst?: boolean; -} diff --git a/packages/@aws-cdk/aws-glue-alpha/lib/job.ts b/packages/@aws-cdk/aws-glue-alpha/lib/job.ts deleted file mode 100644 index 0ebafb3db0c58..0000000000000 --- a/packages/@aws-cdk/aws-glue-alpha/lib/job.ts +++ /dev/null @@ -1,727 +0,0 @@ -import { EOL } from 'os'; -import * as cloudwatch from 'aws-cdk-lib/aws-cloudwatch'; -import * as events from 'aws-cdk-lib/aws-events'; -import * as iam from 'aws-cdk-lib/aws-iam'; -import * as logs from 'aws-cdk-lib/aws-logs'; -import * as s3 from 'aws-cdk-lib/aws-s3'; -import * as cdk from 'aws-cdk-lib/core'; -import { Code } from '.'; -import { JobExecutable, JobExecutableConfig } from './job-executable'; -import { IConnection } from './connection'; -import { CfnJob } from 'aws-cdk-lib/aws-glue'; -import { ISecurityConfiguration } from './security-configuration'; -import { JobType, JobState, MetricType, ExecutionClass, WorkerType } from './constants'; -import { Construct } from 'constructs'; -import { SparkUIProps, SparkUILoggingLocation } from './jobs/spark-ui-utils'; - -/** - * Interface representing a created or an imported `Job`. - */ -export interface IJobLegacy extends cdk.IResource, iam.IGrantable { - /** - * The name of the job. - * @attribute - */ - readonly jobName: string; - - /** - * The ARN of the job. - * @attribute - */ - readonly jobArn: string; - - /** - * Defines a CloudWatch event rule triggered when something happens with this job. - * - * @see https://docs.aws.amazon.com/AmazonCloudWatch/latest/events/EventTypes.html#glue-event-types - */ - onEvent(id: string, options?: events.OnEventOptions): events.Rule; - - /** - * Defines a CloudWatch event rule triggered when this job moves to the SUCCEEDED state. 
- * - * @see https://docs.aws.amazon.com/AmazonCloudWatch/latest/events/EventTypes.html#glue-event-types - */ - onSuccess(id: string, options?: events.OnEventOptions): events.Rule; - - /** - * Defines a CloudWatch event rule triggered when this job moves to the FAILED state. - * - * @see https://docs.aws.amazon.com/AmazonCloudWatch/latest/events/EventTypes.html#glue-event-types - */ - onFailure(id: string, options?: events.OnEventOptions): events.Rule; - - /** - * Defines a CloudWatch event rule triggered when this job moves to the TIMEOUT state. - * - * @see https://docs.aws.amazon.com/AmazonCloudWatch/latest/events/EventTypes.html#glue-event-types - */ - onTimeout(id: string, options?: events.OnEventOptions): events.Rule; - - /** - * Create a CloudWatch metric. - * - * @param metricName name of the metric typically prefixed with `glue.driver.`, `glue..` or `glue.ALL.`. - * @param type the metric type. - * @param props metric options. - * - * @see https://docs.aws.amazon.com/glue/latest/dg/monitoring-awsglue-with-cloudwatch-metrics.html - */ - metric(metricName: string, type: MetricType, props?: cloudwatch.MetricOptions): cloudwatch.Metric; - - /** - * Create a CloudWatch Metric indicating job success. - */ - metricSuccess(props?: cloudwatch.MetricOptions): cloudwatch.Metric; - - /** - * Create a CloudWatch Metric indicating job failure. - */ - metricFailure(props?: cloudwatch.MetricOptions): cloudwatch.Metric; - - /** - * Create a CloudWatch Metric indicating job timeout. - */ - metricTimeout(props?: cloudwatch.MetricOptions): cloudwatch.Metric; -} - -abstract class JobBaseLegacy extends cdk.Resource implements IJobLegacy { - - public abstract readonly jobArn: string; - public abstract readonly jobName: string; - public abstract readonly grantPrincipal: iam.IPrincipal; - - /** - * Create a CloudWatch Event Rule for this Glue Job when it's in a given state - * - * @param id construct id - * @param options event options. 
Note that some values are overridden if provided, these are - * - eventPattern.source = ['aws.glue'] - * - eventPattern.detailType = ['Glue Job State Change', 'Glue Job Run Status'] - * - eventPattern.detail.jobName = [this.jobName] - * - * @see https://docs.aws.amazon.com/AmazonCloudWatch/latest/events/EventTypes.html#glue-event-types - */ - public onEvent(id: string, options: events.OnEventOptions = {}): events.Rule { - const rule = new events.Rule(this, id, options); - rule.addTarget(options.target); - rule.addEventPattern({ - source: ['aws.glue'], - detailType: ['Glue Job State Change', 'Glue Job Run Status'], - detail: { - jobName: [this.jobName], - }, - }); - return rule; - } - - /** - * Create a CloudWatch Event Rule for the transition into the input jobState. - * - * @param id construct id. - * @param jobState the job state. - * @param options optional event options. - */ - protected onStateChange(id: string, jobState: JobState, options: events.OnEventOptions = {}): events.Rule { - const rule = this.onEvent(id, { - description: `Rule triggered when Glue job ${this.jobName} is in ${jobState} state`, - ...options, - }); - rule.addEventPattern({ - detail: { - state: [jobState], - }, - }); - return rule; - } - - /** - * Create a CloudWatch Event Rule matching JobState.SUCCEEDED. - * - * @param id construct id. - * @param options optional event options. default is {}. - */ - public onSuccess(id: string, options: events.OnEventOptions = {}): events.Rule { - return this.onStateChange(id, JobState.SUCCEEDED, options); - } - - /** - * Return a CloudWatch Event Rule matching FAILED state. - * - * @param id construct id. - * @param options optional event options. default is {}. - */ - public onFailure(id: string, options: events.OnEventOptions = {}): events.Rule { - return this.onStateChange(id, JobState.FAILED, options); - } - - /** - * Return a CloudWatch Event Rule matching TIMEOUT state. - * - * @param id construct id. - * @param options optional event options. 
default is {}. - */ - public onTimeout(id: string, options: events.OnEventOptions = {}): events.Rule { - return this.onStateChange(id, JobState.TIMEOUT, options); - } - - /** - * Create a CloudWatch metric. - * - * @param metricName name of the metric typically prefixed with `glue.driver.`, `glue..` or `glue.ALL.`. - * @param type the metric type. - * @param props metric options. - * - * @see https://docs.aws.amazon.com/glue/latest/dg/monitoring-awsglue-with-cloudwatch-metrics.html - */ - public metric(metricName: string, type: MetricType, props?: cloudwatch.MetricOptions): cloudwatch.Metric { - return new cloudwatch.Metric({ - metricName, - namespace: 'Glue', - dimensionsMap: { - JobName: this.jobName, - JobRunId: 'ALL', - Type: type, - }, - ...props, - }).attachTo(this); - } - - /** - * Return a CloudWatch Metric indicating job success. - * - * This metric is based on the Rule returned by no-args onSuccess() call. - */ - public metricSuccess(props?: cloudwatch.MetricOptions): cloudwatch.Metric { - return metricRule(this.metricJobStateRule('SuccessMetricRule', JobState.SUCCEEDED), props); - } - - /** - * Return a CloudWatch Metric indicating job failure. - * - * This metric is based on the Rule returned by no-args onFailure() call. - */ - public metricFailure(props?: cloudwatch.MetricOptions): cloudwatch.Metric { - return metricRule(this.metricJobStateRule('FailureMetricRule', JobState.FAILED), props); - } - - /** - * Return a CloudWatch Metric indicating job timeout. - * - * This metric is based on the Rule returned by no-args onTimeout() call. - */ - public metricTimeout(props?: cloudwatch.MetricOptions): cloudwatch.Metric { - return metricRule(this.metricJobStateRule('TimeoutMetricRule', JobState.TIMEOUT), props); - } - - /** - * Creates or retrieves a singleton event rule for the input job state for use with the metric JobState methods. - * - * @param id construct id. - * @param jobState the job state. 
- * @private - */ - private metricJobStateRule(id: string, jobState: JobState): events.Rule { - return this.node.tryFindChild(id) as events.Rule ?? this.onStateChange(id, jobState); - } -} - -/** - * Properties for enabling Continuous Logging for Glue Jobs. - * - * @see https://docs.aws.amazon.com/glue/latest/dg/monitor-continuous-logging-enable.html - * @see https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-etl-glue-arguments.html - */ -export interface ContinuousLoggingPropsLegacy { - /** - * Enable continouous logging. - */ - readonly enabled: boolean; - - /** - * Specify a custom CloudWatch log group name. - * - * @default - a log group is created with name `/aws-glue/jobs/logs-v2/`. - */ - readonly logGroup?: logs.ILogGroup; - - /** - * Specify a custom CloudWatch log stream prefix. - * - * @default - the job run ID. - */ - readonly logStreamPrefix?: string; - - /** - * Filter out non-useful Apache Spark driver/executor and Apache Hadoop YARN heartbeat log messages. - * - * @default true - */ - readonly quiet?: boolean; - - /** - * Apply the provided conversion pattern. - * - * This is a Log4j Conversion Pattern to customize driver and executor logs. - * - * @default `%d{yy/MM/dd HH:mm:ss} %p %c{1}: %m%n` - */ - readonly conversionPattern?: string; -} - -/** - * Attributes for importing `Job`. - */ -export interface JobLegacyAttributes { - /** - * The name of the job. - */ - readonly jobName: string; - - /** - * The IAM role assumed by Glue to run this job. - * - * @default - undefined - */ - readonly role?: iam.IRole; -} - -/** - * Construction properties for `Job`. - */ -export interface JobLegacyProps { - /** - * The job's executable properties. - */ - readonly executable: JobExecutable; - - /** - * The name of the job. - * - * @default - a name is automatically generated - */ - readonly jobName?: string; - - /** - * The description of the job. 
- * - * @default - no value - */ - readonly description?: string; - - /** - * The number of AWS Glue data processing units (DPUs) that can be allocated when this job runs. - * Cannot be used for Glue version 2.0 and later - workerType and workerCount should be used instead. - * - * @default - 10 when job type is Apache Spark ETL or streaming, 0.0625 when job type is Python shell - */ - readonly maxCapacity?: number; - - /** - * The maximum number of times to retry this job after a job run fails. - * - * @default 0 - */ - readonly maxRetries?: number; - - /** - * The maximum number of concurrent runs allowed for the job. - * - * An error is returned when this threshold is reached. The maximum value you can specify is controlled by a service limit. - * - * @default 1 - */ - readonly maxConcurrentRuns?: number; - - /** - * The number of minutes to wait after a job run starts, before sending a job run delay notification. - * - * @default - no delay notifications - */ - readonly notifyDelayAfter?: cdk.Duration; - - /** - * The maximum time that a job run can consume resources before it is terminated and enters TIMEOUT status. - * - * @default cdk.Duration.hours(48) - */ - readonly timeout?: cdk.Duration; - - /** - * The type of predefined worker that is allocated when a job runs. - * - * @default - differs based on specific Glue version - */ - readonly workerType?: WorkerType; - - /** - * The number of workers of a defined `WorkerType` that are allocated when a job runs. - * - * @default - differs based on specific Glue version/worker type - */ - readonly workerCount?: number; - - /** - * The `Connection`s used for this job. - * - * Connections are used to connect to other AWS Service or resources within a VPC. - * - * @default [] - no connections are added to the job - */ - readonly connections?: IConnection[]; - - /** - * The `SecurityConfiguration` to use for this job. - * - * @default - no security configuration. 
- */ - readonly securityConfiguration?: ISecurityConfiguration; - - /** - * The default arguments for this job, specified as name-value pairs. - * - * @see https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-etl-glue-arguments.html for a list of reserved parameters - * @default - no arguments - */ - readonly defaultArguments?: { [key: string]: string }; - - /** - * The tags to add to the resources on which the job runs - * - * @default {} - no tags - */ - readonly tags?: { [key: string]: string }; - - /** - * The IAM role assumed by Glue to run this job. - * - * If providing a custom role, it needs to trust the Glue service principal (glue.amazonaws.com) and be granted sufficient permissions. - * - * @see https://docs.aws.amazon.com/glue/latest/dg/getting-started-access.html - * - * @default - a role is automatically generated - */ - readonly role?: iam.IRole; - - /** - * Enables the collection of metrics for job profiling. - * - * @default - no profiling metrics emitted. - * - * @see `--enable-metrics` at https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-etl-glue-arguments.html - */ - readonly enableProfilingMetrics? :boolean; - - /** - * Enables the Spark UI debugging and monitoring with the specified props. - * - * @default - Spark UI debugging and monitoring is disabled. - * - * @see https://docs.aws.amazon.com/glue/latest/dg/monitor-spark-ui-jobs.html - * @see https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-etl-glue-arguments.html - */ - readonly sparkUI?: SparkUIProps; - - /** - * Enables continuous logging with the specified props. - * - * @default - continuous logging is disabled. 
- * - * @see https://docs.aws.amazon.com/glue/latest/dg/monitor-continuous-logging-enable.html - * @see https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-etl-glue-arguments.html - */ - readonly continuousLogging?: ContinuousLoggingPropsLegacy; - - /** - * The ExecutionClass whether the job is run with a standard or flexible execution class. - * - * @default - STANDARD - * - * @see https://docs.aws.amazon.com/glue/latest/dg/aws-glue-api-jobs-job.html#aws-glue-api-jobs-job-Job - * @see https://docs.aws.amazon.com/glue/latest/dg/add-job.html - */ - readonly executionClass?: ExecutionClass; -} - -/** - * A Glue Job. - * @resource AWS::Glue::Job - */ -export class JobLegacy extends JobBaseLegacy { - /** - * Creates a Glue Job - * - * @param scope The scope creating construct (usually `this`). - * @param id The construct's id. - * @param attrs Import attributes - */ - public static fromJobLegacyAttributes(scope: Construct, id: string, attrs: JobLegacyAttributes): IJobLegacy { - class Import extends JobBaseLegacy { - public readonly jobName = attrs.jobName; - public readonly jobArn = jobArn(scope, attrs.jobName); - public readonly grantPrincipal = attrs.role ?? new iam.UnknownPrincipal({ resource: this }); - } - - return new Import(scope, id); - } - - /** - * The ARN of the job. - */ - public readonly jobArn: string; - - /** - * The name of the job. - */ - public readonly jobName: string; - - /** - * The IAM role Glue assumes to run this job. - */ - public readonly role: iam.IRole; - - /** - * The principal this Glue Job is running as. - */ - public readonly grantPrincipal: iam.IPrincipal; - - /** - * The Spark UI logs location if Spark UI monitoring and debugging is enabled. 
- * - * @see https://docs.aws.amazon.com/glue/latest/dg/monitor-spark-ui-jobs.html - * @see https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-etl-glue-arguments.html - */ - public readonly sparkUILoggingLocation?: SparkUILoggingLocation; - - constructor(scope: Construct, id: string, props: JobLegacyProps) { - super(scope, id, { - physicalName: props.jobName, - }); - - const executable = props.executable.bind(); - - this.role = props.role ?? new iam.Role(this, 'ServiceRole', { - assumedBy: new iam.ServicePrincipal('glue.amazonaws.com'), - managedPolicies: [iam.ManagedPolicy.fromAwsManagedPolicyName('service-role/AWSGlueServiceRole')], - }); - this.grantPrincipal = this.role; - - const sparkUI = props.sparkUI ? this.setupSparkUI(executable, this.role, props.sparkUI) : undefined;; - this.sparkUILoggingLocation = sparkUI?.location; - const continuousLoggingArgs = props.continuousLogging?.enabled ? this.setupContinuousLogging(this.role, props.continuousLogging) : {}; - const profilingMetricsArgs = props.enableProfilingMetrics ? 
{ '--enable-metrics': '' } : {}; - - const defaultArguments = { - ...this.executableArguments(executable), - ...continuousLoggingArgs, - ...profilingMetricsArgs, - ...sparkUI?.args, - ...this.checkNoReservedArgs(props.defaultArguments), - }; - - // TODO: Implement these validations as interface contracts - - // if (props.executionClass === ExecutionClass.FLEX) { - // if (executable.type !== JobType.ETL) { - // throw new Error('FLEX ExecutionClass is only available for JobType.ETL jobs'); - // } - // if ([GlueVersion.V0_9, GlueVersion.V1_0, GlueVersion.V2_0].includes(executable.glueVersion)) { - // throw new Error('FLEX ExecutionClass is only available for GlueVersion 3.0 or later'); - // } - // if (props.workerType && (props.workerType !== WorkerType.G_1X && props.workerType !== WorkerType.G_2X)) { - // throw new Error('FLEX ExecutionClass is only available for WorkerType G_1X or G_2X'); - // } - // } - - // let maxCapacity = props.maxCapacity; - // if (maxCapacity !== undefined && (props.workerType && props.workerCount !== undefined)) { - // throw new Error('maxCapacity cannot be used when setting workerType and workerCount'); - // } - // if (executable.type !== JobType.PYTHON_SHELL) { - // if (maxCapacity !== undefined && ![GlueVersion.V0_9, GlueVersion.V1_0].includes(executable.glueVersion)) { - // throw new Error('maxCapacity cannot be used when GlueVersion 2.0 or later'); - // } - // } else { - // // max capacity validation for python shell jobs (defaults to 0.0625) - // maxCapacity = maxCapacity ?? 
0.0625; - // if (maxCapacity !== 0.0625 && maxCapacity !== 1) { - // throw new Error(`maxCapacity value must be either 0.0625 or 1 for JobType.PYTHON_SHELL jobs, received ${maxCapacity}`); - // } - // } - // if ((!props.workerType && props.workerCount !== undefined) || (props.workerType && props.workerCount === undefined)) { - // throw new Error('Both workerType and workerCount must be set'); - // } - - const jobResource = new CfnJob(this, 'Resource', { - name: props.jobName, - description: props.description, - role: this.role.roleArn, - command: { - name: executable.type, - scriptLocation: this.codeS3ObjectUrl(executable.script), - pythonVersion: executable.pythonVersion, - runtime: executable.runtime ? executable.runtime : undefined, - }, - glueVersion: executable.glueVersion, - workerType: props.workerType, - numberOfWorkers: props.workerCount, - maxCapacity: props.maxCapacity, - maxRetries: props.maxRetries, - executionClass: props.executionClass, - executionProperty: props.maxConcurrentRuns ? { maxConcurrentRuns: props.maxConcurrentRuns } : undefined, - notificationProperty: props.notifyDelayAfter ? { notifyDelayAfter: props.notifyDelayAfter.toMinutes() } : undefined, - timeout: props.timeout?.toMinutes(), - connections: props.connections ? { connections: props.connections.map((connection) => connection.connectionName) } : undefined, - securityConfiguration: props.securityConfiguration?.securityConfigurationName, - tags: props.tags, - defaultArguments, - }); - - const resourceName = this.getResourceNameAttribute(jobResource.ref); - this.jobArn = jobArn(this, resourceName); - this.jobName = resourceName; - } - - /** - * Check no usage of reserved arguments. 
- * - * @see https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-etl-glue-arguments.html - */ - private checkNoReservedArgs(defaultArguments?: { [key: string]: string }) { - if (defaultArguments) { - const reservedArgs = new Set(['--debug', '--mode', '--JOB_NAME']); - Object.keys(defaultArguments).forEach((arg) => { - if (reservedArgs.has(arg)) { - throw new Error(`The ${arg} argument is reserved by Glue. Don't set it`); - } - }); - } - return defaultArguments; - } - - private executableArguments(config: JobExecutableConfig) { - const args: { [key: string]: string } = {}; - args['--job-language'] = config.language; - if (config.className) { - args['--class'] = config.className; - } - if (config.extraJars && config.extraJars?.length > 0) { - args['--extra-jars'] = config.extraJars.map(code => this.codeS3ObjectUrl(code)).join(','); - } - if (config.extraPythonFiles && config.extraPythonFiles.length > 0) { - args['--extra-py-files'] = config.extraPythonFiles.map(code => this.codeS3ObjectUrl(code)).join(','); - } - if (config.extraFiles && config.extraFiles.length > 0) { - args['--extra-files'] = config.extraFiles.map(code => this.codeS3ObjectUrl(code)).join(','); - } - if (config.extraJarsFirst) { - args['--user-jars-first'] = 'true'; - } - return args; - } - - private setupSparkUI(executable: JobExecutableConfig, role: iam.IRole, props: SparkUIProps) { - if (JobType.PYTHON_SHELL === executable.type) { - throw new Error('Spark UI is not available for JobType.PYTHON_SHELL jobs'); - } else if (JobType.RAY === executable.type) { - throw new Error('Spark UI is not available for JobType.RAY jobs'); - } - - this.validatePrefix(props.prefix); - const bucket = props.bucket ?? 
new s3.Bucket(this, 'SparkUIBucket'); - bucket.grantReadWrite(role, this.cleanPrefixForGrant(props.prefix)); - const args = { - '--enable-spark-ui': 'true', - '--spark-event-logs-path': bucket.s3UrlForObject(props.prefix), - }; - - return { - location: { - prefix: props.prefix, - bucket, - }, - args, - }; - } - - private validatePrefix(prefix?: string): void { - if (!prefix || cdk.Token.isUnresolved(prefix)) { - // skip validation if prefix is not specified or is a token - return; - } - - const errors: string[] = []; - - if (prefix.startsWith('/')) { - errors.push('Prefix must not begin with \'/\''); - } - - if (!prefix.endsWith('/')) { - errors.push('Prefix must end with \'/\''); - } - - if (errors.length > 0) { - throw new Error(`Invalid prefix format (value: ${prefix})${EOL}${errors.join(EOL)}`); - } - } - - private cleanPrefixForGrant(prefix?: string): string | undefined { - return prefix !== undefined ? `${prefix}*` : undefined; - } - - private setupContinuousLogging(role: iam.IRole, props: ContinuousLoggingPropsLegacy) { - const args: {[key: string]: string} = { - '--enable-continuous-cloudwatch-log': 'true', - '--enable-continuous-log-filter': (props.quiet ?? 
true).toString(), - }; - - if (props.logGroup) { - args['--continuous-log-logGroup'] = props.logGroup.logGroupName; - props.logGroup.grantWrite(role); - } - - if (props.logStreamPrefix) { - args['--continuous-log-logStreamPrefix'] = props.logStreamPrefix; - } - if (props.conversionPattern) { - args['--continuous-log-conversionPattern'] = props.conversionPattern; - } - return args; - } - - private codeS3ObjectUrl(code: Code) { - const s3Location = code.bind(this, this.role).s3Location; - return `s3://${s3Location.bucketName}/${s3Location.objectKey}`; - } -} - -/** - * Create a CloudWatch Metric that's based on Glue Job events - * {@see https://docs.aws.amazon.com/AmazonCloudWatch/latest/events/EventTypes.html#glue-event-types} - * The metric has namespace = 'AWS/Events', metricName = 'TriggeredRules' and RuleName = rule.ruleName dimension. - * - * @param rule for use in setting RuleName dimension value - * @param props metric properties - */ -function metricRule(rule: events.IRule, props?: cloudwatch.MetricOptions): cloudwatch.Metric { - return new cloudwatch.Metric({ - namespace: 'AWS/Events', - metricName: 'TriggeredRules', - dimensionsMap: { RuleName: rule.ruleName }, - statistic: cloudwatch.Statistic.SUM, - ...props, - }).attachTo(rule); -} - -/** - * Returns the job arn - * @param scope - * @param jobName - */ -function jobArn(scope: Construct, jobName: string) : string { - return cdk.Stack.of(scope).formatArn({ - service: 'glue', - resource: 'job', - resourceName: jobName, - }); -} diff --git a/packages/@aws-cdk/aws-glue-alpha/lib/jobs/job.ts b/packages/@aws-cdk/aws-glue-alpha/lib/jobs/job.ts index 8dbe99ea6d916..c727856361dab 100644 --- a/packages/@aws-cdk/aws-glue-alpha/lib/jobs/job.ts +++ b/packages/@aws-cdk/aws-glue-alpha/lib/jobs/job.ts @@ -348,13 +348,13 @@ export interface JobProperties { * Number of workers for Glue to use during job execution * @default 10 */ - readonly numberOrWorkers?: number; + readonly numberOfWorkers?: number; /** * Worker Type 
(optional) * Type of Worker for Glue to use during job execution * Enum options: Standard, G_1X, G_2X, G_025X. G_4X, G_8X, Z_2X - * @default G_2X + * @default G_1X **/ readonly workerType?: WorkerType; @@ -438,7 +438,7 @@ export interface JobProperties { /** * Enables continuous logging with the specified props. * - * @default - continuous logging is disabled. + * @default - continuous logging is enabled. * * @see https://docs.aws.amazon.com/glue/latest/dg/monitor-continuous-logging-enable.html * @see https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-etl-glue-arguments.html diff --git a/packages/@aws-cdk/aws-glue-alpha/lib/jobs/pyspark-etl-job.ts b/packages/@aws-cdk/aws-glue-alpha/lib/jobs/pyspark-etl-job.ts index ea4c1a6cca858..ff83123b0b668 100644 --- a/packages/@aws-cdk/aws-glue-alpha/lib/jobs/pyspark-etl-job.ts +++ b/packages/@aws-cdk/aws-glue-alpha/lib/jobs/pyspark-etl-job.ts @@ -62,6 +62,8 @@ export class PySparkEtlJob extends Job { public readonly role: iam.IRole; public readonly grantPrincipal: iam.IPrincipal; + //private logGroup: LogGroup; + /** * The Spark UI logs location if Spark UI monitoring and debugging is enabled. 
* @@ -78,6 +80,7 @@ export class PySparkEtlJob extends Job { * @param props */ constructor(scope: Construct, id: string, props: PySparkEtlJobProps) { + super(scope, id, { physicalName: props.jobName, }); @@ -111,8 +114,8 @@ export class PySparkEtlJob extends Job { ...this.checkNoReservedArgs(props.defaultArguments), }; - if ((!props.workerType && props.numberOrWorkers !== undefined) || (props.workerType && props.numberOrWorkers === undefined)) { - throw new Error('Both workerType and numberOrWorkers must be set'); + if ((!props.workerType && props.numberOfWorkers !== undefined) || (props.workerType && props.numberOfWorkers === undefined)) { + throw new Error('Both workerType and numberOFWorkers must be set'); } const jobResource = new CfnJob(this, 'Resource', { @@ -125,8 +128,8 @@ export class PySparkEtlJob extends Job { pythonVersion: PythonVersion.THREE, }, glueVersion: props.glueVersion ? props.glueVersion : GlueVersion.V4_0, - workerType: props.workerType ? props.workerType : WorkerType.G_2X, - numberOfWorkers: props.numberOrWorkers ? props.numberOrWorkers : 10, + workerType: props.workerType ? props.workerType : WorkerType.G_1X, + numberOfWorkers: props.numberOfWorkers ? props.numberOfWorkers : 10, maxRetries: props.maxRetries, executionProperty: props.maxConcurrentRuns ? { maxConcurrentRuns: props.maxConcurrentRuns } : undefined, //notificationProperty: props.notifyDelayAfter ? { notifyDelayAfter: props.notifyDelayAfter.toMinutes() } : undefined, diff --git a/packages/@aws-cdk/aws-glue-alpha/lib/jobs/pyspark-flex-etl-job.ts b/packages/@aws-cdk/aws-glue-alpha/lib/jobs/pyspark-flex-etl-job.ts index f22ec6656956d..858f9f1a6ea53 100644 --- a/packages/@aws-cdk/aws-glue-alpha/lib/jobs/pyspark-flex-etl-job.ts +++ b/packages/@aws-cdk/aws-glue-alpha/lib/jobs/pyspark-flex-etl-job.ts @@ -131,8 +131,8 @@ export class PySparkFlexEtlJob extends Job { pythonVersion: PythonVersion.THREE, }, glueVersion: props.glueVersion ? 
props.glueVersion : GlueVersion.V3_0, - workerType: props.workerType ? props.workerType : WorkerType.G_2X, - numberOfWorkers: props.numberOrWorkers ? props.numberOrWorkers : 10, + workerType: props.workerType ? props.workerType : WorkerType.G_1X, + numberOfWorkers: props.numberOfWorkers ? props.numberOfWorkers : 10, maxRetries: props.maxRetries, executionProperty: props.maxConcurrentRuns ? { maxConcurrentRuns: props.maxConcurrentRuns } : undefined, notificationProperty: props.notifyDelayAfter ? { notifyDelayAfter: props.notifyDelayAfter.toMinutes() } : undefined, diff --git a/packages/@aws-cdk/aws-glue-alpha/lib/jobs/pyspark-streaming-job.ts b/packages/@aws-cdk/aws-glue-alpha/lib/jobs/pyspark-streaming-job.ts index f07d9abfc0342..60e8ad279c191 100644 --- a/packages/@aws-cdk/aws-glue-alpha/lib/jobs/pyspark-streaming-job.ts +++ b/packages/@aws-cdk/aws-glue-alpha/lib/jobs/pyspark-streaming-job.ts @@ -125,8 +125,8 @@ export class PySparkStreamingJob extends Job { pythonVersion: PythonVersion.THREE, }, glueVersion: props.glueVersion ? props.glueVersion : GlueVersion.V4_0, - workerType: props.workerType ? props.workerType : WorkerType.G_2X, - numberOfWorkers: props.numberOrWorkers ? props.numberOrWorkers : 10, + workerType: props.workerType ? props.workerType : WorkerType.G_1X, + numberOfWorkers: props.numberOfWorkers ? props.numberOfWorkers : 10, maxRetries: props.maxRetries, executionProperty: props.maxConcurrentRuns ? { maxConcurrentRuns: props.maxConcurrentRuns } : undefined, timeout: props.timeout?.toMinutes(), diff --git a/packages/@aws-cdk/aws-glue-alpha/lib/jobs/ray-job.ts b/packages/@aws-cdk/aws-glue-alpha/lib/jobs/ray-job.ts index 86d658d70ba66..5171520ff7a07 100644 --- a/packages/@aws-cdk/aws-glue-alpha/lib/jobs/ray-job.ts +++ b/packages/@aws-cdk/aws-glue-alpha/lib/jobs/ray-job.ts @@ -92,7 +92,7 @@ export class RayJob extends Job { }, glueVersion: props.glueVersion ? props.glueVersion : GlueVersion.V4_0, workerType: props.workerType ? 
props.workerType : WorkerType.Z_2X, - numberOfWorkers: props.numberOrWorkers ? props.numberOrWorkers: 3, + numberOfWorkers: props.numberOfWorkers ? props.numberOfWorkers: 3, maxRetries: props.maxRetries, executionProperty: props.maxConcurrentRuns ? { maxConcurrentRuns: props.maxConcurrentRuns } : undefined, timeout: props.timeout?.toMinutes(), diff --git a/packages/@aws-cdk/aws-glue-alpha/lib/jobs/scala-spark-etl-job.ts b/packages/@aws-cdk/aws-glue-alpha/lib/jobs/scala-spark-etl-job.ts index 2c508202207f7..d4b7cd8bb1a6b 100644 --- a/packages/@aws-cdk/aws-glue-alpha/lib/jobs/scala-spark-etl-job.ts +++ b/packages/@aws-cdk/aws-glue-alpha/lib/jobs/scala-spark-etl-job.ts @@ -117,8 +117,8 @@ export class ScalaSparkEtlJob extends Job { ...this.checkNoReservedArgs(props.defaultArguments), }; - if ((!props.workerType && props.numberOrWorkers !== undefined) || (props.workerType && props.numberOrWorkers === undefined)) { - throw new Error('Both workerType and numberOrWorkers must be set'); + if ((!props.workerType && props.numberOfWorkers !== undefined) || (props.workerType && props.numberOfWorkers === undefined)) { + throw new Error('Both workerType and numberOfWorkers must be set'); } const jobResource = new CfnJob(this, 'Resource', { @@ -130,8 +130,8 @@ export class ScalaSparkEtlJob extends Job { scriptLocation: this.codeS3ObjectUrl(props.script), }, glueVersion: props.glueVersion ? props.glueVersion : GlueVersion.V4_0, - workerType: props.workerType ? props.workerType : WorkerType.G_2X, - numberOfWorkers: props.numberOrWorkers ? props.numberOrWorkers : 10, + workerType: props.workerType ? props.workerType : WorkerType.G_1X, + numberOfWorkers: props.numberOfWorkers ? props.numberOfWorkers : 10, maxRetries: props.maxRetries, executionProperty: props.maxConcurrentRuns ? 
{ maxConcurrentRuns: props.maxConcurrentRuns } : undefined, timeout: props.timeout?.toMinutes(), diff --git a/packages/@aws-cdk/aws-glue-alpha/lib/jobs/scala-spark-flex-etl-job.ts b/packages/@aws-cdk/aws-glue-alpha/lib/jobs/scala-spark-flex-etl-job.ts index 33928ac6d205d..781ea5ba2b957 100644 --- a/packages/@aws-cdk/aws-glue-alpha/lib/jobs/scala-spark-flex-etl-job.ts +++ b/packages/@aws-cdk/aws-glue-alpha/lib/jobs/scala-spark-flex-etl-job.ts @@ -164,8 +164,8 @@ export class ScalaSparkFlexEtlJob extends Job { scriptLocation: this.codeS3ObjectUrl(props.script), }, glueVersion: props.glueVersion ? props.glueVersion : GlueVersion.V3_0, - workerType: props.workerType ? props.workerType : WorkerType.G_2X, - numberOfWorkers: props.numberOrWorkers ? props.numberOrWorkers : 10, + workerType: props.workerType ? props.workerType : WorkerType.G_1X, + numberOfWorkers: props.numberOfWorkers ? props.numberOfWorkers : 10, maxRetries: props.maxRetries, executionProperty: props.maxConcurrentRuns ? { maxConcurrentRuns: props.maxConcurrentRuns } : undefined, notificationProperty: props.notifyDelayAfter ? 
{ notifyDelayAfter: props.notifyDelayAfter.toMinutes() } : undefined, diff --git a/packages/@aws-cdk/aws-glue-alpha/lib/jobs/scala-spark-streaming-job.ts b/packages/@aws-cdk/aws-glue-alpha/lib/jobs/scala-spark-streaming-job.ts index d89c15b46ef40..7085cb2e71d37 100644 --- a/packages/@aws-cdk/aws-glue-alpha/lib/jobs/scala-spark-streaming-job.ts +++ b/packages/@aws-cdk/aws-glue-alpha/lib/jobs/scala-spark-streaming-job.ts @@ -116,8 +116,8 @@ export class ScalaSparkStreamingJob extends Job { ...this.checkNoReservedArgs(props.defaultArguments), }; - if ((!props.workerType && props.numberOrWorkers !== undefined) || (props.workerType && props.numberOrWorkers === undefined)) { - throw new Error('Both workerType and numberOrWorkers must be set'); + if ((!props.workerType && props.numberOfWorkers !== undefined) || (props.workerType && props.numberOfWorkers === undefined)) { + throw new Error('Both workerType and numberOfWorkers must be set'); } const jobResource = new CfnJob(this, 'Resource', { @@ -129,8 +129,8 @@ export class ScalaSparkStreamingJob extends Job { scriptLocation: this.codeS3ObjectUrl(props.script), }, glueVersion: props.glueVersion ? props.glueVersion : GlueVersion.V4_0, - workerType: props.workerType ? props.workerType : WorkerType.G_2X, - numberOfWorkers: props.numberOrWorkers ? props.numberOrWorkers : 10, + workerType: props.workerType ? props.workerType : WorkerType.G_1X, + numberOfWorkers: props.numberOfWorkers ? props.numberOfWorkers : 10, maxRetries: props.maxRetries, executionProperty: props.maxConcurrentRuns ? 
{ maxConcurrentRuns: props.maxConcurrentRuns } : undefined, timeout: props.timeout?.toMinutes(), diff --git a/packages/@aws-cdk/aws-glue-alpha/test/code.test.ts b/packages/@aws-cdk/aws-glue-alpha/test/code.test.ts index 8d40a1aae9a8c..93e2f9aeb24e2 100644 --- a/packages/@aws-cdk/aws-glue-alpha/test/code.test.ts +++ b/packages/@aws-cdk/aws-glue-alpha/test/code.test.ts @@ -4,6 +4,7 @@ import * as s3 from 'aws-cdk-lib/aws-s3'; import * as cdk from 'aws-cdk-lib'; import * as cxapi from 'aws-cdk-lib/cx-api'; import * as glue from '../lib'; +import { Role, ServicePrincipal } from 'aws-cdk-lib/aws-iam'; describe('Code', () => { let stack: cdk.Stack; @@ -21,11 +22,11 @@ describe('Code', () => { test('with valid bucket name and key and bound by job sets the right path and grants the job permissions to read from it', () => { bucket = s3.Bucket.fromBucketName(stack, 'Bucket', 'bucketname'); script = glue.Code.fromBucket(bucket, key); - new glue.JobLegacy(stack, 'Job1', { - executable: glue.JobExecutable.pythonShell({ - glueVersion: glue.GlueVersion.V1_0, - pythonVersion: glue.PythonVersion.THREE, - script, + + new glue.PythonShellJob(stack, 'Job1', { + script, + role: new Role(stack, 'Role', { + assumedBy: new ServicePrincipal('glue.amazonaws.com'), }), }); @@ -93,11 +94,10 @@ describe('Code', () => { }); test("with valid and existing file path and bound to job sets job's script location and permissions stack metadata", () => { - new glue.JobLegacy(stack, 'Job1', { - executable: glue.JobExecutable.pythonShell({ - glueVersion: glue.GlueVersion.V1_0, - pythonVersion: glue.PythonVersion.THREE, - script, + new glue.PythonShellJob(stack, 'Job1', { + script, + role: new Role(stack, 'Role', { + assumedBy: new ServicePrincipal('glue.amazonaws.com'), }), }); @@ -205,18 +205,16 @@ describe('Code', () => { }); test('used in more than 1 job in the same stack should be reused', () => { - new glue.JobLegacy(stack, 'Job1', { - executable: glue.JobExecutable.pythonShell({ - glueVersion: 
glue.GlueVersion.V1_0, - pythonVersion: glue.PythonVersion.THREE, - script, + new glue.PythonShellJob(stack, 'Job1', { + script, + role: new Role(stack, 'Role', { + assumedBy: new ServicePrincipal('glue.amazonaws.com'), }), }); - new glue.JobLegacy(stack, 'Job2', { - executable: glue.JobExecutable.pythonShell({ - glueVersion: glue.GlueVersion.V1_0, - pythonVersion: glue.PythonVersion.THREE, - script, + new glue.PythonShellJob(stack, 'Job2', { + script, + role: new Role(stack, 'Role', { + assumedBy: new ServicePrincipal('glue.amazonaws.com'), }), }); const ScriptLocation = { @@ -285,20 +283,18 @@ describe('Code', () => { }); test('throws if trying to rebind in another stack', () => { - new glue.JobLegacy(stack, 'Job1', { - executable: glue.JobExecutable.pythonShell({ - glueVersion: glue.GlueVersion.V1_0, - pythonVersion: glue.PythonVersion.THREE, - script, + new glue.PythonShellJob(stack, 'Job1', { + script, + role: new Role(stack, 'Role', { + assumedBy: new ServicePrincipal('glue.amazonaws.com'), }), }); const differentStack = new cdk.Stack(); - expect(() => new glue.JobLegacy(differentStack, 'Job2', { - executable: glue.JobExecutable.pythonShell({ - glueVersion: glue.GlueVersion.V1_0, - pythonVersion: glue.PythonVersion.THREE, - script: script, + expect(() => new glue.PythonShellJob(differentStack, 'Job1', { + script, + role: new Role(stack, 'Role', { + assumedBy: new ServicePrincipal('glue.amazonaws.com'), }), })).toThrow(/associated with another stack/); }); diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-etl.ts b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-etl.ts index 86f874b908b1b..e19bb8756e9a9 100644 --- a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-etl.ts +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-etl.ts @@ -40,8 +40,8 @@ new glue.PySparkEtlJob(stack, 'OverridePySparkETLJob', { role: iam_role, description: 'Optional Override PySpark ETL Job', glueVersion: glue.GlueVersion.V3_0, - 
numberOrWorkers: 20, - workerType: glue.WorkerType.G_2X, + numberOfWorkers: 20, + workerType: glue.WorkerType.G_1X, timeout: cdk.Duration.minutes(15), jobName: 'Optional Override PySpark ETL Job', defaultArguments: { diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-flex-etl.ts b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-flex-etl.ts index 09e662c579791..d53bb703c0123 100644 --- a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-flex-etl.ts +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-flex-etl.ts @@ -46,7 +46,7 @@ new glue.PySparkFlexEtlJob(stack, 'OverridePySparkFlexEtlJob', { role: iam_role, description: 'Optional Override PySpark Flex Etl Job', glueVersion: glue.GlueVersion.V3_0, - numberOrWorkers: 20, + numberOfWorkers: 20, workerType: glue.WorkerType.G_1X, timeout: cdk.Duration.minutes(15), jobName: 'Optional Override PySpark Flex Etl Job', diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-streaming.ts b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-streaming.ts index 9a640578e2768..bb225a6d40bb2 100644 --- a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-streaming.ts +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-streaming.ts @@ -40,8 +40,8 @@ new glue.PySparkStreamingJob(stack, 'OverridePySparkStreamingJob', { role: iam_role, description: 'Optional Override PySpark Streaming Job', glueVersion: glue.GlueVersion.V3_0, - numberOrWorkers: 20, - workerType: glue.WorkerType.G_2X, + numberOfWorkers: 20, + workerType: glue.WorkerType.G_1X, timeout: cdk.Duration.minutes(15), jobName: 'Optional Override PySpark Streaming Job', defaultArguments: { diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-etl.ts b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-etl.ts index 1432e78da249d..d9f9aba511928 100644 --- a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-etl.ts +++ 
b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-etl.ts @@ -43,8 +43,8 @@ new glue.ScalaSparkEtlJob(stack, 'OverrideScalaSparkETLJob', { role: iam_role, description: 'Optional Override ScalaSpark ETL Job', glueVersion: glue.GlueVersion.V3_0, - numberOrWorkers: 20, - workerType: glue.WorkerType.G_2X, + numberOfWorkers: 20, + workerType: glue.WorkerType.G_1X, timeout: cdk.Duration.minutes(15), jobName: 'Optional Override ScalaSpark ETL Job', defaultArguments: { diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-flex-etl.ts b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-flex-etl.ts index 58c1bf7a36348..6ecb66ded6352 100644 --- a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-flex-etl.ts +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-flex-etl.ts @@ -43,7 +43,7 @@ new glue.ScalaSparkFlexEtlJob(stack, 'OverrideScalaSparkFlexEtlJob', { role: iam_role, description: 'Optional Override ScalaSpark Flex Etl Job', glueVersion: glue.GlueVersion.V3_0, - numberOrWorkers: 20, + numberOfWorkers: 20, workerType: glue.WorkerType.G_1X, timeout: cdk.Duration.minutes(15), jobName: 'Optional Override ScalaSpark Flex Etl Job', diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-streaming.ts b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-streaming.ts index 277426faa0832..0d5e4878586b9 100644 --- a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-streaming.ts +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-streaming.ts @@ -43,8 +43,8 @@ new glue.ScalaSparkStreamingJob(stack, 'OverrideScalaSparkStreamingJob', { role: iam_role, description: 'Optional Override ScalaSpark Streaming Job', glueVersion: glue.GlueVersion.V3_0, - numberOrWorkers: 20, - workerType: glue.WorkerType.G_2X, + numberOfWorkers: 20, + workerType: glue.WorkerType.G_1X, timeout: cdk.Duration.minutes(15), jobName: 'Optional Override ScalaSpark Streaming Job', defaultArguments: { diff --git 
a/packages/@aws-cdk/aws-glue-alpha/test/integ.job.ts b/packages/@aws-cdk/aws-glue-alpha/test/integ.job.ts deleted file mode 100644 index af502a5b12237..0000000000000 --- a/packages/@aws-cdk/aws-glue-alpha/test/integ.job.ts +++ /dev/null @@ -1,141 +0,0 @@ -import * as path from 'path'; -import * as cdk from 'aws-cdk-lib'; -import * as glue from '../lib'; - -/** - * To verify the ability to run jobs created in this test - * - * Run the job using - * `aws glue start-job-run --region us-east-1 --job-name ` - * This will return a runId - * - * Get the status of the job run using - * `aws glue get-job-run --region us-east-1 --job-name --run-id ` - * - * For example, to test the ShellJob - * - Run: `aws glue start-job-run --region us-east-1 --job-name ShellJob` - * - Get Status: `aws glue get-job-run --region us-east-1 --job-name ShellJob --run-id ` - * - Check output: `aws logs get-log-events --region us-east-1 --log-group-name "/aws-glue/python-jobs/output" --log-stream-name ">` which should show "hello world" - */ -const app = new cdk.App(); - -const stack = new cdk.Stack(app, 'aws-glue-job'); - -const script = glue.Code.fromAsset(path.join(__dirname, 'job-script', 'hello_world.py')); - -[glue.GlueVersion.V2_0, glue.GlueVersion.V3_0, glue.GlueVersion.V4_0].forEach((glueVersion) => { - const etlJob = new glue.JobLegacy(stack, 'EtlJob' + glueVersion, { - jobName: 'EtlJob' + glueVersion, - executable: glue.JobExecutable.pythonEtl({ - pythonVersion: glue.PythonVersion.THREE, - glueVersion, - script, - }), - workerType: glue.WorkerType.G_1X, - workerCount: 10, - maxConcurrentRuns: 2, - maxRetries: 2, - timeout: cdk.Duration.minutes(5), - notifyDelayAfter: cdk.Duration.minutes(1), - defaultArguments: { - 'arg1': 'value1', - 'arg2': 'value2', - '--conf': 'valueConf', - }, - // sparkUI: { - // enabled: true, - // }, - continuousLogging: { - enabled: true, - quiet: true, - logStreamPrefix: 'EtlJob', - }, - executionClass: glue.ExecutionClass.STANDARD, - tags: { - key: 'value', 
- }, - }); - etlJob.metricSuccess(); - new glue.JobLegacy(stack, 'StreamingJob' + glueVersion, { - jobName: 'StreamingJob' + glueVersion, - executable: glue.JobExecutable.pythonStreaming({ - pythonVersion: glue.PythonVersion.THREE, - glueVersion, - script, - }), - workerType: [glue.GlueVersion.V2_0].includes(glueVersion) ? glue.WorkerType.G_1X : glue.WorkerType.G_025X, - workerCount: 10, - defaultArguments: { - arg1: 'value1', - arg2: 'value2', - }, - tags: { - key: 'value', - }, - }); -}); - -new glue.JobLegacy(stack, 'ShellJob', { - jobName: 'ShellJob', - executable: glue.JobExecutable.pythonShell({ - glueVersion: glue.GlueVersion.V1_0, - pythonVersion: glue.PythonVersion.THREE, - script, - }), - defaultArguments: { - arg1: 'value1', - arg2: 'value2', - }, - tags: { - key: 'value', - }, -}); - -new glue.JobLegacy(stack, 'ShellJob39', { - jobName: 'ShellJob39', - executable: glue.JobExecutable.pythonShell({ - glueVersion: glue.GlueVersion.V1_0, - pythonVersion: glue.PythonVersion.THREE_NINE, - script, - }), - defaultArguments: { - arg1: 'value1', - arg2: 'value2', - }, - tags: { - key: 'value', - }, -}); - -new glue.JobLegacy(stack, 'RayJob', { - jobName: 'RayJob', - executable: glue.JobExecutable.pythonRay({ - glueVersion: glue.GlueVersion.V4_0, - pythonVersion: glue.PythonVersion.THREE_NINE, - runtime: glue.Runtime.RAY_TWO_FOUR, - script, - }), - workerType: glue.WorkerType.Z_2X, - workerCount: 2, - defaultArguments: { - arg1: 'value1', - arg2: 'value2', - }, - tags: { - key: 'value', - }, -}); - -new glue.JobLegacy(stack, 'EtlJobWithFLEX', { - jobName: 'EtlJobWithFLEX', - executable: glue.JobExecutable.pythonEtl({ - glueVersion: glue.GlueVersion.V3_0, - pythonVersion: glue.PythonVersion.THREE, - script, - }), - workerType: glue.WorkerType.G_1X, - workerCount: 10, - executionClass: glue.ExecutionClass.FLEX, -}); - -app.synth(); diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.ray-job.ts b/packages/@aws-cdk/aws-glue-alpha/test/integ.ray-job.ts index 
ecd36233ab62e..35aa41f74c125 100644 --- a/packages/@aws-cdk/aws-glue-alpha/test/integ.ray-job.ts +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.ray-job.ts @@ -38,7 +38,7 @@ new glue.RayJob(stack, 'BasicRayJob', { new glue.RayJob(stack, 'RayJob5Workers', { script: script, role: iam_role, - numberOrWorkers: 5, + numberOfWorkers: 5, jobName: 'RayJobWith5Workers', defaultArguments: { arg1: 'value1', diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.workflow.ts b/packages/@aws-cdk/aws-glue-alpha/test/integ.workflow.ts index 5fb91a398d4ad..1f7e6cccd6302 100644 --- a/packages/@aws-cdk/aws-glue-alpha/test/integ.workflow.ts +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.workflow.ts @@ -22,7 +22,7 @@ const OutboundJob = new glue.PySparkEtlJob(stack, 'OutboundJob', { role, glueVersion: glue.GlueVersion.V4_0, workerType: glue.WorkerType.G_2X, - numberOrWorkers: 2, + numberOfWorkers: 2, }); const InboundJob = new glue.PySparkEtlJob(stack, 'InboundJob', { @@ -30,7 +30,7 @@ const InboundJob = new glue.PySparkEtlJob(stack, 'InboundJob', { role, glueVersion: glue.GlueVersion.V4_0, workerType: glue.WorkerType.G_2X, - numberOrWorkers: 2, + numberOfWorkers: 2, }); workflow.addOnDemandTrigger('OnDemandTrigger', { diff --git a/packages/@aws-cdk/aws-glue-alpha/test/job-executable.test.ts b/packages/@aws-cdk/aws-glue-alpha/test/job-executable.test.ts deleted file mode 100644 index 43028fc5109d7..0000000000000 --- a/packages/@aws-cdk/aws-glue-alpha/test/job-executable.test.ts +++ /dev/null @@ -1,215 +0,0 @@ -import * as s3 from 'aws-cdk-lib/aws-s3'; -import * as cdk from 'aws-cdk-lib'; -import * as glue from '../lib'; - -describe('JobExecutable', () => { - let stack: cdk.Stack; - let bucket: s3.IBucket; - let script: glue.Code; - - beforeEach(() => { - stack = new cdk.Stack(); - bucket = s3.Bucket.fromBucketName(stack, 'Bucket', 'bucketname'); - script = glue.Code.fromBucket(bucket, 'script.py'); - }); - - describe('.of()', () => { - test('with valid config should succeed', () 
=> { - expect(glue.JobExecutable.of({ - glueVersion: glue.GlueVersion.V1_0, - type: glue.JobType.PYTHON_SHELL, - language: glue.JobLanguage.PYTHON, - pythonVersion: glue.PythonVersion.THREE, - script, - })).toBeDefined(); - }); - - test('with JobType.PYTHON_SHELL and a language other than JobLanguage.PYTHON should throw', () => { - expect(() => glue.JobExecutable.of({ - glueVersion: glue.GlueVersion.V3_0, - type: glue.JobType.PYTHON_SHELL, - language: glue.JobLanguage.SCALA, - script, - })).toThrow(/Python shell requires the language to be set to Python/); - }); - - test('with JobType.of("glueray") and a language other than JobLanguage.PYTHON should throw', () => { - expect(() => glue.JobExecutable.of({ - glueVersion: glue.GlueVersion.V4_0, - type: glue.JobType.RAY, - language: glue.JobLanguage.SCALA, - script, - })).toThrow(/Ray requires the language to be set to Python/); - }); - - test('with JobType.RAY and a language other than JobLanguage.PYTHON should throw', () => { - expect(() => glue.JobExecutable.of({ - glueVersion: glue.GlueVersion.V4_0, - type: glue.JobType.RAY, - language: glue.JobLanguage.SCALA, - script, - })).toThrow(/Ray requires the language to be set to Python/); - }); - - test('with a non JobLanguage.PYTHON and extraPythonFiles set should throw', () => { - expect(() => glue.JobExecutable.of({ - glueVersion: glue.GlueVersion.V3_0, - type: glue.JobType.ETL, - language: glue.JobLanguage.SCALA, - className: 'com.Test', - extraPythonFiles: [script], - script, - })).toThrow(/extraPythonFiles is not supported for languages other than JobLanguage.PYTHON/); - }); - - [glue.GlueVersion.V0_9, glue.GlueVersion.V4_0].forEach((glueVersion) => { - test(`with JobType.PYTHON_SHELL and GlueVersion ${glueVersion} should throw`, () => { - expect(() => glue.JobExecutable.of({ - type: glue.JobType.PYTHON_SHELL, - language: glue.JobLanguage.PYTHON, - pythonVersion: glue.PythonVersion.TWO, - script, - glueVersion, - })).toThrow(`Specified GlueVersion ${glueVersion} 
does not support Python Shell`); - }); - }); - - [glue.GlueVersion.V0_9, glue.GlueVersion.V4_0].forEach((glueVersion) => { - test(`with JobType.PYTHON_SHELL and GlueVersion.of("${glueVersion}") should throw`, () => { - expect(() => glue.JobExecutable.of({ - type: glue.JobType.PYTHON_SHELL, - language: glue.JobLanguage.PYTHON, - pythonVersion: glue.PythonVersion.TWO, - script, - glueVersion: glueVersion, - })).toThrow(`Specified GlueVersion ${glueVersion} does not support Python Shell`); - }); - }); - - [glue.GlueVersion.V0_9, glue.GlueVersion.V1_0, glue.GlueVersion.V2_0, glue.GlueVersion.V3_0].forEach((glueVersion) => { - test(`with JobType.RAY and GlueVersion ${glueVersion} should throw`, () => { - expect(() => glue.JobExecutable.of({ - type: glue.JobType.RAY, - language: glue.JobLanguage.PYTHON, - pythonVersion: glue.PythonVersion.TWO, - script, - glueVersion, - })).toThrow(`Specified GlueVersion ${glueVersion} does not support Ray`); - }); - }); - - [glue.GlueVersion.V0_9, glue.GlueVersion.V1_0, glue.GlueVersion.V2_0, glue.GlueVersion.V3_0].forEach((glueVersion) => { - test(`with JobType.of("glueray") and GlueVersion ${glueVersion} should throw`, () => { - expect(() => glue.JobExecutable.of({ - type: glue.JobType.RAY, - language: glue.JobLanguage.PYTHON, - pythonVersion: glue.PythonVersion.TWO, - script, - glueVersion, - })).toThrow(`Specified GlueVersion ${glueVersion} does not support Ray`); - }); - }); - - [glue.GlueVersion.V0_9, glue.GlueVersion.V1_0].forEach((glueVersion) => { - test(`with extraJarsFirst set and GlueVersion ${glueVersion} should throw`, () => { - expect(() => glue.JobExecutable.of({ - type: glue.JobType.ETL, - language: glue.JobLanguage.PYTHON, - pythonVersion: glue.PythonVersion.TWO, - extraJarsFirst: true, - script, - glueVersion, - })).toThrow(`Specified GlueVersion ${glueVersion} does not support extraJarsFirst`); - }); - }); - - [glue.GlueVersion.V0_9, glue.GlueVersion.V1_0].forEach((glueVersion) => { - test(`with extraJarsFirst set 
and GlueVersion.of("${glueVersion}") should throw`, () => { - expect(() => glue.JobExecutable.of({ - type: glue.JobType.ETL, - language: glue.JobLanguage.PYTHON, - pythonVersion: glue.PythonVersion.TWO, - extraJarsFirst: true, - script, - glueVersion: glueVersion, - })).toThrow(`Specified GlueVersion ${glueVersion} does not support extraJarsFirst`); - }); - }); - - [glue.GlueVersion.V2_0, glue.GlueVersion.V3_0, glue.GlueVersion.V4_0].forEach((glueVersion) => { - test(`with PythonVersion.TWO and GlueVersion ${glueVersion} should throw`, () => { - expect(() => glue.JobExecutable.of({ - type: glue.JobType.ETL, - language: glue.JobLanguage.PYTHON, - pythonVersion: glue.PythonVersion.TWO, - script, - glueVersion, - })).toThrow(`Specified GlueVersion ${glueVersion} does not support PythonVersion 2`); - }); - }); - - [glue.GlueVersion.V2_0, glue.GlueVersion.V3_0, glue.GlueVersion.V4_0].forEach((glueVersion) => { - test(`with PythonVersion.TWO and GlueVersion.of("${glueVersion}") should throw`, () => { - expect(() => glue.JobExecutable.of({ - type: glue.JobType.ETL, - language: glue.JobLanguage.PYTHON, - pythonVersion: glue.PythonVersion.TWO, - script, - glueVersion: glueVersion, - })).toThrow(`Specified GlueVersion ${glueVersion} does not support PythonVersion 2`); - }); - }); - - test('with PythonVersion set to PythonVersion.THREE_NINE and JobType etl should throw', () => { - expect(() => glue.JobExecutable.of({ - type: glue.JobType.ETL, - language: glue.JobLanguage.PYTHON, - pythonVersion: glue.PythonVersion.THREE_NINE, - script, - glueVersion: glue.GlueVersion.V1_0, - })).toThrow('Specified PythonVersion PythonVersion.THREE_NINE is only supported for JobType Python Shell'); - }); - - test('with PythonVersion PythonVersion.THREE_NINE and JobType pythonshell should succeed', () => { - expect(glue.JobExecutable.of({ - type: glue.JobType.PYTHON_SHELL, - glueVersion: glue.GlueVersion.V1_0, - language: glue.JobLanguage.PYTHON, - pythonVersion: glue.PythonVersion.THREE_NINE, 
- script, - })).toBeDefined(); - }); - - test('with PythonVersion PythonVersion.THREE_NINE and JobType.of("pythonshell") should succeed', () => { - expect(glue.JobExecutable.of({ - type: glue.JobType.PYTHON_SHELL, - glueVersion: glue.GlueVersion.V1_0, - language: glue.JobLanguage.PYTHON, - pythonVersion: glue.PythonVersion.THREE_NINE, - script, - })).toBeDefined(); - }); - - test('with PythonVersion PythonVersion.THREE_NINE and JobType ray should succeed', () => { - expect(glue.JobExecutable.of({ - type: glue.JobType.RAY, - glueVersion: glue.GlueVersion.V4_0, - language: glue.JobLanguage.PYTHON, - pythonVersion: glue.PythonVersion.THREE_NINE, - runtime: glue.Runtime.RAY_TWO_FOUR, - script, - })).toBeDefined(); - }); - - test('with PythonVersion PythonVersion.THREE_NINE and JobTypeof("glueray") should succeed', () => { - expect(glue.JobExecutable.of({ - type: glue.JobType.RAY, - glueVersion: glue.GlueVersion.V4_0, - language: glue.JobLanguage.PYTHON, - pythonVersion: glue.PythonVersion.THREE_NINE, - runtime: glue.Runtime.RAY_TWO_FOUR, - script, - })).toBeDefined(); - }); - }); -}); \ No newline at end of file diff --git a/packages/@aws-cdk/aws-glue-alpha/test/job.test.ts b/packages/@aws-cdk/aws-glue-alpha/test/job.test.ts deleted file mode 100644 index b783edfd2f181..0000000000000 --- a/packages/@aws-cdk/aws-glue-alpha/test/job.test.ts +++ /dev/null @@ -1,1029 +0,0 @@ -import { EOL } from 'os'; -import { Template } from 'aws-cdk-lib/assertions'; -import * as cloudwatch from 'aws-cdk-lib/aws-cloudwatch'; -import * as events from 'aws-cdk-lib/aws-events'; -import * as iam from 'aws-cdk-lib/aws-iam'; -import * as logs from 'aws-cdk-lib/aws-logs'; -import * as s3 from 'aws-cdk-lib/aws-s3'; -import * as cdk from 'aws-cdk-lib'; -import * as glue from '../lib'; - -describe('Job', () => { - const jobName = 'test-job'; - let stack: cdk.Stack; - - beforeEach(() => { - stack = new cdk.Stack(); - }); - - describe('.fromJobAttributes()', () => { - test('with required attrs 
only', () => { - const job = glue.Job.fromJobAttributes(stack, 'ImportedJob', { jobName }); - - expect(job.jobName).toEqual(jobName); - expect(job.jobArn).toEqual(stack.formatArn({ - service: 'glue', - resource: 'job', - resourceName: jobName, - })); - expect(job.grantPrincipal).toEqual(new iam.UnknownPrincipal({ resource: job })); - }); - - test('with all attrs', () => { - const role = iam.Role.fromRoleArn(stack, 'Role', 'arn:aws:iam::123456789012:role/TestRole'); - const job = glue.Job.fromJobAttributes(stack, 'ImportedJob', { jobName, role }); - - expect(job.jobName).toEqual(jobName); - expect(job.jobArn).toEqual(stack.formatArn({ - service: 'glue', - resource: 'job', - resourceName: jobName, - })); - expect(job.grantPrincipal).toEqual(role); - }); - }); - - describe('new', () => { - const className = 'com.amazon.test.ClassName'; - const codeBucketName = 'bucketname'; - const codeBucketAccessStatement = { - Action: [ - 's3:GetObject*', - 's3:GetBucket*', - 's3:List*', - ], - Effect: 'Allow', - Resource: [ - { - 'Fn::Join': [ - '', - [ - 'arn:', - { - Ref: 'AWS::Partition', - }, - `:s3:::${codeBucketName}`, - ], - ], - }, - { - 'Fn::Join': [ - '', - [ - 'arn:', - { - Ref: 'AWS::Partition', - }, - `:s3:::${codeBucketName}/script`, - ], - ], - }, - ], - }; - let codeBucket: s3.IBucket; - let script: glue.Code; - let extraJars: glue.Code[]; - let extraFiles: glue.Code[]; - let extraPythonFiles: glue.Code[]; - let job: glue.JobLegacy; - let defaultProps: glue.JobLegacyProps; - - beforeEach(() => { - codeBucket = s3.Bucket.fromBucketName(stack, 'CodeBucket', codeBucketName); - script = glue.Code.fromBucket(codeBucket, 'script'); - extraJars = [glue.Code.fromBucket(codeBucket, 'file1.jar'), glue.Code.fromBucket(codeBucket, 'file2.jar')]; - extraPythonFiles = [glue.Code.fromBucket(codeBucket, 'file1.py'), glue.Code.fromBucket(codeBucket, 'file2.py')]; - extraFiles = [glue.Code.fromBucket(codeBucket, 'file1.txt'), glue.Code.fromBucket(codeBucket, 'file2.txt')]; - 
defaultProps = { - executable: glue.JobExecutable.scalaEtl({ - glueVersion: glue.GlueVersion.V2_0, - className, - script, - }), - }; - }); - - describe('with necessary props only', () => { - beforeEach(() => { - job = new glue.JobLegacy(stack, 'Job', defaultProps); - }); - - test('should create a role and use it with the job', () => { - Template.fromStack(stack).hasResourceProperties('AWS::IAM::Role', { - AssumeRolePolicyDocument: { - Statement: [ - { - Action: 'sts:AssumeRole', - Effect: 'Allow', - Principal: { - Service: 'glue.amazonaws.com', - }, - }, - ], - Version: '2012-10-17', - }, - ManagedPolicyArns: [ - { - 'Fn::Join': [ - '', - [ - 'arn:', - { - Ref: 'AWS::Partition', - }, - ':iam::aws:policy/service-role/AWSGlueServiceRole', - ], - ], - }, - ], - }); - - // Role policy should grant reading from the assets bucket - Template.fromStack(stack).hasResourceProperties('AWS::IAM::Policy', { - PolicyDocument: { - Statement: [ - codeBucketAccessStatement, - ], - }, - Roles: [ - { - Ref: 'JobServiceRole4F432993', - }, - ], - }); - - // check the job using the role - Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { - Command: { - Name: 'glueetl', - ScriptLocation: 's3://bucketname/script', - }, - Role: { - 'Fn::GetAtt': [ - 'JobServiceRole4F432993', - 'Arn', - ], - }, - }); - }); - - test('should return correct jobName and jobArn from CloudFormation', () => { - expect(stack.resolve(job.jobName)).toEqual({ Ref: 'JobB9D00F9F' }); - expect(stack.resolve(job.jobArn)).toEqual({ - 'Fn::Join': ['', ['arn:', { Ref: 'AWS::Partition' }, ':glue:', { Ref: 'AWS::Region' }, ':', { Ref: 'AWS::AccountId' }, ':job/', { Ref: 'JobB9D00F9F' }]], - }); - }); - - test('with a custom role should use it and set it in CloudFormation', () => { - const role = iam.Role.fromRoleArn(stack, 'Role', 'arn:aws:iam::123456789012:role/TestRole'); - job = new glue.JobLegacy(stack, 'JobWithRole', { - ...defaultProps, - role, - }); - - expect(job.grantPrincipal).toEqual(role); - 
Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { - Role: role.roleArn, - }); - }); - - test('with a custom jobName should set it in CloudFormation', () => { - job = new glue.JobLegacy(stack, 'JobWithName', { - ...defaultProps, - jobName, - }); - - Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { - Name: jobName, - }); - }); - }); - - describe('enabling continuous logging with defaults', () => { - beforeEach(() => { - job = new glue.JobLegacy(stack, 'Job', { - ...defaultProps, - continuousLogging: { enabled: true }, - }); - }); - - test('should set minimal default arguments', () => { - Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { - DefaultArguments: { - '--enable-continuous-cloudwatch-log': 'true', - '--enable-continuous-log-filter': 'true', - }, - }); - }); - }); - - describe('enabling continuous logging with all props set', () => { - let logGroup; - - beforeEach(() => { - logGroup = logs.LogGroup.fromLogGroupName(stack, 'LogGroup', 'LogGroupName'); - job = new glue.JobLegacy(stack, 'Job', { - ...defaultProps, - continuousLogging: { - enabled: true, - quiet: false, - logStreamPrefix: 'LogStreamPrefix', - conversionPattern: '%d{yy/MM/dd HH:mm:ss} %p %c{1}: %m%n', - logGroup, - }, - }); - }); - - test('should set all arguments', () => { - Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { - DefaultArguments: { - '--enable-continuous-cloudwatch-log': 'true', - '--enable-continuous-log-filter': 'false', - '--continuous-log-logGroup': 'LogGroupName', - '--continuous-log-logStreamPrefix': 'LogStreamPrefix', - '--continuous-log-conversionPattern': '%d{yy/MM/dd HH:mm:ss} %p %c{1}: %m%n', - }, - }); - }); - - test('should grant cloudwatch log write permissions', () => { - Template.fromStack(stack).hasResourceProperties('AWS::IAM::Policy', { - PolicyDocument: { - Statement: [ - { - Action: [ - 'logs:CreateLogStream', - 'logs:PutLogEvents', - ], - Effect: 'Allow', - Resource: { - 'Fn::Join': [ - '', 
- [ - 'arn:', - { - Ref: 'AWS::Partition', - }, - ':logs:', - { - Ref: 'AWS::Region', - }, - ':', - { - Ref: 'AWS::AccountId', - }, - ':log-group:LogGroupName:*', - ], - ], - }, - }, - codeBucketAccessStatement, - ], - }, - Roles: [ - { - Ref: 'JobServiceRole4F432993', - }, - ], - }); - }); - }); - - describe('enabling execution class', () => { - describe('enabling execution class with FLEX', () => { - beforeEach(() => { - job = new glue.JobLegacy(stack, 'Job', { - executable: glue.JobExecutable.pythonEtl({ - glueVersion: glue.GlueVersion.V3_0, - pythonVersion: glue.PythonVersion.THREE, - script, - }), - executionClass: glue.ExecutionClass.FLEX, - }); - }); - - test('should set FLEX', () => { - Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { - ExecutionClass: 'FLEX', - }); - }); - }); - - describe('enabling execution class with FLEX and WorkerType G_1X', () => { - beforeEach(() => { - job = new glue.JobLegacy(stack, 'Job', { - executable: glue.JobExecutable.pythonEtl({ - glueVersion: glue.GlueVersion.V3_0, - pythonVersion: glue.PythonVersion.THREE, - script, - }), - executionClass: glue.ExecutionClass.FLEX, - workerType: glue.WorkerType.G_1X, - workerCount: 10, - }); - }); - - test('should set FLEX', () => { - Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { - ExecutionClass: 'FLEX', - WorkerType: 'G.1X', - }); - }); - }); - - describe('enabling execution class with FLEX and WorkerType G_2X', () => { - beforeEach(() => { - job = new glue.JobLegacy(stack, 'Job', { - executable: glue.JobExecutable.pythonEtl({ - glueVersion: glue.GlueVersion.V3_0, - pythonVersion: glue.PythonVersion.THREE, - script, - }), - executionClass: glue.ExecutionClass.FLEX, - workerType: glue.WorkerType.G_2X, - workerCount: 10, - }); - }); - - test('should set FLEX', () => { - Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { - ExecutionClass: 'FLEX', - WorkerType: 'G.2X', - }); - }); - }); - - describe('enabling execution class with 
STANDARD', () => { - beforeEach(() => { - job = new glue.JobLegacy(stack, 'Job', { - executable: glue.JobExecutable.pythonEtl({ - glueVersion: glue.GlueVersion.V3_0, - pythonVersion: glue.PythonVersion.THREE, - script, - }), - executionClass: glue.ExecutionClass.STANDARD, - }); - }); - - test('should set STANDARD', () => { - Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { - ExecutionClass: 'STANDARD', - }); - }); - }); - - }); - - describe('enabling spark ui', () => { - describe('with no bucket or path provided', () => { - beforeEach(() => { - job = new glue.JobLegacy(stack, 'Job', { - ...defaultProps, - sparkUI: { }, - }); - }); - - test('should create spark ui bucket', () => { - Template.fromStack(stack).resourceCountIs('AWS::S3::Bucket', 1); - }); - - test('should grant the role read/write permissions to the spark ui bucket', () => { - Template.fromStack(stack).hasResourceProperties('AWS::IAM::Policy', { - PolicyDocument: { - Statement: [ - { - Action: [ - 's3:GetObject*', - 's3:GetBucket*', - 's3:List*', - 's3:DeleteObject*', - 's3:PutObject', - 's3:PutObjectLegalHold', - 's3:PutObjectRetention', - 's3:PutObjectTagging', - 's3:PutObjectVersionTagging', - 's3:Abort*', - ], - Effect: 'Allow', - Resource: [ - { - 'Fn::GetAtt': [ - 'JobSparkUIBucket8E6A0139', - 'Arn', - ], - }, - { - 'Fn::Join': [ - '', - [ - { - 'Fn::GetAtt': [ - 'JobSparkUIBucket8E6A0139', - 'Arn', - ], - }, - '/*', - ], - ], - }, - ], - }, - codeBucketAccessStatement, - ], - Version: '2012-10-17', - }, - PolicyName: 'JobServiceRoleDefaultPolicy03F68F9D', - Roles: [ - { - Ref: 'JobServiceRole4F432993', - }, - ], - }); - }); - - test('should set spark arguments on the job', () => { - Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { - DefaultArguments: { - '--enable-spark-ui': 'true', - '--spark-event-logs-path': { - 'Fn::Join': [ - '', - [ - 's3://', - { - Ref: 'JobSparkUIBucket8E6A0139', - }, - ], - ], - }, - }, - }); - }); - }); - - describe('with bucket 
provided', () => { - const sparkUIBucketName = 'sparkbucketname'; - let sparkUIBucket: s3.IBucket; - - beforeEach(() => { - sparkUIBucket = s3.Bucket.fromBucketName(stack, 'SparkBucketId', sparkUIBucketName); - job = new glue.JobLegacy(stack, 'Job', { - ...defaultProps, - sparkUI: { - bucket: sparkUIBucket, - }, - }); - }); - - test('should grant the role read/write permissions to the provided spark ui bucket', () => { - Template.fromStack(stack).hasResourceProperties('AWS::IAM::Policy', { - PolicyDocument: { - Statement: [ - { - Action: [ - 's3:GetObject*', - 's3:GetBucket*', - 's3:List*', - 's3:DeleteObject*', - 's3:PutObject', - 's3:PutObjectLegalHold', - 's3:PutObjectRetention', - 's3:PutObjectTagging', - 's3:PutObjectVersionTagging', - 's3:Abort*', - ], - Effect: 'Allow', - Resource: [ - { - 'Fn::Join': [ - '', - [ - 'arn:', - { - Ref: 'AWS::Partition', - }, - ':s3:::sparkbucketname', - ], - ], - }, - { - 'Fn::Join': [ - '', - [ - 'arn:', - { - Ref: 'AWS::Partition', - }, - ':s3:::sparkbucketname/*', - ], - ], - }, - ], - }, - codeBucketAccessStatement, - ], - }, - Roles: [ - { - Ref: 'JobServiceRole4F432993', - }, - ], - }); - }); - - test('should set spark arguments on the job', () => { - Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { - DefaultArguments: { - '--enable-spark-ui': 'true', - '--spark-event-logs-path': `s3://${sparkUIBucketName}`, - }, - }); - }); - }); - describe('with bucket and path provided', () => { - const sparkUIBucketName = 'sparkbucketname'; - const prefix = 'foob/bart/'; - const badPrefix = '/foob/bart'; - let sparkUIBucket: s3.IBucket; - - const expectedErrors = [ - `Invalid prefix format (value: ${badPrefix})`, - 'Prefix must not begin with \'/\'', - 'Prefix must end with \'/\'', - ].join(EOL); - it('fails if path is mis-formatted', () => { - expect(() => new glue.JobLegacy(stack, 'BadPrefixJob', { - ...defaultProps, - sparkUI: { - bucket: sparkUIBucket, - prefix: badPrefix, - }, - })).toThrow(expectedErrors); - 
}); - - beforeEach(() => { - sparkUIBucket = s3.Bucket.fromBucketName(stack, 'BucketId', sparkUIBucketName); - job = new glue.JobLegacy(stack, 'Job', { - ...defaultProps, - sparkUI: { - bucket: sparkUIBucket, - prefix: prefix, - }, - }); - }); - - it('should grant the role read/write permissions spark ui bucket prefixed folder', () => { - Template.fromStack(stack).hasResourceProperties('AWS::IAM::Policy', { - PolicyDocument: { - Statement: [ - { - Action: [ - 's3:GetObject*', - 's3:GetBucket*', - 's3:List*', - 's3:DeleteObject*', - 's3:PutObject', - 's3:PutObjectLegalHold', - 's3:PutObjectRetention', - 's3:PutObjectTagging', - 's3:PutObjectVersionTagging', - 's3:Abort*', - ], - Effect: 'Allow', - Resource: [ - { - 'Fn::Join': [ - '', - [ - 'arn:', - { Ref: 'AWS::Partition' }, - ':s3:::sparkbucketname', - ], - ], - }, - { - 'Fn::Join': [ - '', - [ - 'arn:', - { Ref: 'AWS::Partition' }, - `:s3:::sparkbucketname/${prefix}*`, - ], - ], - }, - ], - }, - codeBucketAccessStatement, - ], - Version: '2012-10-17', - }, - PolicyName: 'JobServiceRoleDefaultPolicy03F68F9D', - Roles: [{ Ref: 'JobServiceRole4F432993' }], - }); - }); - - it('should set spark arguments on the job', () => { - Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { - DefaultArguments: { - '--enable-spark-ui': 'true', - '--spark-event-logs-path': `s3://${sparkUIBucketName}/${prefix}`, - }, - }); - }); - }); - }); - - describe('with extended props', () => { - beforeEach(() => { - job = new glue.JobLegacy(stack, 'Job', { - ...defaultProps, - jobName, - description: 'test job', - workerType: glue.WorkerType.G_2X, - workerCount: 10, - maxConcurrentRuns: 2, - maxRetries: 2, - timeout: cdk.Duration.minutes(5), - notifyDelayAfter: cdk.Duration.minutes(1), - defaultArguments: { - arg1: 'value1', - arg2: 'value2', - }, - connections: [glue.Connection.fromConnectionName(stack, 'ImportedConnection', 'ConnectionName')], - securityConfiguration: 
glue.SecurityConfiguration.fromSecurityConfigurationName(stack, 'ImportedSecurityConfiguration', 'SecurityConfigurationName'), - enableProfilingMetrics: true, - tags: { - key: 'value', - }, - }); - }); - - test('should synthesize correctly', () => { - Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { - Command: { - Name: 'glueetl', - ScriptLocation: 's3://bucketname/script', - }, - Role: { - 'Fn::GetAtt': [ - 'JobServiceRole4F432993', - 'Arn', - ], - }, - DefaultArguments: { - '--job-language': 'scala', - '--class': 'com.amazon.test.ClassName', - '--enable-metrics': '', - 'arg1': 'value1', - 'arg2': 'value2', - }, - Description: 'test job', - ExecutionProperty: { - MaxConcurrentRuns: 2, - }, - GlueVersion: '2.0', - MaxRetries: 2, - Name: 'test-job', - NotificationProperty: { - NotifyDelayAfter: 1, - }, - NumberOfWorkers: 10, - Tags: { - key: 'value', - }, - Timeout: 5, - WorkerType: 'G.2X', - Connections: { - Connections: [ - 'ConnectionName', - ], - }, - SecurityConfiguration: 'SecurityConfigurationName', - }); - }); - }); - - test('with reserved args should throw', () => { - ['--debug', '--mode', '--JOB_NAME'].forEach((arg, index) => { - const defaultArguments: {[key: string]: string} = {}; - defaultArguments[arg] = 'random value'; - - expect(() => new glue.JobLegacy(stack, `Job${index}`, { - executable: glue.JobExecutable.scalaEtl({ - glueVersion: glue.GlueVersion.V2_0, - className, - script, - }), - defaultArguments, - })).toThrow(/argument is reserved by Glue/); - }); - }); - - describe('shell job', () => { - test('with unsupported glue version should throw', () => { - expect(() => new glue.JobLegacy(stack, 'Job', { - executable: glue.JobExecutable.pythonShell({ - glueVersion: glue.GlueVersion.V0_9, - pythonVersion: glue.PythonVersion.TWO, - script, - }), - })).toThrow('Specified GlueVersion 0.9 does not support Python Shell'); - }); - - test('with unsupported Spark UI prop should throw', () => { - expect(() => new glue.JobLegacy(stack, 
'Job', { - executable: glue.JobExecutable.pythonShell({ - glueVersion: glue.GlueVersion.V1_0, - pythonVersion: glue.PythonVersion.THREE, - script, - }), - sparkUI: { }, - })).toThrow('Spark UI is not available for JobType.PYTHON_SHELL'); - }); - }); - - describe('ray job', () => { - test('with unsupported glue version should throw', () => { - expect(() => new glue.JobLegacy(stack, 'Job', { - executable: glue.JobExecutable.pythonRay({ - glueVersion: glue.GlueVersion.V3_0, - pythonVersion: glue.PythonVersion.THREE_NINE, - runtime: glue.Runtime.RAY_TWO_FOUR, - script, - }), - workerType: glue.WorkerType.Z_2X, - workerCount: 2, - })).toThrow('Specified GlueVersion 3.0 does not support Ray'); - }); - - test('with unsupported Spark UI prop should throw', () => { - expect(() => new glue.JobLegacy(stack, 'Job', { - executable: glue.JobExecutable.pythonRay({ - glueVersion: glue.GlueVersion.V4_0, - pythonVersion: glue.PythonVersion.THREE_NINE, - runtime: glue.Runtime.RAY_TWO_FOUR, - script, - }), - workerType: glue.WorkerType.Z_2X, - workerCount: 2, - sparkUI: { }, - })).toThrow('Spark UI is not available for JobType.RAY'); - }); - - test('without runtime should throw', () => { - expect(() => new glue.JobLegacy(stack, 'Job', { - executable: glue.JobExecutable.pythonRay({ - glueVersion: glue.GlueVersion.V4_0, - pythonVersion: glue.PythonVersion.THREE_NINE, - script, - }), - workerType: glue.WorkerType.Z_2X, - workerCount: 2, - })).toThrow('Runtime is required for Ray jobs.'); - }); - }); - - test('etl job with all props should synthesize correctly', () => { - new glue.JobLegacy(stack, 'Job', { - executable: glue.JobExecutable.pythonEtl({ - glueVersion: glue.GlueVersion.V2_0, - pythonVersion: glue.PythonVersion.THREE, - extraJarsFirst: true, - script, - extraPythonFiles, - extraJars, - extraFiles, - }), - }); - - Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { - GlueVersion: '2.0', - Command: { - Name: 'glueetl', - ScriptLocation: 's3://bucketname/script', 
- PythonVersion: '3', - }, - Role: { - 'Fn::GetAtt': [ - 'JobServiceRole4F432993', - 'Arn', - ], - }, - DefaultArguments: { - '--job-language': 'python', - '--extra-jars': 's3://bucketname/file1.jar,s3://bucketname/file2.jar', - '--extra-py-files': 's3://bucketname/file1.py,s3://bucketname/file2.py', - '--extra-files': 's3://bucketname/file1.txt,s3://bucketname/file2.txt', - '--user-jars-first': 'true', - }, - }); - }); - - test('streaming job with all props should synthesize correctly', () => { - new glue.JobLegacy(stack, 'Job', { - executable: glue.JobExecutable.scalaStreaming({ - glueVersion: glue.GlueVersion.V2_0, - extraJarsFirst: true, - className, - script, - extraJars, - extraFiles, - }), - }); - - Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { - GlueVersion: '2.0', - Command: { - Name: 'gluestreaming', - ScriptLocation: 's3://bucketname/script', - }, - Role: { - 'Fn::GetAtt': [ - 'JobServiceRole4F432993', - 'Arn', - ], - }, - DefaultArguments: { - '--job-language': 'scala', - '--class': 'com.amazon.test.ClassName', - '--extra-jars': 's3://bucketname/file1.jar,s3://bucketname/file2.jar', - '--extra-files': 's3://bucketname/file1.txt,s3://bucketname/file2.txt', - '--user-jars-first': 'true', - }, - }); - }); - - describe('event rules and rule-based metrics', () => { - beforeEach(() => { - job = new glue.JobLegacy(stack, 'Job', { - executable: glue.JobExecutable.scalaEtl({ - glueVersion: glue.GlueVersion.V2_0, - className, - script, - }), - }); - }); - - test('.onEvent() should create the expected event rule', () => { - job.onEvent('eventId', {}); - - Template.fromStack(stack).hasResourceProperties('AWS::Events::Rule', { - EventPattern: { - 'source': [ - 'aws.glue', - ], - 'detail-type': [ - 'Glue Job State Change', - 'Glue Job Run Status', - ], - 'detail': { - jobName: [ - { - Ref: 'JobB9D00F9F', - }, - ], - }, - }, - State: 'ENABLED', - }); - }); - - [ - { name: 'onSuccess()', invoke: (testJob: glue.JobLegacy) => 
testJob.onSuccess('SuccessRule'), state: 'SUCCEEDED' }, - { name: 'onFailure()', invoke: (testJob: glue.JobLegacy) => testJob.onFailure('FailureRule'), state: 'FAILED' }, - { name: 'onTimeout()', invoke: (testJob: glue.JobLegacy) => testJob.onTimeout('TimeoutRule'), state: 'TIMEOUT' }, - ].forEach((testCase) => { - test(`${testCase.name} should create a rule with correct properties`, () => { - testCase.invoke(job); - - Template.fromStack(stack).hasResourceProperties('AWS::Events::Rule', { - Description: { - 'Fn::Join': [ - '', - [ - 'Rule triggered when Glue job ', - { - Ref: 'JobB9D00F9F', - }, - ` is in ${testCase.state} state`, - ], - ], - }, - EventPattern: { - 'source': [ - 'aws.glue', - ], - 'detail-type': [ - 'Glue Job State Change', - 'Glue Job Run Status', - ], - 'detail': { - state: [ - testCase.state, - ], - jobName: [ - { - Ref: 'JobB9D00F9F', - }, - ], - }, - }, - State: 'ENABLED', - }); - }); - }); - - [ - { name: '.metricSuccess()', invoke: (testJob: glue.JobLegacy) => testJob.metricSuccess(), state: 'SUCCEEDED', ruleId: 'SuccessMetricRule' }, - { name: '.metricFailure()', invoke: (testJob: glue.JobLegacy) => testJob.metricFailure(), state: 'FAILED', ruleId: 'FailureMetricRule' }, - { name: '.metricTimeout()', invoke: (testJob: glue.JobLegacy) => testJob.metricTimeout(), state: 'TIMEOUT', ruleId: 'TimeoutMetricRule' }, - ].forEach((testCase) => { - test(`${testCase.name} should create the expected singleton event rule and corresponding metric`, () => { - const metric = testCase.invoke(job); - testCase.invoke(job); - - expect(metric).toEqual(new cloudwatch.Metric({ - dimensionsMap: { - RuleName: (job.node.findChild(testCase.ruleId) as events.Rule).ruleName, - }, - metricName: 'TriggeredRules', - namespace: 'AWS/Events', - statistic: 'Sum', - })); - - Template.fromStack(stack).resourceCountIs('AWS::Events::Rule', 1); - Template.fromStack(stack).hasResourceProperties('AWS::Events::Rule', { - Description: { - 'Fn::Join': [ - '', - [ - 'Rule triggered 
when Glue job ', - { - Ref: 'JobB9D00F9F', - }, - ` is in ${testCase.state} state`, - ], - ], - }, - EventPattern: { - 'source': [ - 'aws.glue', - ], - 'detail-type': [ - 'Glue Job State Change', - 'Glue Job Run Status', - ], - 'detail': { - state: [ - testCase.state, - ], - jobName: [ - { - Ref: 'JobB9D00F9F', - }, - ], - }, - }, - State: 'ENABLED', - }); - }); - }); - }); - - describe('.metric()', () => { - - test('with MetricType.COUNT should create a count sum metric', () => { - const metricName = 'glue.driver.aggregate.bytesRead'; - const props = { statistic: cloudwatch.Statistic.SUM }; - - expect(job.metric(metricName, glue.MetricType.COUNT, props)).toEqual(new cloudwatch.Metric({ - metricName, - statistic: 'Sum', - namespace: 'Glue', - dimensionsMap: { - JobName: job.jobName, - JobRunId: 'ALL', - Type: 'count', - }, - })); - }); - - test('with MetricType.GAUGE should create a gauge average metric', () => { - const metricName = 'glue.driver.BlockManager.disk.diskSpaceUsed_MB'; - const props = { statistic: cloudwatch.Statistic.AVERAGE }; - - expect(job.metric(metricName, glue.MetricType.GAUGE, props)).toEqual(new cloudwatch.Metric({ - metricName, - statistic: 'Average', - namespace: 'Glue', - dimensionsMap: { - JobName: job.jobName, - JobRunId: 'ALL', - Type: 'gauge', - }, - })); - }); - }); - - }); -}); diff --git a/packages/@aws-cdk/aws-glue-alpha/test/pyspark-etl-jobs.test.ts b/packages/@aws-cdk/aws-glue-alpha/test/pyspark-etl-jobs.test.ts index f1628c83f70b6..2f3443be45adb 100644 --- a/packages/@aws-cdk/aws-glue-alpha/test/pyspark-etl-jobs.test.ts +++ b/packages/@aws-cdk/aws-glue-alpha/test/pyspark-etl-jobs.test.ts @@ -45,9 +45,9 @@ describe('Job', () => { }); }); - test('Default WorkerType should be G.2X', () => { + test('Default WorkerType should be G.1X', () => { Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { - WorkerType: 'G.2X', + WorkerType: 'G.1X', }); }); }); diff --git 
a/packages/@aws-cdk/aws-glue-alpha/test/pyspark-flex-etl-jobs.test.ts b/packages/@aws-cdk/aws-glue-alpha/test/pyspark-flex-etl-jobs.test.ts index 77390ab39dc2a..099a81bf4f2c5 100644 --- a/packages/@aws-cdk/aws-glue-alpha/test/pyspark-flex-etl-jobs.test.ts +++ b/packages/@aws-cdk/aws-glue-alpha/test/pyspark-flex-etl-jobs.test.ts @@ -39,9 +39,9 @@ describe('Job', () => { }); }); - test('Default WorkerType should be G.2X', () => { + test('Default WorkerType should be G.1X', () => { Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { - WorkerType: 'G.2X', + WorkerType: 'G.1X', }); }); diff --git a/packages/@aws-cdk/aws-glue-alpha/test/pyspark-streaming-jobs.test.ts b/packages/@aws-cdk/aws-glue-alpha/test/pyspark-streaming-jobs.test.ts index a6ca7f2e9f472..3320ffd288298 100644 --- a/packages/@aws-cdk/aws-glue-alpha/test/pyspark-streaming-jobs.test.ts +++ b/packages/@aws-cdk/aws-glue-alpha/test/pyspark-streaming-jobs.test.ts @@ -45,9 +45,9 @@ describe('Job', () => { }); }); - test('Default WorkerType should be G.2X', () => { + test('Default WorkerType should be G.1X', () => { Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { - WorkerType: 'G.2X', + WorkerType: 'G.1X', }); }); }); diff --git a/packages/@aws-cdk/aws-glue-alpha/test/scalaspark-etl-jobs.test.ts b/packages/@aws-cdk/aws-glue-alpha/test/scalaspark-etl-jobs.test.ts index 1e5957dce85e4..d718dfee4302f 100644 --- a/packages/@aws-cdk/aws-glue-alpha/test/scalaspark-etl-jobs.test.ts +++ b/packages/@aws-cdk/aws-glue-alpha/test/scalaspark-etl-jobs.test.ts @@ -47,9 +47,9 @@ describe('Job', () => { }); }); - test('Default WorkerType should be G.2X', () => { + test('Default WorkerType should be G.1X', () => { Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { - WorkerType: 'G.2X', + WorkerType: 'G.1X', }); }); }); diff --git a/packages/@aws-cdk/aws-glue-alpha/test/scalaspark-flex-etl-jobs.test.ts b/packages/@aws-cdk/aws-glue-alpha/test/scalaspark-flex-etl-jobs.test.ts 
index f9e85e9b72e4b..e34afefea0b10 100644 --- a/packages/@aws-cdk/aws-glue-alpha/test/scalaspark-flex-etl-jobs.test.ts +++ b/packages/@aws-cdk/aws-glue-alpha/test/scalaspark-flex-etl-jobs.test.ts @@ -41,9 +41,9 @@ describe('Job', () => { }); }); - test('Default WorkerType should be G.2X', () => { + test('Default WorkerType should be G.1X', () => { Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { - WorkerType: 'G.2X', + WorkerType: 'G.1X', }); }); diff --git a/packages/@aws-cdk/aws-glue-alpha/test/scalaspark-streaming-jobs.test.ts b/packages/@aws-cdk/aws-glue-alpha/test/scalaspark-streaming-jobs.test.ts index 853e28dfa4224..b6c44f3f0a154 100644 --- a/packages/@aws-cdk/aws-glue-alpha/test/scalaspark-streaming-jobs.test.ts +++ b/packages/@aws-cdk/aws-glue-alpha/test/scalaspark-streaming-jobs.test.ts @@ -47,9 +47,9 @@ describe('Job', () => { }); }); - test('Default WorkerType should be G.2X', () => { + test('Default WorkerType should be G.1X', () => { Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { - WorkerType: 'G.2X', + WorkerType: 'G.1X', }); }); }); diff --git a/packages/@aws-cdk/aws-glue-alpha/test/workflow-triggers.test.ts b/packages/@aws-cdk/aws-glue-alpha/test/workflow-triggers.test.ts index 74d80b7b455c4..28102fc925b08 100644 --- a/packages/@aws-cdk/aws-glue-alpha/test/workflow-triggers.test.ts +++ b/packages/@aws-cdk/aws-glue-alpha/test/workflow-triggers.test.ts @@ -24,8 +24,8 @@ describe('Workflow and Triggers', () => { script: glue.Code.fromAsset('test/job-script/hello_world.py'), role, glueVersion: glue.GlueVersion.V4_0, - workerType: glue.WorkerType.G_2X, - numberOrWorkers: 10, + workerType: glue.WorkerType.G_1X, + numberOfWorkers: 10, }); }); From d1f3dfcce81e91801753db49e4ba5e6d637bf8c6 Mon Sep 17 00:00:00 2001 From: "Janardhan (Janny) Molumuri" Date: Thu, 27 Jun 2024 02:28:53 +0000 Subject: [PATCH 34/51] Run tests and snapshot output --- .../aws-glue-job-pyspark-etl.assets.json | 4 +- 
.../aws-glue-job-pyspark-etl.template.json | 4 +- .../manifest.json | 2 +- .../tree.json | 4 +- ...aws-glue-job-pyspark-streaming.assets.json | 4 +- ...s-glue-job-pyspark-streaming.template.json | 4 +- .../manifest.json | 2 +- .../tree.json | 4 +- ...9be7858a12b228a2ae6e5c10faccd9097b1e855.py | 1 - .../aws-glue-job-pysparkflex-etl.assets.json | 32 -- ...aws-glue-job-pysparkflex-etl.template.json | 206 ---------- ...efaultTestDeployAssert3F3EC951.assets.json | 19 - ...aultTestDeployAssert3F3EC951.template.json | 36 -- .../cdk.out | 1 - .../integ.json | 12 - .../manifest.json | 131 ------ .../tree.json | 375 ------------------ .../aws-glue-job-scalaspark-etl.assets.json | 4 +- .../aws-glue-job-scalaspark-etl.template.json | 4 +- .../manifest.json | 2 +- .../tree.json | 4 +- ...ws-glue-job-scalasparkflex-etl.assets.json | 4 +- ...-glue-job-scalasparkflex-etl.template.json | 2 +- .../manifest.json | 2 +- .../tree.json | 2 +- ...-glue-job-scalaspark-streaming.assets.json | 4 +- ...lue-job-scalaspark-streaming.template.json | 4 +- .../manifest.json | 2 +- .../tree.json | 4 +- 29 files changed, 33 insertions(+), 846 deletions(-) delete mode 100644 packages/@aws-cdk/aws-glue-alpha/test/integ.job-pysparkflex-etl.js.snapshot/asset.432033e3218068a915d2532fa9be7858a12b228a2ae6e5c10faccd9097b1e855.py delete mode 100644 packages/@aws-cdk/aws-glue-alpha/test/integ.job-pysparkflex-etl.js.snapshot/aws-glue-job-pysparkflex-etl.assets.json delete mode 100644 packages/@aws-cdk/aws-glue-alpha/test/integ.job-pysparkflex-etl.js.snapshot/aws-glue-job-pysparkflex-etl.template.json delete mode 100644 packages/@aws-cdk/aws-glue-alpha/test/integ.job-pysparkflex-etl.js.snapshot/awsgluejobpysparkflexetlintegtestDefaultTestDeployAssert3F3EC951.assets.json delete mode 100644 packages/@aws-cdk/aws-glue-alpha/test/integ.job-pysparkflex-etl.js.snapshot/awsgluejobpysparkflexetlintegtestDefaultTestDeployAssert3F3EC951.template.json delete mode 100644 
packages/@aws-cdk/aws-glue-alpha/test/integ.job-pysparkflex-etl.js.snapshot/cdk.out delete mode 100644 packages/@aws-cdk/aws-glue-alpha/test/integ.job-pysparkflex-etl.js.snapshot/integ.json delete mode 100644 packages/@aws-cdk/aws-glue-alpha/test/integ.job-pysparkflex-etl.js.snapshot/manifest.json delete mode 100644 packages/@aws-cdk/aws-glue-alpha/test/integ.job-pysparkflex-etl.js.snapshot/tree.json diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-etl.js.snapshot/aws-glue-job-pyspark-etl.assets.json b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-etl.js.snapshot/aws-glue-job-pyspark-etl.assets.json index 5b7eb192d328a..6b7e51197ff6d 100644 --- a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-etl.js.snapshot/aws-glue-job-pyspark-etl.assets.json +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-etl.js.snapshot/aws-glue-job-pyspark-etl.assets.json @@ -14,7 +14,7 @@ } } }, - "4463547df769ed17f0742accca0ff3fafe85200595d5fc0e41bb662e61244f9b": { + "b69265e3929a8fd6ea69db3797d7382df3816ef168f310b15d41ba6fe9f00e81": { "source": { "path": "aws-glue-job-pyspark-etl.template.json", "packaging": "file" @@ -22,7 +22,7 @@ "destinations": { "current_account-current_region": { "bucketName": "cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}", - "objectKey": "4463547df769ed17f0742accca0ff3fafe85200595d5fc0e41bb662e61244f9b.json", + "objectKey": "b69265e3929a8fd6ea69db3797d7382df3816ef168f310b15d41ba6fe9f00e81.json", "assumeRoleArn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-file-publishing-role-${AWS::AccountId}-${AWS::Region}" } } diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-etl.js.snapshot/aws-glue-job-pyspark-etl.template.json b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-etl.js.snapshot/aws-glue-job-pyspark-etl.template.json index b4f9e98f37c82..9da907d9a9057 100644 --- 
a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-etl.js.snapshot/aws-glue-job-pyspark-etl.template.json +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-etl.js.snapshot/aws-glue-job-pyspark-etl.template.json @@ -120,7 +120,7 @@ "Arn" ] }, - "WorkerType": "G.2X" + "WorkerType": "G.1X" } }, "OverridePySparkETLJob85E17065": { @@ -163,7 +163,7 @@ "key": "value" }, "Timeout": 15, - "WorkerType": "G.2X" + "WorkerType": "G.1X" } } }, diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-etl.js.snapshot/manifest.json b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-etl.js.snapshot/manifest.json index daac6d41a220f..35410085e0eec 100644 --- a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-etl.js.snapshot/manifest.json +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-etl.js.snapshot/manifest.json @@ -18,7 +18,7 @@ "validateOnSynth": false, "assumeRoleArn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-deploy-role-${AWS::AccountId}-${AWS::Region}", "cloudFormationExecutionRoleArn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-cfn-exec-role-${AWS::AccountId}-${AWS::Region}", - "stackTemplateAssetObjectUrl": "s3://cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}/4463547df769ed17f0742accca0ff3fafe85200595d5fc0e41bb662e61244f9b.json", + "stackTemplateAssetObjectUrl": "s3://cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}/b69265e3929a8fd6ea69db3797d7382df3816ef168f310b15d41ba6fe9f00e81.json", "requiresBootstrapStackVersion": 6, "bootstrapStackVersionSsmParameter": "/cdk-bootstrap/hnb659fds/version", "additionalDependencies": [ diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-etl.js.snapshot/tree.json b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-etl.js.snapshot/tree.json index 4d8d80ee19641..b68d28f106e7f 100644 --- a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-etl.js.snapshot/tree.json +++ 
b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-etl.js.snapshot/tree.json @@ -205,7 +205,7 @@ "Arn" ] }, - "workerType": "G.2X" + "workerType": "G.1X" } }, "constructInfo": { @@ -266,7 +266,7 @@ "key": "value" }, "timeout": 15, - "workerType": "G.2X" + "workerType": "G.1X" } }, "constructInfo": { diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-streaming.js.snapshot/aws-glue-job-pyspark-streaming.assets.json b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-streaming.js.snapshot/aws-glue-job-pyspark-streaming.assets.json index fdf1a5470ac7d..026c7f43a5905 100644 --- a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-streaming.js.snapshot/aws-glue-job-pyspark-streaming.assets.json +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-streaming.js.snapshot/aws-glue-job-pyspark-streaming.assets.json @@ -14,7 +14,7 @@ } } }, - "71819bd0b99d0fc653e7361f6fe1a8a6c5fcadd0b52a1aeb4e49f7fa5d78115e": { + "366e48db9f32a5807817c16accd9f4363d2ee0a5fc43c2a11c70bb0bbec71fe1": { "source": { "path": "aws-glue-job-pyspark-streaming.template.json", "packaging": "file" @@ -22,7 +22,7 @@ "destinations": { "current_account-current_region": { "bucketName": "cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}", - "objectKey": "71819bd0b99d0fc653e7361f6fe1a8a6c5fcadd0b52a1aeb4e49f7fa5d78115e.json", + "objectKey": "366e48db9f32a5807817c16accd9f4363d2ee0a5fc43c2a11c70bb0bbec71fe1.json", "assumeRoleArn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-file-publishing-role-${AWS::AccountId}-${AWS::Region}" } } diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-streaming.js.snapshot/aws-glue-job-pyspark-streaming.template.json b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-streaming.js.snapshot/aws-glue-job-pyspark-streaming.template.json index 3253ef37bca76..acf446817f955 100644 --- 
a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-streaming.js.snapshot/aws-glue-job-pyspark-streaming.template.json +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-streaming.js.snapshot/aws-glue-job-pyspark-streaming.template.json @@ -120,7 +120,7 @@ "Arn" ] }, - "WorkerType": "G.2X" + "WorkerType": "G.1X" } }, "OverridePySparkStreamingJob58DE176A": { @@ -163,7 +163,7 @@ "key": "value" }, "Timeout": 15, - "WorkerType": "G.2X" + "WorkerType": "G.1X" } } }, diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-streaming.js.snapshot/manifest.json b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-streaming.js.snapshot/manifest.json index 141b55d62076c..1c424568eeffe 100644 --- a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-streaming.js.snapshot/manifest.json +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-streaming.js.snapshot/manifest.json @@ -18,7 +18,7 @@ "validateOnSynth": false, "assumeRoleArn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-deploy-role-${AWS::AccountId}-${AWS::Region}", "cloudFormationExecutionRoleArn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-cfn-exec-role-${AWS::AccountId}-${AWS::Region}", - "stackTemplateAssetObjectUrl": "s3://cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}/71819bd0b99d0fc653e7361f6fe1a8a6c5fcadd0b52a1aeb4e49f7fa5d78115e.json", + "stackTemplateAssetObjectUrl": "s3://cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}/366e48db9f32a5807817c16accd9f4363d2ee0a5fc43c2a11c70bb0bbec71fe1.json", "requiresBootstrapStackVersion": 6, "bootstrapStackVersionSsmParameter": "/cdk-bootstrap/hnb659fds/version", "additionalDependencies": [ diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-streaming.js.snapshot/tree.json b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-streaming.js.snapshot/tree.json index f9acb28811cc2..e5408f8b26d3c 100644 --- 
a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-streaming.js.snapshot/tree.json +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-streaming.js.snapshot/tree.json @@ -205,7 +205,7 @@ "Arn" ] }, - "workerType": "G.2X" + "workerType": "G.1X" } }, "constructInfo": { @@ -266,7 +266,7 @@ "key": "value" }, "timeout": 15, - "workerType": "G.2X" + "workerType": "G.1X" } }, "constructInfo": { diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pysparkflex-etl.js.snapshot/asset.432033e3218068a915d2532fa9be7858a12b228a2ae6e5c10faccd9097b1e855.py b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pysparkflex-etl.js.snapshot/asset.432033e3218068a915d2532fa9be7858a12b228a2ae6e5c10faccd9097b1e855.py deleted file mode 100644 index e75154b7c390f..0000000000000 --- a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pysparkflex-etl.js.snapshot/asset.432033e3218068a915d2532fa9be7858a12b228a2ae6e5c10faccd9097b1e855.py +++ /dev/null @@ -1 +0,0 @@ -print("hello world") \ No newline at end of file diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pysparkflex-etl.js.snapshot/aws-glue-job-pysparkflex-etl.assets.json b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pysparkflex-etl.js.snapshot/aws-glue-job-pysparkflex-etl.assets.json deleted file mode 100644 index 69ac81cf61856..0000000000000 --- a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pysparkflex-etl.js.snapshot/aws-glue-job-pysparkflex-etl.assets.json +++ /dev/null @@ -1,32 +0,0 @@ -{ - "version": "36.0.0", - "files": { - "432033e3218068a915d2532fa9be7858a12b228a2ae6e5c10faccd9097b1e855": { - "source": { - "path": "asset.432033e3218068a915d2532fa9be7858a12b228a2ae6e5c10faccd9097b1e855.py", - "packaging": "file" - }, - "destinations": { - "current_account-current_region": { - "bucketName": "cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}", - "objectKey": "432033e3218068a915d2532fa9be7858a12b228a2ae6e5c10faccd9097b1e855.py", - "assumeRoleArn": 
"arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-file-publishing-role-${AWS::AccountId}-${AWS::Region}" - } - } - }, - "526212322ba7ab66ae5aab010091baff528275b87d212460e3ecff513c0b2eff": { - "source": { - "path": "aws-glue-job-pysparkflex-etl.template.json", - "packaging": "file" - }, - "destinations": { - "current_account-current_region": { - "bucketName": "cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}", - "objectKey": "526212322ba7ab66ae5aab010091baff528275b87d212460e3ecff513c0b2eff.json", - "assumeRoleArn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-file-publishing-role-${AWS::AccountId}-${AWS::Region}" - } - } - } - }, - "dockerImages": {} -} \ No newline at end of file diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pysparkflex-etl.js.snapshot/aws-glue-job-pysparkflex-etl.template.json b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pysparkflex-etl.js.snapshot/aws-glue-job-pysparkflex-etl.template.json deleted file mode 100644 index 971a1cb357e36..0000000000000 --- a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pysparkflex-etl.js.snapshot/aws-glue-job-pysparkflex-etl.template.json +++ /dev/null @@ -1,206 +0,0 @@ -{ - "Resources": { - "IAMServiceRole61C662C4": { - "Type": "AWS::IAM::Role", - "Properties": { - "AssumeRolePolicyDocument": { - "Statement": [ - { - "Action": "sts:AssumeRole", - "Effect": "Allow", - "Principal": { - "Service": "glue.amazonaws.com" - } - } - ], - "Version": "2012-10-17" - }, - "ManagedPolicyArns": [ - { - "Fn::Join": [ - "", - [ - "arn:", - { - "Ref": "AWS::Partition" - }, - ":iam::aws:policy/service-role/AWSGlueServiceRole" - ] - ] - } - ] - } - }, - "IAMServiceRoleDefaultPolicy379D1A0E": { - "Type": "AWS::IAM::Policy", - "Properties": { - "PolicyDocument": { - "Statement": [ - { - "Action": [ - "s3:GetBucket*", - "s3:GetObject*", - "s3:List*" - ], - "Effect": "Allow", - "Resource": [ - { - "Fn::Join": [ - "", - [ - "arn:", - { - "Ref": "AWS::Partition" - }, - ":s3:::", 
- { - "Fn::Sub": "cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}" - }, - "/*" - ] - ] - }, - { - "Fn::Join": [ - "", - [ - "arn:", - { - "Ref": "AWS::Partition" - }, - ":s3:::", - { - "Fn::Sub": "cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}" - } - ] - ] - } - ] - } - ], - "Version": "2012-10-17" - }, - "PolicyName": "IAMServiceRoleDefaultPolicy379D1A0E", - "Roles": [ - { - "Ref": "IAMServiceRole61C662C4" - } - ] - } - }, - "BasicPySparkFlexEtlJobC50DC250": { - "Type": "AWS::Glue::Job", - "Properties": { - "Command": { - "Name": "glueetl", - "PythonVersion": "3", - "ScriptLocation": { - "Fn::Join": [ - "", - [ - "s3://", - { - "Fn::Sub": "cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}" - }, - "/432033e3218068a915d2532fa9be7858a12b228a2ae6e5c10faccd9097b1e855.py" - ] - ] - } - }, - "DefaultArguments": { - "--job-language": "python", - "--enable-metrics": "", - "--enable-observability-metrics": "true" - }, - "ExecutionClass": "FLEX", - "GlueVersion": "3.0", - "NumberOfWorkers": 10, - "Role": { - "Fn::GetAtt": [ - "IAMServiceRole61C662C4", - "Arn" - ] - }, - "WorkerType": "G.1X" - } - }, - "OverridePySparkFlexEtlJob8EE4CFA1": { - "Type": "AWS::Glue::Job", - "Properties": { - "Command": { - "Name": "glueetl", - "PythonVersion": "3", - "ScriptLocation": { - "Fn::Join": [ - "", - [ - "s3://", - { - "Fn::Sub": "cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}" - }, - "/432033e3218068a915d2532fa9be7858a12b228a2ae6e5c10faccd9097b1e855.py" - ] - ] - } - }, - "DefaultArguments": { - "--job-language": "python", - "--enable-metrics": "", - "--enable-observability-metrics": "true", - "arg1": "value1", - "arg2": "value2" - }, - "Description": "Optional Override PySpark Flex Etl Job", - "ExecutionClass": "FLEX", - "GlueVersion": "3.0", - "Name": "Optional Override PySpark Flex Etl Job", - "NumberOfWorkers": 20, - "Role": { - "Fn::GetAtt": [ - "IAMServiceRole61C662C4", - "Arn" - ] - }, - "Tags": { - "key": "value" - }, - "Timeout": 15, - "WorkerType": 
"G.1X" - } - } - }, - "Parameters": { - "BootstrapVersion": { - "Type": "AWS::SSM::Parameter::Value", - "Default": "/cdk-bootstrap/hnb659fds/version", - "Description": "Version of the CDK Bootstrap resources in this environment, automatically retrieved from SSM Parameter Store. [cdk:skip]" - } - }, - "Rules": { - "CheckBootstrapVersion": { - "Assertions": [ - { - "Assert": { - "Fn::Not": [ - { - "Fn::Contains": [ - [ - "1", - "2", - "3", - "4", - "5" - ], - { - "Ref": "BootstrapVersion" - } - ] - } - ] - }, - "AssertDescription": "CDK bootstrap stack version 6 required. Please run 'cdk bootstrap' with a recent version of the CDK CLI." - } - ] - } - } -} \ No newline at end of file diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pysparkflex-etl.js.snapshot/awsgluejobpysparkflexetlintegtestDefaultTestDeployAssert3F3EC951.assets.json b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pysparkflex-etl.js.snapshot/awsgluejobpysparkflexetlintegtestDefaultTestDeployAssert3F3EC951.assets.json deleted file mode 100644 index d77fab393274a..0000000000000 --- a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pysparkflex-etl.js.snapshot/awsgluejobpysparkflexetlintegtestDefaultTestDeployAssert3F3EC951.assets.json +++ /dev/null @@ -1,19 +0,0 @@ -{ - "version": "36.0.0", - "files": { - "21fbb51d7b23f6a6c262b46a9caee79d744a3ac019fd45422d988b96d44b2a22": { - "source": { - "path": "awsgluejobpysparkflexetlintegtestDefaultTestDeployAssert3F3EC951.template.json", - "packaging": "file" - }, - "destinations": { - "current_account-current_region": { - "bucketName": "cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}", - "objectKey": "21fbb51d7b23f6a6c262b46a9caee79d744a3ac019fd45422d988b96d44b2a22.json", - "assumeRoleArn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-file-publishing-role-${AWS::AccountId}-${AWS::Region}" - } - } - } - }, - "dockerImages": {} -} \ No newline at end of file diff --git 
a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pysparkflex-etl.js.snapshot/awsgluejobpysparkflexetlintegtestDefaultTestDeployAssert3F3EC951.template.json b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pysparkflex-etl.js.snapshot/awsgluejobpysparkflexetlintegtestDefaultTestDeployAssert3F3EC951.template.json deleted file mode 100644 index ad9d0fb73d1dd..0000000000000 --- a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pysparkflex-etl.js.snapshot/awsgluejobpysparkflexetlintegtestDefaultTestDeployAssert3F3EC951.template.json +++ /dev/null @@ -1,36 +0,0 @@ -{ - "Parameters": { - "BootstrapVersion": { - "Type": "AWS::SSM::Parameter::Value", - "Default": "/cdk-bootstrap/hnb659fds/version", - "Description": "Version of the CDK Bootstrap resources in this environment, automatically retrieved from SSM Parameter Store. [cdk:skip]" - } - }, - "Rules": { - "CheckBootstrapVersion": { - "Assertions": [ - { - "Assert": { - "Fn::Not": [ - { - "Fn::Contains": [ - [ - "1", - "2", - "3", - "4", - "5" - ], - { - "Ref": "BootstrapVersion" - } - ] - } - ] - }, - "AssertDescription": "CDK bootstrap stack version 6 required. Please run 'cdk bootstrap' with a recent version of the CDK CLI." 
- } - ] - } - } -} \ No newline at end of file diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pysparkflex-etl.js.snapshot/cdk.out b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pysparkflex-etl.js.snapshot/cdk.out deleted file mode 100644 index 1f0068d32659a..0000000000000 --- a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pysparkflex-etl.js.snapshot/cdk.out +++ /dev/null @@ -1 +0,0 @@ -{"version":"36.0.0"} \ No newline at end of file diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pysparkflex-etl.js.snapshot/integ.json b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pysparkflex-etl.js.snapshot/integ.json deleted file mode 100644 index b837700f2ba0b..0000000000000 --- a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pysparkflex-etl.js.snapshot/integ.json +++ /dev/null @@ -1,12 +0,0 @@ -{ - "version": "36.0.0", - "testCases": { - "aws-glue-job-pysparkflex-etl-integ-test/DefaultTest": { - "stacks": [ - "aws-glue-job-pysparkflex-etl" - ], - "assertionStack": "aws-glue-job-pysparkflex-etl-integ-test/DefaultTest/DeployAssert", - "assertionStackName": "awsgluejobpysparkflexetlintegtestDefaultTestDeployAssert3F3EC951" - } - } -} \ No newline at end of file diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pysparkflex-etl.js.snapshot/manifest.json b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pysparkflex-etl.js.snapshot/manifest.json deleted file mode 100644 index 8fb3110de8188..0000000000000 --- a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pysparkflex-etl.js.snapshot/manifest.json +++ /dev/null @@ -1,131 +0,0 @@ -{ - "version": "36.0.0", - "artifacts": { - "aws-glue-job-pysparkflex-etl.assets": { - "type": "cdk:asset-manifest", - "properties": { - "file": "aws-glue-job-pysparkflex-etl.assets.json", - "requiresBootstrapStackVersion": 6, - "bootstrapStackVersionSsmParameter": "/cdk-bootstrap/hnb659fds/version" - } - }, - "aws-glue-job-pysparkflex-etl": { - "type": "aws:cloudformation:stack", - "environment": 
"aws://unknown-account/unknown-region", - "properties": { - "templateFile": "aws-glue-job-pysparkflex-etl.template.json", - "terminationProtection": false, - "validateOnSynth": false, - "assumeRoleArn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-deploy-role-${AWS::AccountId}-${AWS::Region}", - "cloudFormationExecutionRoleArn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-cfn-exec-role-${AWS::AccountId}-${AWS::Region}", - "stackTemplateAssetObjectUrl": "s3://cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}/526212322ba7ab66ae5aab010091baff528275b87d212460e3ecff513c0b2eff.json", - "requiresBootstrapStackVersion": 6, - "bootstrapStackVersionSsmParameter": "/cdk-bootstrap/hnb659fds/version", - "additionalDependencies": [ - "aws-glue-job-pysparkflex-etl.assets" - ], - "lookupRole": { - "arn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-lookup-role-${AWS::AccountId}-${AWS::Region}", - "requiresBootstrapStackVersion": 8, - "bootstrapStackVersionSsmParameter": "/cdk-bootstrap/hnb659fds/version" - } - }, - "dependencies": [ - "aws-glue-job-pysparkflex-etl.assets" - ], - "metadata": { - "/aws-glue-job-pysparkflex-etl/IAMServiceRole/Resource": [ - { - "type": "aws:cdk:logicalId", - "data": "IAMServiceRole61C662C4" - } - ], - "/aws-glue-job-pysparkflex-etl/IAMServiceRole/DefaultPolicy/Resource": [ - { - "type": "aws:cdk:logicalId", - "data": "IAMServiceRoleDefaultPolicy379D1A0E" - } - ], - "/aws-glue-job-pysparkflex-etl/BasicPySparkFlexEtlJob/Resource": [ - { - "type": "aws:cdk:logicalId", - "data": "BasicPySparkFlexEtlJobC50DC250" - } - ], - "/aws-glue-job-pysparkflex-etl/OverridePySparkFlexEtlJob/Resource": [ - { - "type": "aws:cdk:logicalId", - "data": "OverridePySparkFlexEtlJob8EE4CFA1" - } - ], - "/aws-glue-job-pysparkflex-etl/BootstrapVersion": [ - { - "type": "aws:cdk:logicalId", - "data": "BootstrapVersion" - } - ], - "/aws-glue-job-pysparkflex-etl/CheckBootstrapVersion": [ - { - "type": 
"aws:cdk:logicalId", - "data": "CheckBootstrapVersion" - } - ] - }, - "displayName": "aws-glue-job-pysparkflex-etl" - }, - "awsgluejobpysparkflexetlintegtestDefaultTestDeployAssert3F3EC951.assets": { - "type": "cdk:asset-manifest", - "properties": { - "file": "awsgluejobpysparkflexetlintegtestDefaultTestDeployAssert3F3EC951.assets.json", - "requiresBootstrapStackVersion": 6, - "bootstrapStackVersionSsmParameter": "/cdk-bootstrap/hnb659fds/version" - } - }, - "awsgluejobpysparkflexetlintegtestDefaultTestDeployAssert3F3EC951": { - "type": "aws:cloudformation:stack", - "environment": "aws://unknown-account/unknown-region", - "properties": { - "templateFile": "awsgluejobpysparkflexetlintegtestDefaultTestDeployAssert3F3EC951.template.json", - "terminationProtection": false, - "validateOnSynth": false, - "assumeRoleArn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-deploy-role-${AWS::AccountId}-${AWS::Region}", - "cloudFormationExecutionRoleArn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-cfn-exec-role-${AWS::AccountId}-${AWS::Region}", - "stackTemplateAssetObjectUrl": "s3://cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}/21fbb51d7b23f6a6c262b46a9caee79d744a3ac019fd45422d988b96d44b2a22.json", - "requiresBootstrapStackVersion": 6, - "bootstrapStackVersionSsmParameter": "/cdk-bootstrap/hnb659fds/version", - "additionalDependencies": [ - "awsgluejobpysparkflexetlintegtestDefaultTestDeployAssert3F3EC951.assets" - ], - "lookupRole": { - "arn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-lookup-role-${AWS::AccountId}-${AWS::Region}", - "requiresBootstrapStackVersion": 8, - "bootstrapStackVersionSsmParameter": "/cdk-bootstrap/hnb659fds/version" - } - }, - "dependencies": [ - "awsgluejobpysparkflexetlintegtestDefaultTestDeployAssert3F3EC951.assets" - ], - "metadata": { - "/aws-glue-job-pysparkflex-etl-integ-test/DefaultTest/DeployAssert/BootstrapVersion": [ - { - "type": "aws:cdk:logicalId", - "data": 
"BootstrapVersion" - } - ], - "/aws-glue-job-pysparkflex-etl-integ-test/DefaultTest/DeployAssert/CheckBootstrapVersion": [ - { - "type": "aws:cdk:logicalId", - "data": "CheckBootstrapVersion" - } - ] - }, - "displayName": "aws-glue-job-pysparkflex-etl-integ-test/DefaultTest/DeployAssert" - }, - "Tree": { - "type": "cdk:tree", - "properties": { - "file": "tree.json" - } - } - } -} \ No newline at end of file diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pysparkflex-etl.js.snapshot/tree.json b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pysparkflex-etl.js.snapshot/tree.json deleted file mode 100644 index d3ebe4e6fb7ec..0000000000000 --- a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pysparkflex-etl.js.snapshot/tree.json +++ /dev/null @@ -1,375 +0,0 @@ -{ - "version": "tree-0.1", - "tree": { - "id": "App", - "path": "", - "children": { - "aws-glue-job-pysparkflex-etl": { - "id": "aws-glue-job-pysparkflex-etl", - "path": "aws-glue-job-pysparkflex-etl", - "children": { - "IAMServiceRole": { - "id": "IAMServiceRole", - "path": "aws-glue-job-pysparkflex-etl/IAMServiceRole", - "children": { - "ImportIAMServiceRole": { - "id": "ImportIAMServiceRole", - "path": "aws-glue-job-pysparkflex-etl/IAMServiceRole/ImportIAMServiceRole", - "constructInfo": { - "fqn": "aws-cdk-lib.Resource", - "version": "0.0.0" - } - }, - "Resource": { - "id": "Resource", - "path": "aws-glue-job-pysparkflex-etl/IAMServiceRole/Resource", - "attributes": { - "aws:cdk:cloudformation:type": "AWS::IAM::Role", - "aws:cdk:cloudformation:props": { - "assumeRolePolicyDocument": { - "Statement": [ - { - "Action": "sts:AssumeRole", - "Effect": "Allow", - "Principal": { - "Service": "glue.amazonaws.com" - } - } - ], - "Version": "2012-10-17" - }, - "managedPolicyArns": [ - { - "Fn::Join": [ - "", - [ - "arn:", - { - "Ref": "AWS::Partition" - }, - ":iam::aws:policy/service-role/AWSGlueServiceRole" - ] - ] - } - ] - } - }, - "constructInfo": { - "fqn": "aws-cdk-lib.aws_iam.CfnRole", - 
"version": "0.0.0" - } - }, - "DefaultPolicy": { - "id": "DefaultPolicy", - "path": "aws-glue-job-pysparkflex-etl/IAMServiceRole/DefaultPolicy", - "children": { - "Resource": { - "id": "Resource", - "path": "aws-glue-job-pysparkflex-etl/IAMServiceRole/DefaultPolicy/Resource", - "attributes": { - "aws:cdk:cloudformation:type": "AWS::IAM::Policy", - "aws:cdk:cloudformation:props": { - "policyDocument": { - "Statement": [ - { - "Action": [ - "s3:GetBucket*", - "s3:GetObject*", - "s3:List*" - ], - "Effect": "Allow", - "Resource": [ - { - "Fn::Join": [ - "", - [ - "arn:", - { - "Ref": "AWS::Partition" - }, - ":s3:::", - { - "Fn::Sub": "cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}" - }, - "/*" - ] - ] - }, - { - "Fn::Join": [ - "", - [ - "arn:", - { - "Ref": "AWS::Partition" - }, - ":s3:::", - { - "Fn::Sub": "cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}" - } - ] - ] - } - ] - } - ], - "Version": "2012-10-17" - }, - "policyName": "IAMServiceRoleDefaultPolicy379D1A0E", - "roles": [ - { - "Ref": "IAMServiceRole61C662C4" - } - ] - } - }, - "constructInfo": { - "fqn": "aws-cdk-lib.aws_iam.CfnPolicy", - "version": "0.0.0" - } - } - }, - "constructInfo": { - "fqn": "aws-cdk-lib.aws_iam.Policy", - "version": "0.0.0" - } - } - }, - "constructInfo": { - "fqn": "aws-cdk-lib.aws_iam.Role", - "version": "0.0.0" - } - }, - "BasicPySparkFlexEtlJob": { - "id": "BasicPySparkFlexEtlJob", - "path": "aws-glue-job-pysparkflex-etl/BasicPySparkFlexEtlJob", - "children": { - "Code2907ea7be4a583708cfffc21b3df1dfa": { - "id": "Code2907ea7be4a583708cfffc21b3df1dfa", - "path": "aws-glue-job-pysparkflex-etl/BasicPySparkFlexEtlJob/Code2907ea7be4a583708cfffc21b3df1dfa", - "children": { - "Stage": { - "id": "Stage", - "path": "aws-glue-job-pysparkflex-etl/BasicPySparkFlexEtlJob/Code2907ea7be4a583708cfffc21b3df1dfa/Stage", - "constructInfo": { - "fqn": "aws-cdk-lib.AssetStaging", - "version": "0.0.0" - } - }, - "AssetBucket": { - "id": "AssetBucket", - "path": 
"aws-glue-job-pysparkflex-etl/BasicPySparkFlexEtlJob/Code2907ea7be4a583708cfffc21b3df1dfa/AssetBucket", - "constructInfo": { - "fqn": "aws-cdk-lib.aws_s3.BucketBase", - "version": "0.0.0" - } - } - }, - "constructInfo": { - "fqn": "aws-cdk-lib.aws_s3_assets.Asset", - "version": "0.0.0" - } - }, - "Resource": { - "id": "Resource", - "path": "aws-glue-job-pysparkflex-etl/BasicPySparkFlexEtlJob/Resource", - "attributes": { - "aws:cdk:cloudformation:type": "AWS::Glue::Job", - "aws:cdk:cloudformation:props": { - "command": { - "name": "glueetl", - "scriptLocation": { - "Fn::Join": [ - "", - [ - "s3://", - { - "Fn::Sub": "cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}" - }, - "/432033e3218068a915d2532fa9be7858a12b228a2ae6e5c10faccd9097b1e855.py" - ] - ] - }, - "pythonVersion": "3" - }, - "defaultArguments": { - "--job-language": "python", - "--enable-metrics": "", - "--enable-observability-metrics": "true" - }, - "executionClass": "FLEX", - "glueVersion": "3.0", - "numberOfWorkers": 10, - "role": { - "Fn::GetAtt": [ - "IAMServiceRole61C662C4", - "Arn" - ] - }, - "workerType": "G.1X" - } - }, - "constructInfo": { - "fqn": "aws-cdk-lib.aws_glue.CfnJob", - "version": "0.0.0" - } - } - }, - "constructInfo": { - "fqn": "@aws-cdk/aws-glue-alpha.PySparkFlexEtlJob", - "version": "0.0.0" - } - }, - "OverridePySparkFlexEtlJob": { - "id": "OverridePySparkFlexEtlJob", - "path": "aws-glue-job-pysparkflex-etl/OverridePySparkFlexEtlJob", - "children": { - "Resource": { - "id": "Resource", - "path": "aws-glue-job-pysparkflex-etl/OverridePySparkFlexEtlJob/Resource", - "attributes": { - "aws:cdk:cloudformation:type": "AWS::Glue::Job", - "aws:cdk:cloudformation:props": { - "command": { - "name": "glueetl", - "scriptLocation": { - "Fn::Join": [ - "", - [ - "s3://", - { - "Fn::Sub": "cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}" - }, - "/432033e3218068a915d2532fa9be7858a12b228a2ae6e5c10faccd9097b1e855.py" - ] - ] - }, - "pythonVersion": "3" - }, - "defaultArguments": { - 
"--job-language": "python", - "--enable-metrics": "", - "--enable-observability-metrics": "true", - "arg1": "value1", - "arg2": "value2" - }, - "description": "Optional Override PySpark Flex Etl Job", - "executionClass": "FLEX", - "glueVersion": "3.0", - "name": "Optional Override PySpark Flex Etl Job", - "numberOfWorkers": 20, - "role": { - "Fn::GetAtt": [ - "IAMServiceRole61C662C4", - "Arn" - ] - }, - "tags": { - "key": "value" - }, - "timeout": 15, - "workerType": "G.1X" - } - }, - "constructInfo": { - "fqn": "aws-cdk-lib.aws_glue.CfnJob", - "version": "0.0.0" - } - } - }, - "constructInfo": { - "fqn": "@aws-cdk/aws-glue-alpha.PySparkFlexEtlJob", - "version": "0.0.0" - } - }, - "BootstrapVersion": { - "id": "BootstrapVersion", - "path": "aws-glue-job-pysparkflex-etl/BootstrapVersion", - "constructInfo": { - "fqn": "aws-cdk-lib.CfnParameter", - "version": "0.0.0" - } - }, - "CheckBootstrapVersion": { - "id": "CheckBootstrapVersion", - "path": "aws-glue-job-pysparkflex-etl/CheckBootstrapVersion", - "constructInfo": { - "fqn": "aws-cdk-lib.CfnRule", - "version": "0.0.0" - } - } - }, - "constructInfo": { - "fqn": "aws-cdk-lib.Stack", - "version": "0.0.0" - } - }, - "aws-glue-job-pysparkflex-etl-integ-test": { - "id": "aws-glue-job-pysparkflex-etl-integ-test", - "path": "aws-glue-job-pysparkflex-etl-integ-test", - "children": { - "DefaultTest": { - "id": "DefaultTest", - "path": "aws-glue-job-pysparkflex-etl-integ-test/DefaultTest", - "children": { - "Default": { - "id": "Default", - "path": "aws-glue-job-pysparkflex-etl-integ-test/DefaultTest/Default", - "constructInfo": { - "fqn": "constructs.Construct", - "version": "10.3.0" - } - }, - "DeployAssert": { - "id": "DeployAssert", - "path": "aws-glue-job-pysparkflex-etl-integ-test/DefaultTest/DeployAssert", - "children": { - "BootstrapVersion": { - "id": "BootstrapVersion", - "path": "aws-glue-job-pysparkflex-etl-integ-test/DefaultTest/DeployAssert/BootstrapVersion", - "constructInfo": { - "fqn": 
"aws-cdk-lib.CfnParameter", - "version": "0.0.0" - } - }, - "CheckBootstrapVersion": { - "id": "CheckBootstrapVersion", - "path": "aws-glue-job-pysparkflex-etl-integ-test/DefaultTest/DeployAssert/CheckBootstrapVersion", - "constructInfo": { - "fqn": "aws-cdk-lib.CfnRule", - "version": "0.0.0" - } - } - }, - "constructInfo": { - "fqn": "aws-cdk-lib.Stack", - "version": "0.0.0" - } - } - }, - "constructInfo": { - "fqn": "@aws-cdk/integ-tests-alpha.IntegTestCase", - "version": "0.0.0" - } - } - }, - "constructInfo": { - "fqn": "@aws-cdk/integ-tests-alpha.IntegTest", - "version": "0.0.0" - } - }, - "Tree": { - "id": "Tree", - "path": "Tree", - "constructInfo": { - "fqn": "constructs.Construct", - "version": "10.3.0" - } - } - }, - "constructInfo": { - "fqn": "aws-cdk-lib.App", - "version": "0.0.0" - } - } -} \ No newline at end of file diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-etl.js.snapshot/aws-glue-job-scalaspark-etl.assets.json b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-etl.js.snapshot/aws-glue-job-scalaspark-etl.assets.json index 4ed64c3a1fa9e..ddec1d20abcd4 100644 --- a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-etl.js.snapshot/aws-glue-job-scalaspark-etl.assets.json +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-etl.js.snapshot/aws-glue-job-scalaspark-etl.assets.json @@ -14,7 +14,7 @@ } } }, - "fe963e7893418cd93f28f2e8e97fe684357d3752eec25a95ff05d6ff51f1cc6a": { + "5d07dbbcf92246e5c8cb128178cd6f66a5abc0d138f9470a93062a2449c80a14": { "source": { "path": "aws-glue-job-scalaspark-etl.template.json", "packaging": "file" @@ -22,7 +22,7 @@ "destinations": { "current_account-current_region": { "bucketName": "cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}", - "objectKey": "fe963e7893418cd93f28f2e8e97fe684357d3752eec25a95ff05d6ff51f1cc6a.json", + "objectKey": "5d07dbbcf92246e5c8cb128178cd6f66a5abc0d138f9470a93062a2449c80a14.json", "assumeRoleArn": 
"arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-file-publishing-role-${AWS::AccountId}-${AWS::Region}" } } diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-etl.js.snapshot/aws-glue-job-scalaspark-etl.template.json b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-etl.js.snapshot/aws-glue-job-scalaspark-etl.template.json index c8b6395f713bc..08fdb61a25ecd 100644 --- a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-etl.js.snapshot/aws-glue-job-scalaspark-etl.template.json +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-etl.js.snapshot/aws-glue-job-scalaspark-etl.template.json @@ -120,7 +120,7 @@ "Arn" ] }, - "WorkerType": "G.2X" + "WorkerType": "G.1X" } }, "OverrideScalaSparkETLJobC019089C": { @@ -163,7 +163,7 @@ "key": "value" }, "Timeout": 15, - "WorkerType": "G.2X" + "WorkerType": "G.1X" } } }, diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-etl.js.snapshot/manifest.json b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-etl.js.snapshot/manifest.json index c93c9444a7e62..ef4e906503ebd 100644 --- a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-etl.js.snapshot/manifest.json +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-etl.js.snapshot/manifest.json @@ -18,7 +18,7 @@ "validateOnSynth": false, "assumeRoleArn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-deploy-role-${AWS::AccountId}-${AWS::Region}", "cloudFormationExecutionRoleArn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-cfn-exec-role-${AWS::AccountId}-${AWS::Region}", - "stackTemplateAssetObjectUrl": "s3://cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}/fe963e7893418cd93f28f2e8e97fe684357d3752eec25a95ff05d6ff51f1cc6a.json", + "stackTemplateAssetObjectUrl": "s3://cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}/5d07dbbcf92246e5c8cb128178cd6f66a5abc0d138f9470a93062a2449c80a14.json", "requiresBootstrapStackVersion": 6, 
"bootstrapStackVersionSsmParameter": "/cdk-bootstrap/hnb659fds/version", "additionalDependencies": [ diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-etl.js.snapshot/tree.json b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-etl.js.snapshot/tree.json index d35613ddc768d..2a2fa48e38b77 100644 --- a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-etl.js.snapshot/tree.json +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-etl.js.snapshot/tree.json @@ -205,7 +205,7 @@ "Arn" ] }, - "workerType": "G.2X" + "workerType": "G.1X" } }, "constructInfo": { @@ -266,7 +266,7 @@ "key": "value" }, "timeout": 15, - "workerType": "G.2X" + "workerType": "G.1X" } }, "constructInfo": { diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-flex-etl.js.snapshot/aws-glue-job-scalasparkflex-etl.assets.json b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-flex-etl.js.snapshot/aws-glue-job-scalasparkflex-etl.assets.json index ca63d7d53bea7..246e028be6d6f 100644 --- a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-flex-etl.js.snapshot/aws-glue-job-scalasparkflex-etl.assets.json +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-flex-etl.js.snapshot/aws-glue-job-scalasparkflex-etl.assets.json @@ -14,7 +14,7 @@ } } }, - "237dbe47db947248325fa078c4275b440ec5fbd928f898566437c7273c49487d": { + "ff1b08d04e7d65e42ead8e33a88a380c6678218b733d0b350cd0bea32ec2944f": { "source": { "path": "aws-glue-job-scalasparkflex-etl.template.json", "packaging": "file" @@ -22,7 +22,7 @@ "destinations": { "current_account-current_region": { "bucketName": "cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}", - "objectKey": "237dbe47db947248325fa078c4275b440ec5fbd928f898566437c7273c49487d.json", + "objectKey": "ff1b08d04e7d65e42ead8e33a88a380c6678218b733d0b350cd0bea32ec2944f.json", "assumeRoleArn": 
"arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-file-publishing-role-${AWS::AccountId}-${AWS::Region}" } } diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-flex-etl.js.snapshot/aws-glue-job-scalasparkflex-etl.template.json b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-flex-etl.js.snapshot/aws-glue-job-scalasparkflex-etl.template.json index a7b39ad547b08..f046068d73e7c 100644 --- a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-flex-etl.js.snapshot/aws-glue-job-scalasparkflex-etl.template.json +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-flex-etl.js.snapshot/aws-glue-job-scalasparkflex-etl.template.json @@ -121,7 +121,7 @@ "Arn" ] }, - "WorkerType": "G.2X" + "WorkerType": "G.1X" } }, "OverrideScalaSparkFlexEtlJob843D93B4": { diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-flex-etl.js.snapshot/manifest.json b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-flex-etl.js.snapshot/manifest.json index 3e947e62d8097..76778efc60610 100644 --- a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-flex-etl.js.snapshot/manifest.json +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-flex-etl.js.snapshot/manifest.json @@ -18,7 +18,7 @@ "validateOnSynth": false, "assumeRoleArn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-deploy-role-${AWS::AccountId}-${AWS::Region}", "cloudFormationExecutionRoleArn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-cfn-exec-role-${AWS::AccountId}-${AWS::Region}", - "stackTemplateAssetObjectUrl": "s3://cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}/237dbe47db947248325fa078c4275b440ec5fbd928f898566437c7273c49487d.json", + "stackTemplateAssetObjectUrl": "s3://cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}/ff1b08d04e7d65e42ead8e33a88a380c6678218b733d0b350cd0bea32ec2944f.json", "requiresBootstrapStackVersion": 6, "bootstrapStackVersionSsmParameter": 
"/cdk-bootstrap/hnb659fds/version", "additionalDependencies": [ diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-flex-etl.js.snapshot/tree.json b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-flex-etl.js.snapshot/tree.json index dd37a0c8b14c4..c8dc5ada88490 100644 --- a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-flex-etl.js.snapshot/tree.json +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-flex-etl.js.snapshot/tree.json @@ -206,7 +206,7 @@ "Arn" ] }, - "workerType": "G.2X" + "workerType": "G.1X" } }, "constructInfo": { diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-streaming.js.snapshot/aws-glue-job-scalaspark-streaming.assets.json b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-streaming.js.snapshot/aws-glue-job-scalaspark-streaming.assets.json index 203d49224d57a..f459de5343980 100644 --- a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-streaming.js.snapshot/aws-glue-job-scalaspark-streaming.assets.json +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-streaming.js.snapshot/aws-glue-job-scalaspark-streaming.assets.json @@ -14,7 +14,7 @@ } } }, - "6912184a6fddb01d6471c0c01a1140018cfbb2317d9c43706f2baaf7ef71d8f2": { + "c3b8ab6feb5555c17f340cd7ad1615f3d3ae8d9a61d1966df25b95256a6da475": { "source": { "path": "aws-glue-job-scalaspark-streaming.template.json", "packaging": "file" @@ -22,7 +22,7 @@ "destinations": { "current_account-current_region": { "bucketName": "cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}", - "objectKey": "6912184a6fddb01d6471c0c01a1140018cfbb2317d9c43706f2baaf7ef71d8f2.json", + "objectKey": "c3b8ab6feb5555c17f340cd7ad1615f3d3ae8d9a61d1966df25b95256a6da475.json", "assumeRoleArn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-file-publishing-role-${AWS::AccountId}-${AWS::Region}" } } diff --git 
a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-streaming.js.snapshot/aws-glue-job-scalaspark-streaming.template.json b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-streaming.js.snapshot/aws-glue-job-scalaspark-streaming.template.json index 99ae06b7e8fba..d44b72c813adb 100644 --- a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-streaming.js.snapshot/aws-glue-job-scalaspark-streaming.template.json +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-streaming.js.snapshot/aws-glue-job-scalaspark-streaming.template.json @@ -120,7 +120,7 @@ "Arn" ] }, - "WorkerType": "G.2X" + "WorkerType": "G.1X" } }, "OverrideScalaSparkStreamingJob598931ED": { @@ -163,7 +163,7 @@ "key": "value" }, "Timeout": 15, - "WorkerType": "G.2X" + "WorkerType": "G.1X" } } }, diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-streaming.js.snapshot/manifest.json b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-streaming.js.snapshot/manifest.json index b8848ef046158..18e6da3fb4c07 100644 --- a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-streaming.js.snapshot/manifest.json +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-streaming.js.snapshot/manifest.json @@ -18,7 +18,7 @@ "validateOnSynth": false, "assumeRoleArn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-deploy-role-${AWS::AccountId}-${AWS::Region}", "cloudFormationExecutionRoleArn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-cfn-exec-role-${AWS::AccountId}-${AWS::Region}", - "stackTemplateAssetObjectUrl": "s3://cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}/6912184a6fddb01d6471c0c01a1140018cfbb2317d9c43706f2baaf7ef71d8f2.json", + "stackTemplateAssetObjectUrl": "s3://cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}/c3b8ab6feb5555c17f340cd7ad1615f3d3ae8d9a61d1966df25b95256a6da475.json", "requiresBootstrapStackVersion": 6, "bootstrapStackVersionSsmParameter": 
"/cdk-bootstrap/hnb659fds/version", "additionalDependencies": [ diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-streaming.js.snapshot/tree.json b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-streaming.js.snapshot/tree.json index 7fe9ebbe1ff9d..896cee852e070 100644 --- a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-streaming.js.snapshot/tree.json +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-streaming.js.snapshot/tree.json @@ -205,7 +205,7 @@ "Arn" ] }, - "workerType": "G.2X" + "workerType": "G.1X" } }, "constructInfo": { @@ -266,7 +266,7 @@ "key": "value" }, "timeout": 15, - "workerType": "G.2X" + "workerType": "G.1X" } }, "constructInfo": { From 593e877e4917f445498b3d1145fbc30546637a79 Mon Sep 17 00:00:00 2001 From: Natalie White Date: Sat, 29 Jun 2024 00:41:48 +0000 Subject: [PATCH 35/51] Refactor coninuous logging default enabled plus unit tests in pyspark-etl-job and fixed other unit tests --- .../@aws-cdk/aws-glue-alpha/lib/jobs/job.ts | 23 +++-- .../lib/jobs/pyspark-etl-job.ts | 7 +- .../lib/jobs/pyspark-flex-etl-job.ts | 2 +- .../lib/jobs/pyspark-streaming-job.ts | 2 +- .../lib/jobs/python-shell-job.ts | 8 ++ .../aws-glue-alpha/lib/jobs/ray-job.ts | 2 +- .../lib/jobs/scala-spark-etl-job.ts | 2 +- .../lib/jobs/scala-spark-flex-etl-job.ts | 2 +- .../lib/jobs/scala-spark-streaming-job.ts | 2 +- .../@aws-cdk/aws-glue-alpha/test/code.test.ts | 20 ++--- .../aws-glue-job-pyspark-etl.assets.json | 4 +- .../aws-glue-job-pyspark-etl.template.json | 2 + .../manifest.json | 2 +- .../tree.json | 2 + .../aws-glue-job-scalaspark-etl.assets.json | 4 +- .../aws-glue-job-scalaspark-etl.template.json | 2 + .../manifest.json | 2 +- .../tree.json | 2 + .../test/pyspark-etl-jobs.test.ts | 83 ++++++++++++++++++- 19 files changed, 140 insertions(+), 33 deletions(-) diff --git a/packages/@aws-cdk/aws-glue-alpha/lib/jobs/job.ts b/packages/@aws-cdk/aws-glue-alpha/lib/jobs/job.ts index c727856361dab..04d62997806ed 
100644 --- a/packages/@aws-cdk/aws-glue-alpha/lib/jobs/job.ts +++ b/packages/@aws-cdk/aws-glue-alpha/lib/jobs/job.ts @@ -499,23 +499,36 @@ export abstract class Job extends JobBase { * @param props The properties for continuous logging configuration * @returns String containing the args for the continuous logging command */ - public setupContinuousLogging(role: iam.IRole, props: ContinuousLoggingProps) { + public setupContinuousLogging(role: iam.IRole, props: ContinuousLoggingProps | undefined) : any { + + // If the developer has explicitly disabled continuous logging return no args + if (props && !props.enabled) { + return {}; + } + + // Else we turn on continuous logging by default. Determine what log group to use. const args: {[key: string]: string} = { '--enable-continuous-cloudwatch-log': 'true', - '--enable-continuous-log-filter': (props.quiet ?? true).toString(), }; - if (props.logGroup) { + if (props?.quiet) { + args['--enable-continuous-log-filter'] = (props.quiet ?? true).toString(); + }; + + // If the developer provided a log group, add its name to the args and update the role. 
+ if (props?.logGroup) { args['--continuous-log-logGroup'] = props.logGroup.logGroupName; props.logGroup.grantWrite(role); } - if (props.logStreamPrefix) { + if (props?.logStreamPrefix) { args['--continuous-log-logStreamPrefix'] = props.logStreamPrefix; } - if (props.conversionPattern) { + + if (props?.conversionPattern) { args['--continuous-log-conversionPattern'] = props.conversionPattern; } + return args; } diff --git a/packages/@aws-cdk/aws-glue-alpha/lib/jobs/pyspark-etl-job.ts b/packages/@aws-cdk/aws-glue-alpha/lib/jobs/pyspark-etl-job.ts index ff83123b0b668..f20ac3d1dbf1a 100644 --- a/packages/@aws-cdk/aws-glue-alpha/lib/jobs/pyspark-etl-job.ts +++ b/packages/@aws-cdk/aws-glue-alpha/lib/jobs/pyspark-etl-job.ts @@ -62,8 +62,6 @@ export class PySparkEtlJob extends Job { public readonly role: iam.IRole; public readonly grantPrincipal: iam.IPrincipal; - //private logGroup: LogGroup; - /** * The Spark UI logs location if Spark UI monitoring and debugging is enabled. * @@ -85,6 +83,8 @@ export class PySparkEtlJob extends Job { physicalName: props.jobName, }); + this.jobName = props.jobName ?? ''; + // Set up role and permissions for principal this.role = props.role, { assumedBy: new iam.ServicePrincipal('glue.amazonaws.com'), @@ -97,7 +97,7 @@ export class PySparkEtlJob extends Job { this.sparkUILoggingLocation = sparkUIArgs?.location; // Enable CloudWatch metrics and continuous logging by default as a best practice - const continuousLoggingArgs = props.continuousLogging?.enabled ? this.setupContinuousLogging(this.role, props.continuousLogging) : {}; + const continuousLoggingArgs = this.setupContinuousLogging(this.role, props.continuousLogging); const profilingMetricsArgs = { '--enable-metrics': '' }; const observabilityMetricsArgs = { '--enable-observability-metrics': 'true' }; @@ -132,7 +132,6 @@ export class PySparkEtlJob extends Job { numberOfWorkers: props.numberOfWorkers ? 
props.numberOfWorkers : 10, maxRetries: props.maxRetries, executionProperty: props.maxConcurrentRuns ? { maxConcurrentRuns: props.maxConcurrentRuns } : undefined, - //notificationProperty: props.notifyDelayAfter ? { notifyDelayAfter: props.notifyDelayAfter.toMinutes() } : undefined, timeout: props.timeout?.toMinutes(), connections: props.connections ? { connections: props.connections.map((connection) => connection.connectionName) } : undefined, securityConfiguration: props.securityConfiguration?.securityConfigurationName, diff --git a/packages/@aws-cdk/aws-glue-alpha/lib/jobs/pyspark-flex-etl-job.ts b/packages/@aws-cdk/aws-glue-alpha/lib/jobs/pyspark-flex-etl-job.ts index 858f9f1a6ea53..519b368ed26fb 100644 --- a/packages/@aws-cdk/aws-glue-alpha/lib/jobs/pyspark-flex-etl-job.ts +++ b/packages/@aws-cdk/aws-glue-alpha/lib/jobs/pyspark-flex-etl-job.ts @@ -104,7 +104,7 @@ export class PySparkFlexEtlJob extends Job { this.sparkUILoggingLocation = sparkUIArgs?.location; // Enable CloudWatch metrics and continuous logging by default as a best practice - const continuousLoggingArgs = props.continuousLogging?.enabled ? 
this.setupContinuousLogging(this.role, props.continuousLogging) : {}; + const continuousLoggingArgs = this.setupContinuousLogging(this.role, props.continuousLogging); const profilingMetricsArgs = { '--enable-metrics': '' }; const observabilityMetricsArgs = { '--enable-observability-metrics': 'true' }; diff --git a/packages/@aws-cdk/aws-glue-alpha/lib/jobs/pyspark-streaming-job.ts b/packages/@aws-cdk/aws-glue-alpha/lib/jobs/pyspark-streaming-job.ts index 60e8ad279c191..1ff51cdee2f3a 100644 --- a/packages/@aws-cdk/aws-glue-alpha/lib/jobs/pyspark-streaming-job.ts +++ b/packages/@aws-cdk/aws-glue-alpha/lib/jobs/pyspark-streaming-job.ts @@ -98,7 +98,7 @@ export class PySparkStreamingJob extends Job { this.sparkUILoggingLocation = sparkUIArgs?.location; // Enable CloudWatch metrics and continuous logging by default as a best practice - const continuousLoggingArgs = props.continuousLogging?.enabled ? this.setupContinuousLogging(this.role, props.continuousLogging) : {}; + const continuousLoggingArgs = this.setupContinuousLogging(this.role, props.continuousLogging); const profilingMetricsArgs = { '--enable-metrics': '' }; const observabilityMetricsArgs = { '--enable-observability-metrics': 'true' }; diff --git a/packages/@aws-cdk/aws-glue-alpha/lib/jobs/python-shell-job.ts b/packages/@aws-cdk/aws-glue-alpha/lib/jobs/python-shell-job.ts index 966454da77457..b1d0da0ca33da 100644 --- a/packages/@aws-cdk/aws-glue-alpha/lib/jobs/python-shell-job.ts +++ b/packages/@aws-cdk/aws-glue-alpha/lib/jobs/python-shell-job.ts @@ -59,12 +59,20 @@ export class PythonShellJob extends Job { }; this.grantPrincipal = this.role; + // Enable CloudWatch metrics and continuous logging by default as a best practice + const continuousLoggingArgs = this.setupContinuousLogging(this.role, props.continuousLogging); + const profilingMetricsArgs = { '--enable-metrics': '' }; + const observabilityMetricsArgs = { '--enable-observability-metrics': 'true' }; + // Gather executable arguments const executableArgs 
= this.executableArguments(props); // Combine command line arguments into a single line item const defaultArguments = { ...executableArgs, + ...continuousLoggingArgs, + ...profilingMetricsArgs, + ...observabilityMetricsArgs, ...this.checkNoReservedArgs(props.defaultArguments), }; diff --git a/packages/@aws-cdk/aws-glue-alpha/lib/jobs/ray-job.ts b/packages/@aws-cdk/aws-glue-alpha/lib/jobs/ray-job.ts index 5171520ff7a07..44d979833f58f 100644 --- a/packages/@aws-cdk/aws-glue-alpha/lib/jobs/ray-job.ts +++ b/packages/@aws-cdk/aws-glue-alpha/lib/jobs/ray-job.ts @@ -61,7 +61,7 @@ export class RayJob extends Job { this.grantPrincipal = this.role; // Enable CloudWatch metrics and continuous logging by default as a best practice - const continuousLoggingArgs = props.continuousLogging?.enabled ? this.setupContinuousLogging(this.role, props.continuousLogging) : {}; + const continuousLoggingArgs = this.setupContinuousLogging(this.role, props.continuousLogging); const profilingMetricsArgs = { '--enable-metrics': '' }; const observabilityMetricsArgs = { '--enable-observability-metrics': 'true' }; diff --git a/packages/@aws-cdk/aws-glue-alpha/lib/jobs/scala-spark-etl-job.ts b/packages/@aws-cdk/aws-glue-alpha/lib/jobs/scala-spark-etl-job.ts index d4b7cd8bb1a6b..c472cf314cf3f 100644 --- a/packages/@aws-cdk/aws-glue-alpha/lib/jobs/scala-spark-etl-job.ts +++ b/packages/@aws-cdk/aws-glue-alpha/lib/jobs/scala-spark-etl-job.ts @@ -95,7 +95,7 @@ export class ScalaSparkEtlJob extends Job { this.sparkUILoggingLocation = sparkUIArgs?.location; // Enable CloudWatch metrics and continuous logging by default as a best practice - const continuousLoggingArgs = props.continuousLogging?.enabled ? 
this.setupContinuousLogging(this.role, props.continuousLogging) : {}; + const continuousLoggingArgs = this.setupContinuousLogging(this.role, props.continuousLogging); const profilingMetricsArgs = { '--enable-metrics': '' }; const observabilityMetricsArgs = { '--enable-observability-metrics': 'true' }; diff --git a/packages/@aws-cdk/aws-glue-alpha/lib/jobs/scala-spark-flex-etl-job.ts b/packages/@aws-cdk/aws-glue-alpha/lib/jobs/scala-spark-flex-etl-job.ts index 781ea5ba2b957..5d443f435178b 100644 --- a/packages/@aws-cdk/aws-glue-alpha/lib/jobs/scala-spark-flex-etl-job.ts +++ b/packages/@aws-cdk/aws-glue-alpha/lib/jobs/scala-spark-flex-etl-job.ts @@ -134,7 +134,7 @@ export class ScalaSparkFlexEtlJob extends Job { this.sparkUILoggingLocation = sparkUIArgs?.location; // Enable CloudWatch metrics and continuous logging by default as a best practice - const continuousLoggingArgs = props.continuousLogging?.enabled ? this.setupContinuousLogging(this.role, props.continuousLogging) : {}; + const continuousLoggingArgs = this.setupContinuousLogging(this.role, props.continuousLogging); const profilingMetricsArgs = { '--enable-metrics': '' }; const observabilityMetricsArgs = { '--enable-observability-metrics': 'true' }; diff --git a/packages/@aws-cdk/aws-glue-alpha/lib/jobs/scala-spark-streaming-job.ts b/packages/@aws-cdk/aws-glue-alpha/lib/jobs/scala-spark-streaming-job.ts index 7085cb2e71d37..83c843342d7bd 100644 --- a/packages/@aws-cdk/aws-glue-alpha/lib/jobs/scala-spark-streaming-job.ts +++ b/packages/@aws-cdk/aws-glue-alpha/lib/jobs/scala-spark-streaming-job.ts @@ -94,7 +94,7 @@ export class ScalaSparkStreamingJob extends Job { this.sparkUILoggingLocation = sparkUIArgs?.location; // Enable CloudWatch metrics and continuous logging by default as a best practice - const continuousLoggingArgs = props.continuousLogging?.enabled ? 
this.setupContinuousLogging(this.role, props.continuousLogging) : {}; + const continuousLoggingArgs = this.setupContinuousLogging(this.role, props.continuousLogging); const profilingMetricsArgs = { '--enable-metrics': '' }; const observabilityMetricsArgs = { '--enable-observability-metrics': 'true' }; diff --git a/packages/@aws-cdk/aws-glue-alpha/test/code.test.ts b/packages/@aws-cdk/aws-glue-alpha/test/code.test.ts index 93e2f9aeb24e2..45dd93d63a6b5 100644 --- a/packages/@aws-cdk/aws-glue-alpha/test/code.test.ts +++ b/packages/@aws-cdk/aws-glue-alpha/test/code.test.ts @@ -1,5 +1,5 @@ import * as path from 'path'; -import { Template } from 'aws-cdk-lib/assertions'; +import { Template, Match } from 'aws-cdk-lib/assertions'; import * as s3 from 'aws-cdk-lib/aws-s3'; import * as cdk from 'aws-cdk-lib'; import * as cxapi from 'aws-cdk-lib/cx-api'; @@ -23,7 +23,7 @@ describe('Code', () => { bucket = s3.Bucket.fromBucketName(stack, 'Bucket', 'bucketname'); script = glue.Code.fromBucket(bucket, key); - new glue.PythonShellJob(stack, 'Job1', { + const job = new glue.PythonShellJob(stack, 'Job1', { script, role: new Role(stack, 'Role', { assumedBy: new ServicePrincipal('glue.amazonaws.com'), @@ -78,7 +78,7 @@ describe('Code', () => { }, Roles: [ { - Ref: 'Job1ServiceRole7AF34CCA', + Ref: Match.stringLikeRegexp('Role'), }, ], }); @@ -193,7 +193,7 @@ describe('Code', () => { }, Roles: [ { - Ref: 'Job1ServiceRole7AF34CCA', + Ref: Match.stringLikeRegexp('Role'), }, ], }); @@ -207,13 +207,13 @@ describe('Code', () => { test('used in more than 1 job in the same stack should be reused', () => { new glue.PythonShellJob(stack, 'Job1', { script, - role: new Role(stack, 'Role', { + role: new Role(stack, 'Role1', { assumedBy: new ServicePrincipal('glue.amazonaws.com'), }), }); new glue.PythonShellJob(stack, 'Job2', { script, - role: new Role(stack, 'Role', { + role: new Role(stack, 'Role2', { assumedBy: new ServicePrincipal('glue.amazonaws.com'), }), }); @@ -264,7 +264,7 @@ 
describe('Code', () => { }, Role: { 'Fn::GetAtt': [ - 'Job1ServiceRole7AF34CCA', + Match.stringLikeRegexp('Role'), 'Arn', ], }, @@ -275,7 +275,7 @@ describe('Code', () => { }, Role: { 'Fn::GetAtt': [ - 'Job2ServiceRole5D2B98FE', + Match.stringLikeRegexp('Role'), 'Arn', ], }, @@ -285,7 +285,7 @@ describe('Code', () => { test('throws if trying to rebind in another stack', () => { new glue.PythonShellJob(stack, 'Job1', { script, - role: new Role(stack, 'Role', { + role: new Role(stack, 'Role1', { assumedBy: new ServicePrincipal('glue.amazonaws.com'), }), }); @@ -293,7 +293,7 @@ describe('Code', () => { expect(() => new glue.PythonShellJob(differentStack, 'Job1', { script, - role: new Role(stack, 'Role', { + role: new Role(stack, 'Role2', { assumedBy: new ServicePrincipal('glue.amazonaws.com'), }), })).toThrow(/associated with another stack/); diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-etl.js.snapshot/aws-glue-job-pyspark-etl.assets.json b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-etl.js.snapshot/aws-glue-job-pyspark-etl.assets.json index 6b7e51197ff6d..653b948c07787 100644 --- a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-etl.js.snapshot/aws-glue-job-pyspark-etl.assets.json +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-etl.js.snapshot/aws-glue-job-pyspark-etl.assets.json @@ -14,7 +14,7 @@ } } }, - "b69265e3929a8fd6ea69db3797d7382df3816ef168f310b15d41ba6fe9f00e81": { + "4799b81562fc3fe83d1f986c3c439f80d36cbfc9421a3e8558060ffaf8616aa0": { "source": { "path": "aws-glue-job-pyspark-etl.template.json", "packaging": "file" @@ -22,7 +22,7 @@ "destinations": { "current_account-current_region": { "bucketName": "cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}", - "objectKey": "b69265e3929a8fd6ea69db3797d7382df3816ef168f310b15d41ba6fe9f00e81.json", + "objectKey": "4799b81562fc3fe83d1f986c3c439f80d36cbfc9421a3e8558060ffaf8616aa0.json", "assumeRoleArn": 
"arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-file-publishing-role-${AWS::AccountId}-${AWS::Region}" } } diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-etl.js.snapshot/aws-glue-job-pyspark-etl.template.json b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-etl.js.snapshot/aws-glue-job-pyspark-etl.template.json index 9da907d9a9057..5f2562c5ca63d 100644 --- a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-etl.js.snapshot/aws-glue-job-pyspark-etl.template.json +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-etl.js.snapshot/aws-glue-job-pyspark-etl.template.json @@ -109,6 +109,7 @@ }, "DefaultArguments": { "--job-language": "python", + "--enable-continuous-cloudwatch-log": "true", "--enable-metrics": "", "--enable-observability-metrics": "true" }, @@ -144,6 +145,7 @@ }, "DefaultArguments": { "--job-language": "python", + "--enable-continuous-cloudwatch-log": "true", "--enable-metrics": "", "--enable-observability-metrics": "true", "arg1": "value1", diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-etl.js.snapshot/manifest.json b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-etl.js.snapshot/manifest.json index 35410085e0eec..d4ea673146612 100644 --- a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-etl.js.snapshot/manifest.json +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-etl.js.snapshot/manifest.json @@ -18,7 +18,7 @@ "validateOnSynth": false, "assumeRoleArn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-deploy-role-${AWS::AccountId}-${AWS::Region}", "cloudFormationExecutionRoleArn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-cfn-exec-role-${AWS::AccountId}-${AWS::Region}", - "stackTemplateAssetObjectUrl": "s3://cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}/b69265e3929a8fd6ea69db3797d7382df3816ef168f310b15d41ba6fe9f00e81.json", + "stackTemplateAssetObjectUrl": 
"s3://cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}/4799b81562fc3fe83d1f986c3c439f80d36cbfc9421a3e8558060ffaf8616aa0.json", "requiresBootstrapStackVersion": 6, "bootstrapStackVersionSsmParameter": "/cdk-bootstrap/hnb659fds/version", "additionalDependencies": [ diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-etl.js.snapshot/tree.json b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-etl.js.snapshot/tree.json index b68d28f106e7f..974404f968b15 100644 --- a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-etl.js.snapshot/tree.json +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-etl.js.snapshot/tree.json @@ -194,6 +194,7 @@ }, "defaultArguments": { "--job-language": "python", + "--enable-continuous-cloudwatch-log": "true", "--enable-metrics": "", "--enable-observability-metrics": "true" }, @@ -247,6 +248,7 @@ }, "defaultArguments": { "--job-language": "python", + "--enable-continuous-cloudwatch-log": "true", "--enable-metrics": "", "--enable-observability-metrics": "true", "arg1": "value1", diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-etl.js.snapshot/aws-glue-job-scalaspark-etl.assets.json b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-etl.js.snapshot/aws-glue-job-scalaspark-etl.assets.json index ddec1d20abcd4..bccb1a07c98a9 100644 --- a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-etl.js.snapshot/aws-glue-job-scalaspark-etl.assets.json +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-etl.js.snapshot/aws-glue-job-scalaspark-etl.assets.json @@ -14,7 +14,7 @@ } } }, - "5d07dbbcf92246e5c8cb128178cd6f66a5abc0d138f9470a93062a2449c80a14": { + "95d6306a689415ff849d8061f263d71b4ee7eab3bb724e06f1356c346a111258": { "source": { "path": "aws-glue-job-scalaspark-etl.template.json", "packaging": "file" @@ -22,7 +22,7 @@ "destinations": { "current_account-current_region": { "bucketName": "cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}", - 
"objectKey": "5d07dbbcf92246e5c8cb128178cd6f66a5abc0d138f9470a93062a2449c80a14.json", + "objectKey": "95d6306a689415ff849d8061f263d71b4ee7eab3bb724e06f1356c346a111258.json", "assumeRoleArn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-file-publishing-role-${AWS::AccountId}-${AWS::Region}" } } diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-etl.js.snapshot/aws-glue-job-scalaspark-etl.template.json b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-etl.js.snapshot/aws-glue-job-scalaspark-etl.template.json index 08fdb61a25ecd..5fb005ed30ff3 100644 --- a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-etl.js.snapshot/aws-glue-job-scalaspark-etl.template.json +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-etl.js.snapshot/aws-glue-job-scalaspark-etl.template.json @@ -109,6 +109,7 @@ "DefaultArguments": { "--job-language": "scala", "--class": "com.example.HelloWorld", + "--enable-continuous-cloudwatch-log": "true", "--enable-metrics": "", "--enable-observability-metrics": "true" }, @@ -144,6 +145,7 @@ "DefaultArguments": { "--job-language": "scala", "--class": "com.example.HelloWorld", + "--enable-continuous-cloudwatch-log": "true", "--enable-metrics": "", "--enable-observability-metrics": "true", "arg1": "value1", diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-etl.js.snapshot/manifest.json b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-etl.js.snapshot/manifest.json index ef4e906503ebd..ae9bae3832736 100644 --- a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-etl.js.snapshot/manifest.json +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-etl.js.snapshot/manifest.json @@ -18,7 +18,7 @@ "validateOnSynth": false, "assumeRoleArn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-deploy-role-${AWS::AccountId}-${AWS::Region}", "cloudFormationExecutionRoleArn": 
"arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-cfn-exec-role-${AWS::AccountId}-${AWS::Region}", - "stackTemplateAssetObjectUrl": "s3://cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}/5d07dbbcf92246e5c8cb128178cd6f66a5abc0d138f9470a93062a2449c80a14.json", + "stackTemplateAssetObjectUrl": "s3://cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}/95d6306a689415ff849d8061f263d71b4ee7eab3bb724e06f1356c346a111258.json", "requiresBootstrapStackVersion": 6, "bootstrapStackVersionSsmParameter": "/cdk-bootstrap/hnb659fds/version", "additionalDependencies": [ diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-etl.js.snapshot/tree.json b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-etl.js.snapshot/tree.json index 2a2fa48e38b77..a790ddecb7f3e 100644 --- a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-etl.js.snapshot/tree.json +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-etl.js.snapshot/tree.json @@ -194,6 +194,7 @@ "defaultArguments": { "--job-language": "scala", "--class": "com.example.HelloWorld", + "--enable-continuous-cloudwatch-log": "true", "--enable-metrics": "", "--enable-observability-metrics": "true" }, @@ -247,6 +248,7 @@ "defaultArguments": { "--job-language": "scala", "--class": "com.example.HelloWorld", + "--enable-continuous-cloudwatch-log": "true", "--enable-metrics": "", "--enable-observability-metrics": "true", "arg1": "value1", diff --git a/packages/@aws-cdk/aws-glue-alpha/test/pyspark-etl-jobs.test.ts b/packages/@aws-cdk/aws-glue-alpha/test/pyspark-etl-jobs.test.ts index 2f3443be45adb..a598468a45c70 100644 --- a/packages/@aws-cdk/aws-glue-alpha/test/pyspark-etl-jobs.test.ts +++ b/packages/@aws-cdk/aws-glue-alpha/test/pyspark-etl-jobs.test.ts @@ -2,7 +2,8 @@ import * as cdk from 'aws-cdk-lib'; import * as glue from '../lib'; import * as iam from 'aws-cdk-lib/aws-iam'; import * as s3 from 'aws-cdk-lib/aws-s3'; -import { Template } from 'aws-cdk-lib/assertions'; 
+import { Template, Match } from 'aws-cdk-lib/assertions'; +import { LogGroup } from 'aws-cdk-lib/aws-logs'; describe('Job', () => { let stack: cdk.Stack; @@ -21,7 +22,11 @@ describe('Job', () => { describe('Create new PySpark ETL Job with default parameters', () => { beforeEach(() => { - job = new glue.PySparkEtlJob(stack, 'ImportedJob', { role, script }); + job = new glue.PySparkEtlJob(stack, 'PySparkETLJob', { + role, + script, + jobName: 'PySparkETLJob', + }); }); test('Test default attributes', () => { @@ -39,6 +44,17 @@ describe('Job', () => { }); }); + test('Has Continuous Logging Enabled', () => { + Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { + DefaultArguments: Match.objectLike({ + '--enable-metrics': '', + '--enable-observability-metrics': 'true', + '--job-language': 'python', + '--enable-continuous-cloudwatch-log': 'true', + }), + }); + }); + test('Default numberOfWorkers should be 10', () => { Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { NumberOfWorkers: 10, @@ -51,4 +67,67 @@ describe('Job', () => { }); }); }); + + describe('Create new PySpark ETL Job with log override parameters', () => { + + beforeEach(() => { + job = new glue.PySparkEtlJob(stack, 'PySparkETLJob', { + jobName: 'PySparkETLJob', + role, + script, + continuousLogging: { + enabled: true, + quiet: true, + logGroup: new LogGroup(stack, 'logGroup', { + logGroupName: '/aws-glue/jobs/${job.jobName}', + }), + logStreamPrefix: 'logStreamPrefix', + conversionPattern: 'convert', + }, + }); + }); + + test('Has Continuous Logging enabled with optional args', () => { + Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { + DefaultArguments: Match.objectLike({ + '--enable-metrics': '', + '--enable-observability-metrics': 'true', + '--job-language': 'python', + '--continuous-log-logGroup': Match.objectLike({ + Ref: Match.anyValue(), + }), + '--enable-continuous-cloudwatch-log': 'true', + '--enable-continuous-log-filter': 'true', + 
'--continuous-log-logStreamPrefix': 'logStreamPrefix', + '--continuous-log-conversionPattern': 'convert', + }), + }); + }); + + }); + + describe('Create new PySpark ETL Job with logging explicitly disabled', () => { + + beforeEach(() => { + job = new glue.PySparkEtlJob(stack, 'PySparkETLJob', { + jobName: 'PySparkETLJob', + role, + script, + continuousLogging: { + enabled: false, + }, + }); + }); + + test('Has Continuous Logging Disabled', () => { + Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { + DefaultArguments: { + '--enable-metrics': '', + '--enable-observability-metrics': 'true', + '--job-language': 'python', + }, + }); + }); + + }); }); \ No newline at end of file From a75ac691908eed2fd9dc6c09fe8e00aed3dff3e8 Mon Sep 17 00:00:00 2001 From: Natalie White Date: Thu, 11 Jul 2024 22:04:52 +0000 Subject: [PATCH 36/51] Delete legacy Glue Job classes and tests post-merge --- .../aws-glue-alpha/lib/job-executable.ts | 527 -------- packages/@aws-cdk/aws-glue-alpha/lib/job.ts | 921 ------------- .../@aws-cdk/aws-glue-alpha/test/code.test.ts | 2 +- .../@aws-cdk/aws-glue-alpha/test/integ.job.ts | 147 -- .../test/job-executable.test.ts | 282 ---- .../@aws-cdk/aws-glue-alpha/test/job.test.ts | 1180 ----------------- 6 files changed, 1 insertion(+), 3058 deletions(-) delete mode 100644 packages/@aws-cdk/aws-glue-alpha/lib/job-executable.ts delete mode 100644 packages/@aws-cdk/aws-glue-alpha/lib/job.ts delete mode 100644 packages/@aws-cdk/aws-glue-alpha/test/integ.job.ts delete mode 100644 packages/@aws-cdk/aws-glue-alpha/test/job-executable.test.ts delete mode 100644 packages/@aws-cdk/aws-glue-alpha/test/job.test.ts diff --git a/packages/@aws-cdk/aws-glue-alpha/lib/job-executable.ts b/packages/@aws-cdk/aws-glue-alpha/lib/job-executable.ts deleted file mode 100644 index 4bee0a054bcd8..0000000000000 --- a/packages/@aws-cdk/aws-glue-alpha/lib/job-executable.ts +++ /dev/null @@ -1,527 +0,0 @@ -import { Code } from './code'; - -/** - * AWS Glue version 
determines the versions of Apache Spark and Python that are available to the job. - * - * @see https://docs.aws.amazon.com/glue/latest/dg/add-job.html. - * - * If you need to use a GlueVersion that doesn't exist as a static member, you - * can instantiate a `GlueVersion` object, e.g: `GlueVersion.of('1.5')`. - */ -export class GlueVersion { - /** - * Glue version using Spark 2.2.1 and Python 2.7 - */ - public static readonly V0_9 = new GlueVersion('0.9'); - - /** - * Glue version using Spark 2.4.3, Python 2.7 and Python 3.6 - */ - public static readonly V1_0 = new GlueVersion('1.0'); - - /** - * Glue version using Spark 2.4.3 and Python 3.7 - */ - public static readonly V2_0 = new GlueVersion('2.0'); - - /** - * Glue version using Spark 3.1.1 and Python 3.7 - */ - public static readonly V3_0 = new GlueVersion('3.0'); - - /** - * Glue version using Spark 3.3.0 and Python 3.10 - */ - public static readonly V4_0 = new GlueVersion('4.0'); - - /** - * Custom Glue version - * @param version custom version - */ - public static of(version: string): GlueVersion { - return new GlueVersion(version); - } - - /** - * The name of this GlueVersion, as expected by Job resource. - */ - public readonly name: string; - - private constructor(name: string) { - this.name = name; - } -} - -/** - * Runtime language of the Glue job - */ -export enum JobLanguage { - /** - * Scala - */ - SCALA = 'scala', - - /** - * Python - */ - PYTHON = 'python', -} - -/** - * Python version - */ -export enum PythonVersion { - /** - * Python 2 (the exact version depends on GlueVersion and JobCommand used) - */ - TWO = '2', - - /** - * Python 3 (the exact version depends on GlueVersion and JobCommand used) - */ - THREE = '3', - - /** - * Python 3.9 (the exact version depends on GlueVersion and JobCommand used) - */ - THREE_NINE = '3.9', -} - -/** - * AWS Glue runtime determines the runtime engine of the job. - * - */ -export class Runtime { - /** - * Runtime for a Glue for Ray 2.4. 
- */ - public static readonly RAY_TWO_FOUR = new Runtime('Ray2.4'); - - /** - * Custom runtime - * @param runtime custom runtime - */ - public static of(runtime: string): Runtime { - return new Runtime(runtime); - } - - /** - * The name of this Runtime. - */ - public readonly name: string; - - private constructor(name: string) { - this.name = name; - } -} - -/** - * The job type. - * - * If you need to use a JobType that doesn't exist as a static member, you - * can instantiate a `JobType` object, e.g: `JobType.of('other name')`. - */ -export class JobType { - /** - * Command for running a Glue Spark job. - */ - public static readonly ETL = new JobType('glueetl'); - - /** - * Command for running a Glue Spark streaming job. - */ - public static readonly STREAMING = new JobType('gluestreaming'); - - /** - * Command for running a Glue python shell job. - */ - public static readonly PYTHON_SHELL = new JobType('pythonshell'); - - /** - * Command for running a Glue Ray job. - */ - public static readonly RAY = new JobType('glueray'); - - /** - * Custom type name - * @param name type name - */ - public static of(name: string): JobType { - return new JobType(name); - } - - /** - * The name of this JobType, as expected by Job resource. - */ - public readonly name: string; - - private constructor(name: string) { - this.name = name; - } -} - -interface PythonExecutableProps { - /** - * The Python version to use. - */ - readonly pythonVersion: PythonVersion; - - /** - * Additional Python files that AWS Glue adds to the Python path before executing your script. - * Only individual files are supported, directories are not supported. - * Equivalent to a job parameter `--extra-py-files`. - * - * @default - no extra python files and argument is not set - * - * @see https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-etl-glue-arguments.html - */ - readonly extraPythonFiles?: Code[]; -} - -interface RayExecutableProps { - /** - * The Python version to use. 
- */ - readonly pythonVersion: PythonVersion; - - /** - * Additional Python modules that AWS Glue adds to the Python path before executing your script. - * Equivalent to a job parameter `--s3-py-modules`. - * - * @default - no extra python files and argument is not set - * - * @see https://docs.aws.amazon.com/glue/latest/dg/author-job-ray-job-parameters.html - */ - readonly s3PythonModules?: Code[]; -} - -interface SharedJobExecutableProps { - /** - * Runtime. It is required for Ray jobs. - * - */ - readonly runtime?: Runtime; - - /** - * Glue version. - * - * @see https://docs.aws.amazon.com/glue/latest/dg/release-notes.html - */ - readonly glueVersion: GlueVersion; - - /** - * The script that executes a job. - */ - readonly script: Code; - - /** - * Additional files, such as configuration files that AWS Glue copies to the working directory of your script before executing it. - * Only individual files are supported, directories are not supported. - * Equivalent to a job parameter `--extra-files`. - * - * @default [] - no extra files are copied to the working directory - * - * @see https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-etl-glue-arguments.html - */ - readonly extraFiles?: Code[]; -} - -interface SharedSparkJobExecutableProps extends SharedJobExecutableProps { - /** - * Additional Java .jar files that AWS Glue adds to the Java classpath before executing your script. - * Only individual files are supported, directories are not supported. - * Equivalent to a job parameter `--extra-jars`. - * - * @default [] - no extra jars are added to the classpath - * - * @see https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-etl-glue-arguments.html - */ - readonly extraJars?: Code[]; - - /** - * Setting this value to true prioritizes the customer's extra JAR files in the classpath. - * Equivalent to a job parameter `--user-jars-first`. 
- * - * @default false - priority is not given to user-provided jars - * - * @see https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-etl-glue-arguments.html - */ - readonly extraJarsFirst?: boolean; -} - -/** - * Props for creating a Scala Spark (ETL or Streaming) job executable - */ -export interface ScalaJobExecutableProps extends SharedSparkJobExecutableProps { - /** - * The fully qualified Scala class name that serves as the entry point for the job. - * Equivalent to a job parameter `--class`. - * - * @see https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-etl-glue-arguments.html - */ - readonly className: string; -} - -/** - * Props for creating a Python Spark (ETL or Streaming) job executable - */ -export interface PythonSparkJobExecutableProps extends SharedSparkJobExecutableProps, PythonExecutableProps {} - -/** - * Props for creating a Python shell job executable - */ -export interface PythonShellExecutableProps extends SharedJobExecutableProps, PythonExecutableProps {} - -/** - * Props for creating a Python Ray job executable - */ -export interface PythonRayExecutableProps extends SharedJobExecutableProps, RayExecutableProps {} - -/** - * The executable properties related to the Glue job's GlueVersion, JobType and code - */ -export class JobExecutable { - - /** - * Create Scala executable props for Apache Spark ETL job. - * - * @param props Scala Apache Spark Job props - */ - public static scalaEtl(props: ScalaJobExecutableProps): JobExecutable { - return new JobExecutable({ - ...props, - type: JobType.ETL, - language: JobLanguage.SCALA, - }); - } - - /** - * Create Scala executable props for Apache Spark Streaming job. - * - * @param props Scala Apache Spark Job props - */ - public static scalaStreaming(props: ScalaJobExecutableProps): JobExecutable { - return new JobExecutable({ - ...props, - type: JobType.STREAMING, - language: JobLanguage.SCALA, - }); - } - - /** - * Create Python executable props for Apache Spark ETL job. 
- * - * @param props Python Apache Spark Job props - */ - public static pythonEtl(props: PythonSparkJobExecutableProps): JobExecutable { - return new JobExecutable({ - ...props, - type: JobType.ETL, - language: JobLanguage.PYTHON, - }); - } - - /** - * Create Python executable props for Apache Spark Streaming job. - * - * @param props Python Apache Spark Job props - */ - public static pythonStreaming(props: PythonSparkJobExecutableProps): JobExecutable { - return new JobExecutable({ - ...props, - type: JobType.STREAMING, - language: JobLanguage.PYTHON, - }); - } - - /** - * Create Python executable props for python shell jobs. - * - * @param props Python Shell Job props. - */ - public static pythonShell(props: PythonShellExecutableProps): JobExecutable { - return new JobExecutable({ - ...props, - type: JobType.PYTHON_SHELL, - language: JobLanguage.PYTHON, - }); - } - - /** - * Create Python executable props for Ray jobs. - * - * @param props Ray Job props. - */ - public static pythonRay(props: PythonRayExecutableProps): JobExecutable { - return new JobExecutable({ - ...props, - type: JobType.RAY, - language: JobLanguage.PYTHON, - }); - } - - /** - * Create a custom JobExecutable. - * - * @param config custom job executable configuration. 
- */ - public static of(config: JobExecutableConfig): JobExecutable { - return new JobExecutable(config); - } - - private config: JobExecutableConfig; - - private constructor(config: JobExecutableConfig) { - const glueVersion = config.glueVersion.name; - const type = config.type.name; - if (JobType.PYTHON_SHELL.name === type) { - if (config.language !== JobLanguage.PYTHON) { - throw new Error('Python shell requires the language to be set to Python'); - } - if ([GlueVersion.V0_9.name, GlueVersion.V4_0.name].includes(glueVersion)) { - throw new Error(`Specified GlueVersion ${glueVersion} does not support Python Shell`); - } - } - if (JobType.RAY.name === type) { - if (config.language !== JobLanguage.PYTHON) { - throw new Error('Ray requires the language to be set to Python'); - } - if ([GlueVersion.V0_9.name, GlueVersion.V1_0.name, GlueVersion.V2_0.name, GlueVersion.V3_0.name].includes(glueVersion)) { - throw new Error(`Specified GlueVersion ${glueVersion} does not support Ray`); - } - } - if (config.extraJarsFirst && [GlueVersion.V0_9.name, GlueVersion.V1_0.name].includes(glueVersion)) { - throw new Error(`Specified GlueVersion ${glueVersion} does not support extraJarsFirst`); - } - if (config.pythonVersion === PythonVersion.TWO && ![GlueVersion.V0_9.name, GlueVersion.V1_0.name].includes(glueVersion)) { - throw new Error(`Specified GlueVersion ${glueVersion} does not support PythonVersion ${config.pythonVersion}`); - } - if (JobLanguage.PYTHON !== config.language && config.extraPythonFiles) { - throw new Error('extraPythonFiles is not supported for languages other than JobLanguage.PYTHON'); - } - if (config.extraPythonFiles && type === JobType.RAY.name) { - throw new Error('extraPythonFiles is not supported for Ray jobs'); - } - if (config.pythonVersion === PythonVersion.THREE_NINE && type !== JobType.PYTHON_SHELL.name && type !== JobType.RAY.name) { - throw new Error('Specified PythonVersion PythonVersion.THREE_NINE is only supported for JobType Python Shell and 
Ray'); - } - if (config.pythonVersion === PythonVersion.THREE && type === JobType.RAY.name) { - throw new Error('Specified PythonVersion PythonVersion.THREE is not supported for Ray'); - } - if (config.runtime === undefined && type === JobType.RAY.name) { - throw new Error('Runtime is required for Ray jobs'); - } - this.config = config; - } - - /** - * Called during Job initialization to get JobExecutableConfig. - */ - public bind(): JobExecutableConfig { - return this.config; - } -} - -/** - * Result of binding a `JobExecutable` into a `Job`. - */ -export interface JobExecutableConfig { - /** - * Glue version. - * - * @see https://docs.aws.amazon.com/glue/latest/dg/release-notes.html - */ - readonly glueVersion: GlueVersion; - - /** - * The language of the job (Scala or Python). - * Equivalent to a job parameter `--job-language`. - * - * @see https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-etl-glue-arguments.html - */ - readonly language: JobLanguage; - - /** - * Specify the type of the job whether it's an Apache Spark ETL or streaming one or if it's a Python shell job. - */ - readonly type: JobType; - - /** - * The Python version to use. - * - * @default - no python version specified - */ - readonly pythonVersion?: PythonVersion; - - /** - * The Runtime to use. - * - * @default - no runtime specified - */ - readonly runtime?: Runtime; - - /** - * The script that is executed by a job. - */ - readonly script: Code; - - /** - * The Scala class that serves as the entry point for the job. This applies only if your the job langauage is Scala. - * Equivalent to a job parameter `--class`. - * - * @default - no scala className specified - * - * @see https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-etl-glue-arguments.html - */ - readonly className?: string; - - /** - * Additional Java .jar files that AWS Glue adds to the Java classpath before executing your script. - * Equivalent to a job parameter `--extra-jars`. 
- * - * @default - no extra jars specified. - * - * @see https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-etl-glue-arguments.html - */ - readonly extraJars?: Code[]; - - /** - * Additional Python files that AWS Glue adds to the Python path before executing your script. - * Equivalent to a job parameter `--extra-py-files`. - * - * @default - no extra python files specified. - * - * @see https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-etl-glue-arguments.html - */ - readonly extraPythonFiles?: Code[]; - - /** - * Additional Python modules that AWS Glue adds to the Python path before executing your script. - * Equivalent to a job parameter `--s3-py-modules`. - * - * @default - no extra python files specified. - * - * @see https://docs.aws.amazon.com/glue/latest/dg/author-job-ray-job-parameters.html - */ - readonly s3PythonModules?: Code[]; - - /** - * Additional files, such as configuration files that AWS Glue copies to the working directory of your script before executing it. - * Equivalent to a job parameter `--extra-files`. - * - * @default - no extra files specified. - * - * @see https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-etl-glue-arguments.html - */ - readonly extraFiles?: Code[]; - - /** - * Setting this value to true prioritizes the customer's extra JAR files in the classpath. - * Equivalent to a job parameter `--user-jars-first`. - * - * @default - extra jars are not prioritized. 
- * - * @see https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-etl-glue-arguments.html - */ - readonly extraJarsFirst?: boolean; -} diff --git a/packages/@aws-cdk/aws-glue-alpha/lib/job.ts b/packages/@aws-cdk/aws-glue-alpha/lib/job.ts deleted file mode 100644 index 813894f0b6898..0000000000000 --- a/packages/@aws-cdk/aws-glue-alpha/lib/job.ts +++ /dev/null @@ -1,921 +0,0 @@ -import { EOL } from 'os'; -import * as cloudwatch from 'aws-cdk-lib/aws-cloudwatch'; -import * as events from 'aws-cdk-lib/aws-events'; -import * as iam from 'aws-cdk-lib/aws-iam'; -import * as logs from 'aws-cdk-lib/aws-logs'; -import * as s3 from 'aws-cdk-lib/aws-s3'; -import * as cdk from 'aws-cdk-lib/core'; -import * as constructs from 'constructs'; -import { Code, GlueVersion, JobExecutable, JobExecutableConfig, JobType } from '.'; -import { IConnection } from './connection'; -import { CfnJob } from 'aws-cdk-lib/aws-glue'; -import { ISecurityConfiguration } from './security-configuration'; - -/** - * The type of predefined worker that is allocated when a job runs. - * - * If you need to use a WorkerType that doesn't exist as a static member, you - * can instantiate a `WorkerType` object, e.g: `WorkerType.of('other type')`. - */ -export class WorkerType { - /** - * Each worker provides 4 vCPU, 16 GB of memory and a 50GB disk, and 2 executors per worker. - */ - public static readonly STANDARD = new WorkerType('Standard'); - - /** - * Each worker maps to 1 DPU (4 vCPU, 16 GB of memory, 64 GB disk), and provides 1 executor per worker. Suitable for memory-intensive jobs. - */ - public static readonly G_1X = new WorkerType('G.1X'); - - /** - * Each worker maps to 2 DPU (8 vCPU, 32 GB of memory, 128 GB disk), and provides 1 executor per worker. Suitable for memory-intensive jobs. - */ - public static readonly G_2X = new WorkerType('G.2X'); - - /** - * Each worker maps to 4 DPU (16 vCPU, 64 GB of memory, 256 GB disk), and provides 1 executor per worker. 
We recommend this worker type for jobs whose workloads contain your most demanding transforms, aggregations, joins, and queries. This worker type is available only for AWS Glue version 3.0 or later jobs. - */ - public static readonly G_4X = new WorkerType('G.4X'); - - /** - * Each worker maps to 8 DPU (32 vCPU, 128 GB of memory, 512 GB disk), and provides 1 executor per worker. We recommend this worker type for jobs whose workloads contain your most demanding transforms, aggregations, joins, and queries. This worker type is available only for AWS Glue version 3.0 or later jobs. - */ - public static readonly G_8X = new WorkerType('G.8X'); - - /** - * Each worker maps to 0.25 DPU (2 vCPU, 4 GB of memory, 64 GB disk), and provides 1 executor per worker. Suitable for low volume streaming jobs. - */ - public static readonly G_025X = new WorkerType('G.025X'); - - /** - * Each worker maps to 2 high-memory DPU [M-DPU] (8 vCPU, 64 GB of memory, 128 GB disk). Supported in Ray jobs. - */ - public static readonly Z_2X = new WorkerType('Z.2X'); - - /** - * Custom worker type - * @param workerType custom worker type - */ - public static of(workerType: string): WorkerType { - return new WorkerType(workerType); - } - - /** - * The name of this WorkerType, as expected by Job resource. - */ - public readonly name: string; - - private constructor(name: string) { - this.name = name; - } -} - -/** - * Job states emitted by Glue to CloudWatch Events. - * - * @see https://docs.aws.amazon.com/AmazonCloudWatch/latest/events/EventTypes.html#glue-event-types for more information. 
- */ -export enum JobState { - /** - * State indicating job run succeeded - */ - SUCCEEDED = 'SUCCEEDED', - - /** - * State indicating job run failed - */ - FAILED = 'FAILED', - - /** - * State indicating job run timed out - */ - TIMEOUT = 'TIMEOUT', - - /** - * State indicating job is starting - */ - STARTING = 'STARTING', - - /** - * State indicating job is running - */ - RUNNING = 'RUNNING', - - /** - * State indicating job is stopping - */ - STOPPING = 'STOPPING', - - /** - * State indicating job stopped - */ - STOPPED = 'STOPPED', -} - -/** - * The Glue CloudWatch metric type. - * - * @see https://docs.aws.amazon.com/glue/latest/dg/monitoring-awsglue-with-cloudwatch-metrics.html - */ -export enum MetricType { - /** - * A value at a point in time. - */ - GAUGE = 'gauge', - - /** - * An aggregate number. - */ - COUNT = 'count', -} - -/** - * The ExecutionClass whether the job is run with a standard or flexible execution class. - * - * @see https://docs.aws.amazon.com/glue/latest/dg/aws-glue-api-jobs-job.html#aws-glue-api-jobs-job-Job - * @see https://docs.aws.amazon.com/glue/latest/dg/add-job.html - */ -export enum ExecutionClass { - /** - * The flexible execution class is appropriate for time-insensitive jobs whose start - * and completion times may vary. - */ - FLEX = 'FLEX', - - /** - * The standard execution class is ideal for time-sensitive workloads that require fast job - * startup and dedicated resources. - */ - STANDARD = 'STANDARD', -} - -/** - * Interface representing a created or an imported `Job`. - */ -export interface IJob extends cdk.IResource, iam.IGrantable { - /** - * The name of the job. - * @attribute - */ - readonly jobName: string; - - /** - * The ARN of the job. - * @attribute - */ - readonly jobArn: string; - - /** - * Defines a CloudWatch event rule triggered when something happens with this job. 
- * - * @see https://docs.aws.amazon.com/AmazonCloudWatch/latest/events/EventTypes.html#glue-event-types - */ - onEvent(id: string, options?: events.OnEventOptions): events.Rule; - - /** - * Defines a CloudWatch event rule triggered when this job moves to the input jobState. - * - * @see https://docs.aws.amazon.com/AmazonCloudWatch/latest/events/EventTypes.html#glue-event-types - */ - onStateChange(id: string, jobState: JobState, options?: events.OnEventOptions): events.Rule; - - /** - * Defines a CloudWatch event rule triggered when this job moves to the SUCCEEDED state. - * - * @see https://docs.aws.amazon.com/AmazonCloudWatch/latest/events/EventTypes.html#glue-event-types - */ - onSuccess(id: string, options?: events.OnEventOptions): events.Rule; - - /** - * Defines a CloudWatch event rule triggered when this job moves to the FAILED state. - * - * @see https://docs.aws.amazon.com/AmazonCloudWatch/latest/events/EventTypes.html#glue-event-types - */ - onFailure(id: string, options?: events.OnEventOptions): events.Rule; - - /** - * Defines a CloudWatch event rule triggered when this job moves to the TIMEOUT state. - * - * @see https://docs.aws.amazon.com/AmazonCloudWatch/latest/events/EventTypes.html#glue-event-types - */ - onTimeout(id: string, options?: events.OnEventOptions): events.Rule; - - /** - * Create a CloudWatch metric. - * - * @param metricName name of the metric typically prefixed with `glue.driver.`, `glue..` or `glue.ALL.`. - * @param type the metric type. - * @param props metric options. - * - * @see https://docs.aws.amazon.com/glue/latest/dg/monitoring-awsglue-with-cloudwatch-metrics.html - */ - metric(metricName: string, type: MetricType, props?: cloudwatch.MetricOptions): cloudwatch.Metric; - - /** - * Create a CloudWatch Metric indicating job success. - */ - metricSuccess(props?: cloudwatch.MetricOptions): cloudwatch.Metric; - - /** - * Create a CloudWatch Metric indicating job failure. 
- */ - metricFailure(props?: cloudwatch.MetricOptions): cloudwatch.Metric; - - /** - * Create a CloudWatch Metric indicating job timeout. - */ - metricTimeout(props?: cloudwatch.MetricOptions): cloudwatch.Metric; -} - -abstract class JobBase extends cdk.Resource implements IJob { - - public abstract readonly jobArn: string; - public abstract readonly jobName: string; - public abstract readonly grantPrincipal: iam.IPrincipal; - - /** - * Create a CloudWatch Event Rule for this Glue Job when it's in a given state - * - * @param id construct id - * @param options event options. Note that some values are overridden if provided, these are - * - eventPattern.source = ['aws.glue'] - * - eventPattern.detailType = ['Glue Job State Change', 'Glue Job Run Status'] - * - eventPattern.detail.jobName = [this.jobName] - * - * @see https://docs.aws.amazon.com/AmazonCloudWatch/latest/events/EventTypes.html#glue-event-types - */ - public onEvent(id: string, options: events.OnEventOptions = {}): events.Rule { - const rule = new events.Rule(this, id, options); - rule.addTarget(options.target); - rule.addEventPattern({ - source: ['aws.glue'], - detailType: ['Glue Job State Change', 'Glue Job Run Status'], - detail: { - jobName: [this.jobName], - }, - }); - return rule; - } - - /** - * Create a CloudWatch Event Rule for the transition into the input jobState. - * - * @param id construct id. - * @param jobState the job state. - * @param options optional event options. - */ - public onStateChange(id: string, jobState: JobState, options: events.OnEventOptions = {}): events.Rule { - const rule = this.onEvent(id, { - description: `Rule triggered when Glue job ${this.jobName} is in ${jobState} state`, - ...options, - }); - rule.addEventPattern({ - detail: { - state: [jobState], - }, - }); - return rule; - } - - /** - * Create a CloudWatch Event Rule matching JobState.SUCCEEDED. - * - * @param id construct id. - * @param options optional event options. default is {}. 
- */ - public onSuccess(id: string, options: events.OnEventOptions = {}): events.Rule { - return this.onStateChange(id, JobState.SUCCEEDED, options); - } - - /** - * Return a CloudWatch Event Rule matching FAILED state. - * - * @param id construct id. - * @param options optional event options. default is {}. - */ - public onFailure(id: string, options: events.OnEventOptions = {}): events.Rule { - return this.onStateChange(id, JobState.FAILED, options); - } - - /** - * Return a CloudWatch Event Rule matching TIMEOUT state. - * - * @param id construct id. - * @param options optional event options. default is {}. - */ - public onTimeout(id: string, options: events.OnEventOptions = {}): events.Rule { - return this.onStateChange(id, JobState.TIMEOUT, options); - } - - /** - * Create a CloudWatch metric. - * - * @param metricName name of the metric typically prefixed with `glue.driver.`, `glue..` or `glue.ALL.`. - * @param type the metric type. - * @param props metric options. - * - * @see https://docs.aws.amazon.com/glue/latest/dg/monitoring-awsglue-with-cloudwatch-metrics.html - */ - public metric(metricName: string, type: MetricType, props?: cloudwatch.MetricOptions): cloudwatch.Metric { - return new cloudwatch.Metric({ - metricName, - namespace: 'Glue', - dimensionsMap: { - JobName: this.jobName, - JobRunId: 'ALL', - Type: type, - }, - ...props, - }).attachTo(this); - } - - /** - * Return a CloudWatch Metric indicating job success. - * - * This metric is based on the Rule returned by no-args onSuccess() call. - */ - public metricSuccess(props?: cloudwatch.MetricOptions): cloudwatch.Metric { - return metricRule(this.metricJobStateRule('SuccessMetricRule', JobState.SUCCEEDED), props); - } - - /** - * Return a CloudWatch Metric indicating job failure. - * - * This metric is based on the Rule returned by no-args onFailure() call. 
- */ - public metricFailure(props?: cloudwatch.MetricOptions): cloudwatch.Metric { - return metricRule(this.metricJobStateRule('FailureMetricRule', JobState.FAILED), props); - } - - /** - * Return a CloudWatch Metric indicating job timeout. - * - * This metric is based on the Rule returned by no-args onTimeout() call. - */ - public metricTimeout(props?: cloudwatch.MetricOptions): cloudwatch.Metric { - return metricRule(this.metricJobStateRule('TimeoutMetricRule', JobState.TIMEOUT), props); - } - - /** - * Creates or retrieves a singleton event rule for the input job state for use with the metric JobState methods. - * - * @param id construct id. - * @param jobState the job state. - * @private - */ - private metricJobStateRule(id: string, jobState: JobState): events.Rule { - return this.node.tryFindChild(id) as events.Rule ?? this.onStateChange(id, jobState); - } -} - -/** - * Properties for enabling Spark UI monitoring feature for Spark-based Glue jobs. - * - * @see https://docs.aws.amazon.com/glue/latest/dg/monitor-spark-ui-jobs.html - * @see https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-etl-glue-arguments.html - */ -export interface SparkUIProps { - /** - * Enable Spark UI. - */ - readonly enabled: boolean; - - /** - * The bucket where the Glue job stores the logs. - * - * @default - a new bucket will be created. - */ - readonly bucket?: s3.IBucket; - - /** - * The path inside the bucket (objects prefix) where the Glue job stores the logs. - * Use format `'foo/bar/'` - * - * @default - the logs will be written at the root of the bucket - */ - readonly prefix?: string; -} - -/** - * The Spark UI logging location. - * - * @see https://docs.aws.amazon.com/glue/latest/dg/monitor-spark-ui-jobs.html - * @see https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-etl-glue-arguments.html - */ -export interface SparkUILoggingLocation { - /** - * The bucket where the Glue job stores the logs. - * - * @default - a new bucket will be created. 
- */ - readonly bucket: s3.IBucket; - - /** - * The path inside the bucket (objects prefix) where the Glue job stores the logs. - * - * @default - the logs will be written at the root of the bucket - */ - readonly prefix?: string; -} - -/** - * Properties for enabling Continuous Logging for Glue Jobs. - * - * @see https://docs.aws.amazon.com/glue/latest/dg/monitor-continuous-logging-enable.html - * @see https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-etl-glue-arguments.html - */ -export interface ContinuousLoggingProps { - /** - * Enable continouous logging. - */ - readonly enabled: boolean; - - /** - * Specify a custom CloudWatch log group name. - * - * @default - a log group is created with name `/aws-glue/jobs/logs-v2/`. - */ - readonly logGroup?: logs.ILogGroup; - - /** - * Specify a custom CloudWatch log stream prefix. - * - * @default - the job run ID. - */ - readonly logStreamPrefix?: string; - - /** - * Filter out non-useful Apache Spark driver/executor and Apache Hadoop YARN heartbeat log messages. - * - * @default true - */ - readonly quiet?: boolean; - - /** - * Apply the provided conversion pattern. - * - * This is a Log4j Conversion Pattern to customize driver and executor logs. - * - * @default `%d{yy/MM/dd HH:mm:ss} %p %c{1}: %m%n` - */ - readonly conversionPattern?: string; -} - -/** - * Attributes for importing `Job`. - */ -export interface JobAttributes { - /** - * The name of the job. - */ - readonly jobName: string; - - /** - * The IAM role assumed by Glue to run this job. - * - * @default - undefined - */ - readonly role?: iam.IRole; -} - -/** - * Construction properties for `Job`. - */ -export interface JobProps { - /** - * The job's executable properties. - */ - readonly executable: JobExecutable; - - /** - * The name of the job. - * - * @default - a name is automatically generated - */ - readonly jobName?: string; - - /** - * The description of the job. 
- * - * @default - no value - */ - readonly description?: string; - - /** - * The number of AWS Glue data processing units (DPUs) that can be allocated when this job runs. - * Cannot be used for Glue version 2.0 and later - workerType and workerCount should be used instead. - * - * @default - 10 when job type is Apache Spark ETL or streaming, 0.0625 when job type is Python shell - */ - readonly maxCapacity?: number; - - /** - * The maximum number of times to retry this job after a job run fails. - * - * @default 0 - */ - readonly maxRetries?: number; - - /** - * The maximum number of concurrent runs allowed for the job. - * - * An error is returned when this threshold is reached. The maximum value you can specify is controlled by a service limit. - * - * @default 1 - */ - readonly maxConcurrentRuns?: number; - - /** - * The number of minutes to wait after a job run starts, before sending a job run delay notification. - * - * @default - no delay notifications - */ - readonly notifyDelayAfter?: cdk.Duration; - - /** - * The maximum time that a job run can consume resources before it is terminated and enters TIMEOUT status. - * - * @default cdk.Duration.hours(48) - */ - readonly timeout?: cdk.Duration; - - /** - * The type of predefined worker that is allocated when a job runs. - * - * @default - differs based on specific Glue version - */ - readonly workerType?: WorkerType; - - /** - * The number of workers of a defined `WorkerType` that are allocated when a job runs. - * - * @default - differs based on specific Glue version/worker type - */ - readonly workerCount?: number; - - /** - * The `Connection`s used for this job. - * - * Connections are used to connect to other AWS Service or resources within a VPC. - * - * @default [] - no connections are added to the job - */ - readonly connections?: IConnection[]; - - /** - * The `SecurityConfiguration` to use for this job. - * - * @default - no security configuration. 
- */ - readonly securityConfiguration?: ISecurityConfiguration; - - /** - * The default arguments for this job, specified as name-value pairs. - * - * @see https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-etl-glue-arguments.html for a list of reserved parameters - * @default - no arguments - */ - readonly defaultArguments?: { [key: string]: string }; - - /** - * The tags to add to the resources on which the job runs - * - * @default {} - no tags - */ - readonly tags?: { [key: string]: string }; - - /** - * The IAM role assumed by Glue to run this job. - * - * If providing a custom role, it needs to trust the Glue service principal (glue.amazonaws.com) and be granted sufficient permissions. - * - * @see https://docs.aws.amazon.com/glue/latest/dg/getting-started-access.html - * - * @default - a role is automatically generated - */ - readonly role?: iam.IRole; - - /** - * Enables the collection of metrics for job profiling. - * Equivalent to a job parameter `--enable-metrics`. - * - * @default - no profiling metrics emitted. - * - * @see https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-etl-glue-arguments.html - */ - readonly enableProfilingMetrics? :boolean; - - /** - * Enables the Spark UI debugging and monitoring with the specified props. - * - * @default - Spark UI debugging and monitoring is disabled. - * - * @see https://docs.aws.amazon.com/glue/latest/dg/monitor-spark-ui-jobs.html - * @see https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-etl-glue-arguments.html - */ - readonly sparkUI?: SparkUIProps; - - /** - * Enables continuous logging with the specified props. - * - * @default - continuous logging is disabled. 
- * - * @see https://docs.aws.amazon.com/glue/latest/dg/monitor-continuous-logging-enable.html - * @see https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-etl-glue-arguments.html - */ - readonly continuousLogging?: ContinuousLoggingProps; - - /** - * The ExecutionClass whether the job is run with a standard or flexible execution class. - * - * @default - STANDARD - * - * @see https://docs.aws.amazon.com/glue/latest/dg/aws-glue-api-jobs-job.html#aws-glue-api-jobs-job-Job - * @see https://docs.aws.amazon.com/glue/latest/dg/add-job.html - */ - readonly executionClass?: ExecutionClass; -} - -/** - * A Glue Job. - */ -export class Job extends JobBase { - /** - * Creates a Glue Job - * - * @param scope The scope creating construct (usually `this`). - * @param id The construct's id. - * @param attrs Import attributes - */ - public static fromJobAttributes(scope: constructs.Construct, id: string, attrs: JobAttributes): IJob { - class Import extends JobBase { - public readonly jobName = attrs.jobName; - public readonly jobArn = jobArn(scope, attrs.jobName); - public readonly grantPrincipal = attrs.role ?? new iam.UnknownPrincipal({ resource: this }); - } - - return new Import(scope, id); - } - - /** - * The ARN of the job. - */ - public readonly jobArn: string; - - /** - * The name of the job. - */ - public readonly jobName: string; - - /** - * The IAM role Glue assumes to run this job. - */ - public readonly role: iam.IRole; - - /** - * The principal this Glue Job is running as. - */ - public readonly grantPrincipal: iam.IPrincipal; - - /** - * The Spark UI logs location if Spark UI monitoring and debugging is enabled. 
- * - * @see https://docs.aws.amazon.com/glue/latest/dg/monitor-spark-ui-jobs.html - * @see https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-etl-glue-arguments.html - */ - public readonly sparkUILoggingLocation?: SparkUILoggingLocation; - - constructor(scope: constructs.Construct, id: string, props: JobProps) { - super(scope, id, { - physicalName: props.jobName, - }); - - const executable = props.executable.bind(); - - this.role = props.role ?? new iam.Role(this, 'ServiceRole', { - assumedBy: new iam.ServicePrincipal('glue.amazonaws.com'), - managedPolicies: [iam.ManagedPolicy.fromAwsManagedPolicyName('service-role/AWSGlueServiceRole')], - }); - this.grantPrincipal = this.role; - - const sparkUI = props.sparkUI?.enabled ? this.setupSparkUI(executable, this.role, props.sparkUI) : undefined; - this.sparkUILoggingLocation = sparkUI?.location; - const continuousLoggingArgs = props.continuousLogging?.enabled ? this.setupContinuousLogging(this.role, props.continuousLogging) : {}; - const profilingMetricsArgs = props.enableProfilingMetrics ? 
{ '--enable-metrics': '' } : {}; - - const defaultArguments = { - ...this.executableArguments(executable), - ...continuousLoggingArgs, - ...profilingMetricsArgs, - ...sparkUI?.args, - ...this.checkNoReservedArgs(props.defaultArguments), - }; - - if (props.executionClass === ExecutionClass.FLEX) { - if (executable.type !== JobType.ETL) { - throw new Error('FLEX ExecutionClass is only available for JobType.ETL jobs'); - } - if ([GlueVersion.V0_9, GlueVersion.V1_0, GlueVersion.V2_0].includes(executable.glueVersion)) { - throw new Error('FLEX ExecutionClass is only available for GlueVersion 3.0 or later'); - } - if (props.workerType && (props.workerType !== WorkerType.G_1X && props.workerType !== WorkerType.G_2X)) { - throw new Error('FLEX ExecutionClass is only available for WorkerType G_1X or G_2X'); - } - } - - let maxCapacity = props.maxCapacity; - if (maxCapacity !== undefined && (props.workerType && props.workerCount !== undefined)) { - throw new Error('maxCapacity cannot be used when setting workerType and workerCount'); - } - if (executable.type !== JobType.PYTHON_SHELL) { - if (maxCapacity !== undefined && ![GlueVersion.V0_9, GlueVersion.V1_0].includes(executable.glueVersion)) { - throw new Error('maxCapacity cannot be used when GlueVersion 2.0 or later'); - } - } else { - // max capacity validation for python shell jobs (defaults to 0.0625) - maxCapacity = maxCapacity ?? 
0.0625; - if (maxCapacity !== 0.0625 && maxCapacity !== 1) { - throw new Error(`maxCapacity value must be either 0.0625 or 1 for JobType.PYTHON_SHELL jobs, received ${maxCapacity}`); - } - } - if ((!props.workerType && props.workerCount !== undefined) || (props.workerType && props.workerCount === undefined)) { - throw new Error('Both workerType and workerCount must be set'); - } - - const jobResource = new CfnJob(this, 'Resource', { - name: props.jobName, - description: props.description, - role: this.role.roleArn, - command: { - name: executable.type.name, - scriptLocation: this.codeS3ObjectUrl(executable.script), - pythonVersion: executable.pythonVersion, - runtime: executable.runtime ? executable.runtime.name : undefined, - }, - glueVersion: executable.glueVersion.name, - workerType: props.workerType?.name, - numberOfWorkers: props.workerCount, - maxCapacity: props.maxCapacity, - maxRetries: props.maxRetries, - executionClass: props.executionClass, - executionProperty: props.maxConcurrentRuns ? { maxConcurrentRuns: props.maxConcurrentRuns } : undefined, - notificationProperty: props.notifyDelayAfter ? { notifyDelayAfter: props.notifyDelayAfter.toMinutes() } : undefined, - timeout: props.timeout?.toMinutes(), - connections: props.connections ? { connections: props.connections.map((connection) => connection.connectionName) } : undefined, - securityConfiguration: props.securityConfiguration?.securityConfigurationName, - tags: props.tags, - defaultArguments, - }); - - const resourceName = this.getResourceNameAttribute(jobResource.ref); - this.jobArn = jobArn(this, resourceName); - this.jobName = resourceName; - } - - /** - * Check no usage of reserved arguments. 
- * - * @see https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-etl-glue-arguments.html - */ - private checkNoReservedArgs(defaultArguments?: { [key: string]: string }) { - if (defaultArguments) { - const reservedArgs = new Set(['--debug', '--mode', '--JOB_NAME']); - Object.keys(defaultArguments).forEach((arg) => { - if (reservedArgs.has(arg)) { - throw new Error(`The ${arg} argument is reserved by Glue. Don't set it`); - } - }); - } - return defaultArguments; - } - - private executableArguments(config: JobExecutableConfig) { - const args: { [key: string]: string } = {}; - args['--job-language'] = config.language; - if (config.className) { - args['--class'] = config.className; - } - if (config.extraJars && config.extraJars?.length > 0) { - args['--extra-jars'] = config.extraJars.map(code => this.codeS3ObjectUrl(code)).join(','); - } - if (config.extraPythonFiles && config.extraPythonFiles.length > 0) { - args['--extra-py-files'] = config.extraPythonFiles.map(code => this.codeS3ObjectUrl(code)).join(','); - } - if (config.s3PythonModules && config.s3PythonModules.length > 0) { - args['--s3-py-modules'] = config.s3PythonModules.map(code => this.codeS3ObjectUrl(code)).join(','); - } - if (config.extraFiles && config.extraFiles.length > 0) { - args['--extra-files'] = config.extraFiles.map(code => this.codeS3ObjectUrl(code)).join(','); - } - if (config.extraJarsFirst) { - args['--user-jars-first'] = 'true'; - } - return args; - } - - private setupSparkUI(executable: JobExecutableConfig, role: iam.IRole, props: SparkUIProps) { - if (JobType.PYTHON_SHELL === executable.type) { - throw new Error('Spark UI is not available for JobType.PYTHON_SHELL jobs'); - } else if (JobType.RAY === executable.type) { - throw new Error('Spark UI is not available for JobType.RAY jobs'); - } - - this.validatePrefix(props.prefix); - const bucket = props.bucket ?? 
new s3.Bucket(this, 'SparkUIBucket'); - bucket.grantReadWrite(role, this.cleanPrefixForGrant(props.prefix)); - const args = { - '--enable-spark-ui': 'true', - '--spark-event-logs-path': bucket.s3UrlForObject(props.prefix).replace(/\/?$/, '/'), // path will always end with a slash - }; - - return { - location: { - prefix: props.prefix, - bucket, - }, - args, - }; - } - - private validatePrefix(prefix?: string): void { - if (!prefix || cdk.Token.isUnresolved(prefix)) { - // skip validation if prefix is not specified or is a token - return; - } - - const errors: string[] = []; - - if (prefix.startsWith('/')) { - errors.push('Prefix must not begin with \'/\''); - } - - if (!prefix.endsWith('/')) { - errors.push('Prefix must end with \'/\''); - } - - if (errors.length > 0) { - throw new Error(`Invalid prefix format (value: ${prefix})${EOL}${errors.join(EOL)}`); - } - } - - private cleanPrefixForGrant(prefix?: string): string | undefined { - return prefix !== undefined ? `${prefix}*` : undefined; - } - - private setupContinuousLogging(role: iam.IRole, props: ContinuousLoggingProps) { - const args: {[key: string]: string} = { - '--enable-continuous-cloudwatch-log': 'true', - '--enable-continuous-log-filter': (props.quiet ?? true).toString(), - }; - - if (props.logGroup) { - args['--continuous-log-logGroup'] = props.logGroup.logGroupName; - props.logGroup.grantWrite(role); - } - - if (props.logStreamPrefix) { - args['--continuous-log-logStreamPrefix'] = props.logStreamPrefix; - } - if (props.conversionPattern) { - args['--continuous-log-conversionPattern'] = props.conversionPattern; - } - return args; - } - - private codeS3ObjectUrl(code: Code) { - const s3Location = code.bind(this, this.role).s3Location; - return `s3://${s3Location.bucketName}/${s3Location.objectKey}`; - } -} - -/** - * Create a CloudWatch Metric that's based on Glue Job events. 
- * @see https://docs.aws.amazon.com/AmazonCloudWatch/latest/events/EventTypes.html#glue-event-types - * The metric has namespace = 'AWS/Events', metricName = 'TriggeredRules' and RuleName = rule.ruleName dimension. - * - * @param rule for use in setting RuleName dimension value - * @param props metric properties - */ -function metricRule(rule: events.IRule, props?: cloudwatch.MetricOptions): cloudwatch.Metric { - return new cloudwatch.Metric({ - namespace: 'AWS/Events', - metricName: 'TriggeredRules', - dimensionsMap: { RuleName: rule.ruleName }, - statistic: cloudwatch.Statistic.SUM, - ...props, - }).attachTo(rule); -} - -/** - * Returns the job arn - * @param scope - * @param jobName - */ -function jobArn(scope: constructs.Construct, jobName: string) : string { - return cdk.Stack.of(scope).formatArn({ - service: 'glue', - resource: 'job', - resourceName: jobName, - }); -} diff --git a/packages/@aws-cdk/aws-glue-alpha/test/code.test.ts b/packages/@aws-cdk/aws-glue-alpha/test/code.test.ts index 45dd93d63a6b5..1f62dca7977bf 100644 --- a/packages/@aws-cdk/aws-glue-alpha/test/code.test.ts +++ b/packages/@aws-cdk/aws-glue-alpha/test/code.test.ts @@ -23,7 +23,7 @@ describe('Code', () => { bucket = s3.Bucket.fromBucketName(stack, 'Bucket', 'bucketname'); script = glue.Code.fromBucket(bucket, key); - const job = new glue.PythonShellJob(stack, 'Job1', { + new glue.PythonShellJob(stack, 'Job1', { script, role: new Role(stack, 'Role', { assumedBy: new ServicePrincipal('glue.amazonaws.com'), diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.job.ts b/packages/@aws-cdk/aws-glue-alpha/test/integ.job.ts deleted file mode 100644 index 91bf9bab212fc..0000000000000 --- a/packages/@aws-cdk/aws-glue-alpha/test/integ.job.ts +++ /dev/null @@ -1,147 +0,0 @@ -import * as path from 'path'; -import * as cdk from 'aws-cdk-lib'; -import * as glue from '../lib'; - -/** - * To verify the ability to run jobs created in this test - * - * Run the job using - * `aws glue start-job-run 
--region us-east-1 --job-name ` - * This will return a runId - * - * Get the status of the job run using - * `aws glue get-job-run --region us-east-1 --job-name --run-id ` - * - * For example, to test the ShellJob - * - Run: `aws glue start-job-run --region us-east-1 --job-name ShellJob` - * - Get Status: `aws glue get-job-run --region us-east-1 --job-name ShellJob --run-id ` - * - Check output: `aws logs get-log-events --region us-east-1 --log-group-name "/aws-glue/python-jobs/output" --log-stream-name ">` which should show "hello world" - */ -const app = new cdk.App(); - -const stack = new cdk.Stack(app, 'aws-glue-job'); - -const script = glue.Code.fromAsset(path.join(__dirname, 'job-script', 'hello_world.py')); -const scriptResolveOptions = glue.Code.fromAsset(path.join(__dirname, 'job-script', 'resolve_options.py')); -const moduleUtils = glue.Code.fromAsset(path.join(__dirname, 'module', 'utils.zip')); - -[glue.GlueVersion.V2_0, glue.GlueVersion.V3_0, glue.GlueVersion.V4_0].forEach((glueVersion) => { - const etlJob = new glue.Job(stack, 'EtlJob' + glueVersion.name, { - jobName: 'EtlJob' + glueVersion.name, - executable: glue.JobExecutable.pythonEtl({ - pythonVersion: glue.PythonVersion.THREE, - glueVersion, - script, - }), - workerType: glue.WorkerType.G_1X, - workerCount: 10, - maxConcurrentRuns: 2, - maxRetries: 2, - timeout: cdk.Duration.minutes(5), - notifyDelayAfter: cdk.Duration.minutes(1), - defaultArguments: { - 'arg1': 'value1', - 'arg2': 'value2', - '--conf': 'valueConf', - }, - sparkUI: { - enabled: true, - }, - continuousLogging: { - enabled: true, - quiet: true, - logStreamPrefix: 'EtlJob', - }, - executionClass: glue.ExecutionClass.STANDARD, - tags: { - key: 'value', - }, - }); - etlJob.metricSuccess(); - new glue.Job(stack, 'StreamingJob' + glueVersion.name, { - jobName: 'StreamingJob' + glueVersion.name, - executable: glue.JobExecutable.pythonStreaming({ - pythonVersion: glue.PythonVersion.THREE, - glueVersion, - script, - }), - workerType: 
[glue.GlueVersion.V2_0].includes(glueVersion) ? glue.WorkerType.G_1X : glue.WorkerType.G_025X, - workerCount: 10, - defaultArguments: { - arg1: 'value1', - arg2: 'value2', - }, - sparkUI: { - enabled: true, - }, - tags: { - key: 'value', - }, - }); -}); - -new glue.Job(stack, 'ShellJob', { - jobName: 'ShellJob', - executable: glue.JobExecutable.pythonShell({ - glueVersion: glue.GlueVersion.V1_0, - pythonVersion: glue.PythonVersion.THREE, - script, - }), - defaultArguments: { - arg1: 'value1', - arg2: 'value2', - }, - tags: { - key: 'value', - }, -}); - -new glue.Job(stack, 'ShellJob39', { - jobName: 'ShellJob39', - executable: glue.JobExecutable.pythonShell({ - glueVersion: glue.GlueVersion.V1_0, - pythonVersion: glue.PythonVersion.THREE_NINE, - script, - }), - defaultArguments: { - arg1: 'value1', - arg2: 'value2', - }, - tags: { - key: 'value', - }, -}); - -new glue.Job(stack, 'RayJob', { - jobName: 'RayJob', - executable: glue.JobExecutable.pythonRay({ - glueVersion: glue.GlueVersion.V4_0, - pythonVersion: glue.PythonVersion.THREE_NINE, - runtime: glue.Runtime.RAY_TWO_FOUR, - s3PythonModules: [moduleUtils], - script: scriptResolveOptions, - }), - workerType: glue.WorkerType.Z_2X, - workerCount: 2, - defaultArguments: { - arg1: 'value1', - arg2: 'value2', - }, - tags: { - key: 'value', - }, -}); - -new glue.Job(stack, 'EtlJobWithFLEX', { - jobName: 'EtlJobWithFLEX', - executable: glue.JobExecutable.pythonEtl({ - glueVersion: glue.GlueVersion.V3_0, - pythonVersion: glue.PythonVersion.THREE, - script, - }), - workerType: glue.WorkerType.G_1X, - workerCount: 10, - executionClass: glue.ExecutionClass.FLEX, -}); - -app.synth(); diff --git a/packages/@aws-cdk/aws-glue-alpha/test/job-executable.test.ts b/packages/@aws-cdk/aws-glue-alpha/test/job-executable.test.ts deleted file mode 100644 index d00faa55091ba..0000000000000 --- a/packages/@aws-cdk/aws-glue-alpha/test/job-executable.test.ts +++ /dev/null @@ -1,282 +0,0 @@ -import * as s3 from 'aws-cdk-lib/aws-s3'; -import 
* as cdk from 'aws-cdk-lib'; -import * as glue from '../lib'; - -describe('GlueVersion', () => { - test('.V0_9 should set the name correctly', () => expect(glue.GlueVersion.V0_9.name).toEqual('0.9')); - - test('.V1_0 should set the name correctly', () => expect(glue.GlueVersion.V1_0.name).toEqual('1.0')); - - test('.V2_0 should set the name correctly', () => expect(glue.GlueVersion.V2_0.name).toEqual('2.0')); - - test('.V3_0 should set the name correctly', () => expect(glue.GlueVersion.V3_0.name).toEqual('3.0')); - - test('.V4_0 should set the name correctly', () => expect(glue.GlueVersion.V4_0.name).toEqual('4.0')); - - test('of(customVersion) should set the name correctly', () => expect(glue.GlueVersion.of('CustomVersion').name).toEqual('CustomVersion')); -}); - -describe('PythonVersion', () => { - test('.TWO should set the name correctly', () => expect(glue.PythonVersion.TWO).toEqual('2')); - - test('.THREE should set the name correctly', () => expect(glue.PythonVersion.THREE).toEqual('3')); - - test('.THREE_NINE should set the name correctly', () => expect(glue.PythonVersion.THREE_NINE).toEqual('3.9')); -}); - -describe('JobType', () => { - test('.ETL should set the name correctly', () => expect(glue.JobType.ETL.name).toEqual('glueetl')); - - test('.STREAMING should set the name correctly', () => expect(glue.JobType.STREAMING.name).toEqual('gluestreaming')); - - test('.PYTHON_SHELL should set the name correctly', () => expect(glue.JobType.PYTHON_SHELL.name).toEqual('pythonshell')); - - test('.RAY should set the name correctly', () => expect(glue.JobType.RAY.name).toEqual('glueray')); - - test('of(customName) should set the name correctly', () => expect(glue.JobType.of('CustomName').name).toEqual('CustomName')); -}); - -describe('JobExecutable', () => { - let stack: cdk.Stack; - let bucket: s3.IBucket; - let script: glue.Code; - - beforeEach(() => { - stack = new cdk.Stack(); - bucket = s3.Bucket.fromBucketName(stack, 'Bucket', 'bucketname'); - script = 
glue.Code.fromBucket(bucket, 'script.py'); - }); - - describe('.of()', () => { - test('with valid config should succeed', () => { - expect(glue.JobExecutable.of({ - glueVersion: glue.GlueVersion.V1_0, - type: glue.JobType.PYTHON_SHELL, - language: glue.JobLanguage.PYTHON, - pythonVersion: glue.PythonVersion.THREE, - script, - })).toBeDefined(); - }); - - test('with JobType.PYTHON_SHELL and a language other than JobLanguage.PYTHON should throw', () => { - expect(() => glue.JobExecutable.of({ - glueVersion: glue.GlueVersion.V3_0, - type: glue.JobType.PYTHON_SHELL, - language: glue.JobLanguage.SCALA, - script, - })).toThrow(/Python shell requires the language to be set to Python/); - }); - - test('with JobType.of("pythonshell") and a language other than JobLanguage.PYTHON should throw', () => { - expect(() => glue.JobExecutable.of({ - glueVersion: glue.GlueVersion.V3_0, - type: glue.JobType.of('pythonshell'), - language: glue.JobLanguage.SCALA, - script, - })).toThrow(/Python shell requires the language to be set to Python/); - }); - - test('with JobType.of("glueray") and a language other than JobLanguage.PYTHON should throw', () => { - expect(() => glue.JobExecutable.of({ - glueVersion: glue.GlueVersion.V4_0, - type: glue.JobType.of('glueray'), - language: glue.JobLanguage.SCALA, - script, - })).toThrow(/Ray requires the language to be set to Python/); - }); - - test('with JobType.RAY and a language other than JobLanguage.PYTHON should throw', () => { - expect(() => glue.JobExecutable.of({ - glueVersion: glue.GlueVersion.V4_0, - type: glue.JobType.RAY, - language: glue.JobLanguage.SCALA, - script, - })).toThrow(/Ray requires the language to be set to Python/); - }); - - test('with a non JobLanguage.PYTHON and extraPythonFiles set should throw', () => { - expect(() => glue.JobExecutable.of({ - glueVersion: glue.GlueVersion.V3_0, - type: glue.JobType.ETL, - language: glue.JobLanguage.SCALA, - className: 'com.Test', - extraPythonFiles: [script], - script, - 
})).toThrow(/extraPythonFiles is not supported for languages other than JobLanguage.PYTHON/); - }); - - [glue.GlueVersion.V0_9, glue.GlueVersion.V4_0].forEach((glueVersion) => { - test(`with JobType.PYTHON_SHELL and GlueVersion ${glueVersion} should throw`, () => { - expect(() => glue.JobExecutable.of({ - type: glue.JobType.PYTHON_SHELL, - language: glue.JobLanguage.PYTHON, - pythonVersion: glue.PythonVersion.TWO, - script, - glueVersion, - })).toThrow(`Specified GlueVersion ${glueVersion.name} does not support Python Shell`); - }); - }); - - [glue.GlueVersion.V0_9, glue.GlueVersion.V4_0].forEach((glueVersion) => { - test(`with JobType.PYTHON_SHELL and GlueVersion.of("${glueVersion.name}") should throw`, () => { - expect(() => glue.JobExecutable.of({ - type: glue.JobType.PYTHON_SHELL, - language: glue.JobLanguage.PYTHON, - pythonVersion: glue.PythonVersion.TWO, - script, - glueVersion: glue.GlueVersion.of(glueVersion.name), - })).toThrow(`Specified GlueVersion ${glueVersion.name} does not support Python Shell`); - }); - }); - - [glue.GlueVersion.V0_9, glue.GlueVersion.V1_0, glue.GlueVersion.V2_0, glue.GlueVersion.V3_0].forEach((glueVersion) => { - test(`with JobType.RAY and GlueVersion ${glueVersion} should throw`, () => { - expect(() => glue.JobExecutable.of({ - type: glue.JobType.RAY, - language: glue.JobLanguage.PYTHON, - pythonVersion: glue.PythonVersion.TWO, - script, - glueVersion, - })).toThrow(`Specified GlueVersion ${glueVersion.name} does not support Ray`); - }); - }); - - [glue.GlueVersion.V0_9, glue.GlueVersion.V1_0, glue.GlueVersion.V2_0, glue.GlueVersion.V3_0].forEach((glueVersion) => { - test(`with JobType.of("glueray") and GlueVersion ${glueVersion} should throw`, () => { - expect(() => glue.JobExecutable.of({ - type: glue.JobType.of('glueray'), - language: glue.JobLanguage.PYTHON, - pythonVersion: glue.PythonVersion.TWO, - script, - glueVersion, - })).toThrow(`Specified GlueVersion ${glueVersion.name} does not support Ray`); - }); - }); - - 
[glue.GlueVersion.V0_9, glue.GlueVersion.V1_0].forEach((glueVersion) => { - test(`with extraJarsFirst set and GlueVersion ${glueVersion.name} should throw`, () => { - expect(() => glue.JobExecutable.of({ - type: glue.JobType.ETL, - language: glue.JobLanguage.PYTHON, - pythonVersion: glue.PythonVersion.TWO, - extraJarsFirst: true, - script, - glueVersion, - })).toThrow(`Specified GlueVersion ${glueVersion.name} does not support extraJarsFirst`); - }); - }); - - [glue.GlueVersion.V0_9, glue.GlueVersion.V1_0].forEach((glueVersion) => { - test(`with extraJarsFirst set and GlueVersion.of("${glueVersion.name}") should throw`, () => { - expect(() => glue.JobExecutable.of({ - type: glue.JobType.ETL, - language: glue.JobLanguage.PYTHON, - pythonVersion: glue.PythonVersion.TWO, - extraJarsFirst: true, - script, - glueVersion: glue.GlueVersion.of(glueVersion.name), - })).toThrow(`Specified GlueVersion ${glueVersion.name} does not support extraJarsFirst`); - }); - }); - - [glue.GlueVersion.V2_0, glue.GlueVersion.V3_0, glue.GlueVersion.V4_0].forEach((glueVersion) => { - test(`with PythonVersion.TWO and GlueVersion ${glueVersion} should throw`, () => { - expect(() => glue.JobExecutable.of({ - type: glue.JobType.ETL, - language: glue.JobLanguage.PYTHON, - pythonVersion: glue.PythonVersion.TWO, - script, - glueVersion, - })).toThrow(`Specified GlueVersion ${glueVersion.name} does not support PythonVersion 2`); - }); - }); - - [glue.GlueVersion.V2_0, glue.GlueVersion.V3_0, glue.GlueVersion.V4_0].forEach((glueVersion) => { - test(`with PythonVersion.TWO and GlueVersion.of("${glueVersion.name}") should throw`, () => { - expect(() => glue.JobExecutable.of({ - type: glue.JobType.ETL, - language: glue.JobLanguage.PYTHON, - pythonVersion: glue.PythonVersion.TWO, - script, - glueVersion: glue.GlueVersion.of(glueVersion.name), - })).toThrow(`Specified GlueVersion ${glueVersion.name} does not support PythonVersion 2`); - }); - }); - - test('with PythonVersion set to PythonVersion.THREE_NINE 
and JobType etl should throw', () => { - expect(() => glue.JobExecutable.of({ - type: glue.JobType.ETL, - language: glue.JobLanguage.PYTHON, - pythonVersion: glue.PythonVersion.THREE_NINE, - script, - glueVersion: glue.GlueVersion.V1_0, - })).toThrow('Specified PythonVersion PythonVersion.THREE_NINE is only supported for JobType Python Shell'); - }); - - test('with PythonVersion PythonVersion.THREE_NINE and JobType pythonshell should succeed', () => { - expect(glue.JobExecutable.of({ - type: glue.JobType.PYTHON_SHELL, - glueVersion: glue.GlueVersion.V1_0, - language: glue.JobLanguage.PYTHON, - pythonVersion: glue.PythonVersion.THREE_NINE, - script, - })).toBeDefined(); - }); - - test('with PythonVersion PythonVersion.THREE_NINE and JobType.of("pythonshell") should succeed', () => { - expect(glue.JobExecutable.of({ - type: glue.JobType.of('pythonshell'), - glueVersion: glue.GlueVersion.V1_0, - language: glue.JobLanguage.PYTHON, - pythonVersion: glue.PythonVersion.THREE_NINE, - script, - })).toBeDefined(); - }); - - test('with PythonVersion PythonVersion.THREE_NINE and JobType ray should succeed', () => { - expect(glue.JobExecutable.of({ - type: glue.JobType.RAY, - glueVersion: glue.GlueVersion.V4_0, - language: glue.JobLanguage.PYTHON, - pythonVersion: glue.PythonVersion.THREE_NINE, - runtime: glue.Runtime.RAY_TWO_FOUR, - script, - })).toBeDefined(); - }); - - test('with PythonVersion PythonVersion.THREE_NINE and JobTypeof("glueray") should succeed', () => { - expect(glue.JobExecutable.of({ - type: glue.JobType.of('glueray'), - glueVersion: glue.GlueVersion.V4_0, - language: glue.JobLanguage.PYTHON, - pythonVersion: glue.PythonVersion.THREE_NINE, - runtime: glue.Runtime.RAY_TWO_FOUR, - script, - })).toBeDefined(); - }); - - test('with JobTypeof("glueray") and extraPythonFiles set should throw', () => { - expect(() => glue.JobExecutable.of({ - type: glue.JobType.of('glueray'), - glueVersion: glue.GlueVersion.V4_0, - language: glue.JobLanguage.PYTHON, - pythonVersion: 
glue.PythonVersion.THREE_NINE, - runtime: glue.Runtime.RAY_TWO_FOUR, - extraPythonFiles: [script], - script, - })).toThrow(/extraPythonFiles is not supported for Ray jobs/); - }); - - test('with JobType ray and s3PythonModules should succeed', () => { - expect(glue.JobExecutable.of({ - type: glue.JobType.of('glueray'), - glueVersion: glue.GlueVersion.V4_0, - language: glue.JobLanguage.PYTHON, - pythonVersion: glue.PythonVersion.THREE_NINE, - s3PythonModules: [script], - runtime: glue.Runtime.RAY_TWO_FOUR, - script, - })).toBeDefined(); - }); - }); -}); \ No newline at end of file diff --git a/packages/@aws-cdk/aws-glue-alpha/test/job.test.ts b/packages/@aws-cdk/aws-glue-alpha/test/job.test.ts deleted file mode 100644 index 0e6db582c1d71..0000000000000 --- a/packages/@aws-cdk/aws-glue-alpha/test/job.test.ts +++ /dev/null @@ -1,1180 +0,0 @@ -import { EOL } from 'os'; -import { Template } from 'aws-cdk-lib/assertions'; -import * as cloudwatch from 'aws-cdk-lib/aws-cloudwatch'; -import * as events from 'aws-cdk-lib/aws-events'; -import * as iam from 'aws-cdk-lib/aws-iam'; -import * as logs from 'aws-cdk-lib/aws-logs'; -import * as s3 from 'aws-cdk-lib/aws-s3'; -import * as cdk from 'aws-cdk-lib'; -import * as glue from '../lib'; - -describe('WorkerType', () => { - test('.STANDARD should set the name correctly', () => expect(glue.WorkerType.STANDARD.name).toEqual('Standard')); - - test('.G_1X should set the name correctly', () => expect(glue.WorkerType.G_1X.name).toEqual('G.1X')); - - test('.G_2X should set the name correctly', () => expect(glue.WorkerType.G_2X.name).toEqual('G.2X')); - - test('.G_4X should set the name correctly', () => expect(glue.WorkerType.G_4X.name).toEqual('G.4X')); - - test('.G_8X should set the name correctly', () => expect(glue.WorkerType.G_8X.name).toEqual('G.8X')); - - test('.G_025X should set the name correctly', () => expect(glue.WorkerType.G_025X.name).toEqual('G.025X')); - - test('.Z_2X should set the name correctly', () => 
expect(glue.WorkerType.Z_2X.name).toEqual('Z.2X')); - - test('of(customType) should set name correctly', () => expect(glue.WorkerType.of('CustomType').name).toEqual('CustomType')); -}); - -describe('Job', () => { - const jobName = 'test-job'; - let stack: cdk.Stack; - - beforeEach(() => { - stack = new cdk.Stack(); - }); - - describe('.fromJobAttributes()', () => { - test('with required attrs only', () => { - const job = glue.Job.fromJobAttributes(stack, 'ImportedJob', { jobName }); - - expect(job.jobName).toEqual(jobName); - expect(job.jobArn).toEqual(stack.formatArn({ - service: 'glue', - resource: 'job', - resourceName: jobName, - })); - expect(job.grantPrincipal).toEqual(new iam.UnknownPrincipal({ resource: job })); - }); - - test('with all attrs', () => { - const role = iam.Role.fromRoleArn(stack, 'Role', 'arn:aws:iam::123456789012:role/TestRole'); - const job = glue.Job.fromJobAttributes(stack, 'ImportedJob', { jobName, role }); - - expect(job.jobName).toEqual(jobName); - expect(job.jobArn).toEqual(stack.formatArn({ - service: 'glue', - resource: 'job', - resourceName: jobName, - })); - expect(job.grantPrincipal).toEqual(role); - }); - }); - - describe('new', () => { - const className = 'com.amazon.test.ClassName'; - const codeBucketName = 'bucketname'; - const codeBucketAccessStatement = { - Action: [ - 's3:GetObject*', - 's3:GetBucket*', - 's3:List*', - ], - Effect: 'Allow', - Resource: [ - { - 'Fn::Join': [ - '', - [ - 'arn:', - { - Ref: 'AWS::Partition', - }, - `:s3:::${codeBucketName}`, - ], - ], - }, - { - 'Fn::Join': [ - '', - [ - 'arn:', - { - Ref: 'AWS::Partition', - }, - `:s3:::${codeBucketName}/script`, - ], - ], - }, - ], - }; - let codeBucket: s3.IBucket; - let script: glue.Code; - let extraJars: glue.Code[]; - let extraFiles: glue.Code[]; - let extraPythonFiles: glue.Code[]; - let job: glue.Job; - let defaultProps: glue.JobProps; - - beforeEach(() => { - codeBucket = s3.Bucket.fromBucketName(stack, 'CodeBucket', codeBucketName); - script = 
glue.Code.fromBucket(codeBucket, 'script'); - extraJars = [glue.Code.fromBucket(codeBucket, 'file1.jar'), glue.Code.fromBucket(codeBucket, 'file2.jar')]; - extraPythonFiles = [glue.Code.fromBucket(codeBucket, 'file1.py'), glue.Code.fromBucket(codeBucket, 'file2.py')]; - extraFiles = [glue.Code.fromBucket(codeBucket, 'file1.txt'), glue.Code.fromBucket(codeBucket, 'file2.txt')]; - defaultProps = { - executable: glue.JobExecutable.scalaEtl({ - glueVersion: glue.GlueVersion.V2_0, - className, - script, - }), - }; - }); - - describe('with necessary props only', () => { - beforeEach(() => { - job = new glue.Job(stack, 'Job', defaultProps); - }); - - test('should create a role and use it with the job', () => { - Template.fromStack(stack).hasResourceProperties('AWS::IAM::Role', { - AssumeRolePolicyDocument: { - Statement: [ - { - Action: 'sts:AssumeRole', - Effect: 'Allow', - Principal: { - Service: 'glue.amazonaws.com', - }, - }, - ], - Version: '2012-10-17', - }, - ManagedPolicyArns: [ - { - 'Fn::Join': [ - '', - [ - 'arn:', - { - Ref: 'AWS::Partition', - }, - ':iam::aws:policy/service-role/AWSGlueServiceRole', - ], - ], - }, - ], - }); - - // Role policy should grant reading from the assets bucket - Template.fromStack(stack).hasResourceProperties('AWS::IAM::Policy', { - PolicyDocument: { - Statement: [ - codeBucketAccessStatement, - ], - }, - Roles: [ - { - Ref: 'JobServiceRole4F432993', - }, - ], - }); - - // check the job using the role - Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { - Command: { - Name: 'glueetl', - ScriptLocation: 's3://bucketname/script', - }, - Role: { - 'Fn::GetAtt': [ - 'JobServiceRole4F432993', - 'Arn', - ], - }, - }); - }); - - test('should return correct jobName and jobArn from CloudFormation', () => { - expect(stack.resolve(job.jobName)).toEqual({ Ref: 'JobB9D00F9F' }); - expect(stack.resolve(job.jobArn)).toEqual({ - 'Fn::Join': ['', ['arn:', { Ref: 'AWS::Partition' }, ':glue:', { Ref: 'AWS::Region' }, ':', { Ref: 
'AWS::AccountId' }, ':job/', { Ref: 'JobB9D00F9F' }]], - }); - }); - - test('with a custom role should use it and set it in CloudFormation', () => { - const role = iam.Role.fromRoleArn(stack, 'Role', 'arn:aws:iam::123456789012:role/TestRole'); - job = new glue.Job(stack, 'JobWithRole', { - ...defaultProps, - role, - }); - - expect(job.grantPrincipal).toEqual(role); - Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { - Role: role.roleArn, - }); - }); - - test('with a custom jobName should set it in CloudFormation', () => { - job = new glue.Job(stack, 'JobWithName', { - ...defaultProps, - jobName, - }); - - Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { - Name: jobName, - }); - }); - }); - - describe('enabling continuous logging with defaults', () => { - beforeEach(() => { - job = new glue.Job(stack, 'Job', { - ...defaultProps, - continuousLogging: { enabled: true }, - }); - }); - - test('should set minimal default arguments', () => { - Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { - DefaultArguments: { - '--enable-continuous-cloudwatch-log': 'true', - '--enable-continuous-log-filter': 'true', - }, - }); - }); - }); - - describe('enabling continuous logging with all props set', () => { - let logGroup; - - beforeEach(() => { - logGroup = logs.LogGroup.fromLogGroupName(stack, 'LogGroup', 'LogGroupName'); - job = new glue.Job(stack, 'Job', { - ...defaultProps, - continuousLogging: { - enabled: true, - quiet: false, - logStreamPrefix: 'LogStreamPrefix', - conversionPattern: '%d{yy/MM/dd HH:mm:ss} %p %c{1}: %m%n', - logGroup, - }, - }); - }); - - test('should set all arguments', () => { - Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { - DefaultArguments: { - '--enable-continuous-cloudwatch-log': 'true', - '--enable-continuous-log-filter': 'false', - '--continuous-log-logGroup': 'LogGroupName', - '--continuous-log-logStreamPrefix': 'LogStreamPrefix', - '--continuous-log-conversionPattern': 
'%d{yy/MM/dd HH:mm:ss} %p %c{1}: %m%n', - }, - }); - }); - - test('should grant cloudwatch log write permissions', () => { - Template.fromStack(stack).hasResourceProperties('AWS::IAM::Policy', { - PolicyDocument: { - Statement: [ - { - Action: [ - 'logs:CreateLogStream', - 'logs:PutLogEvents', - ], - Effect: 'Allow', - Resource: { - 'Fn::Join': [ - '', - [ - 'arn:', - { - Ref: 'AWS::Partition', - }, - ':logs:', - { - Ref: 'AWS::Region', - }, - ':', - { - Ref: 'AWS::AccountId', - }, - ':log-group:LogGroupName:*', - ], - ], - }, - }, - codeBucketAccessStatement, - ], - }, - Roles: [ - { - Ref: 'JobServiceRole4F432993', - }, - ], - }); - }); - }); - - describe('enabling execution class', () => { - describe('enabling execution class with FLEX', () => { - beforeEach(() => { - job = new glue.Job(stack, 'Job', { - executable: glue.JobExecutable.pythonEtl({ - glueVersion: glue.GlueVersion.V3_0, - pythonVersion: glue.PythonVersion.THREE, - script, - }), - executionClass: glue.ExecutionClass.FLEX, - }); - }); - - test('should set FLEX', () => { - Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { - ExecutionClass: 'FLEX', - }); - }); - }); - - describe('enabling execution class with FLEX and WorkerType G_1X', () => { - beforeEach(() => { - job = new glue.Job(stack, 'Job', { - executable: glue.JobExecutable.pythonEtl({ - glueVersion: glue.GlueVersion.V3_0, - pythonVersion: glue.PythonVersion.THREE, - script, - }), - executionClass: glue.ExecutionClass.FLEX, - workerType: glue.WorkerType.G_1X, - workerCount: 10, - }); - }); - - test('should set FLEX', () => { - Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { - ExecutionClass: 'FLEX', - WorkerType: 'G.1X', - }); - }); - }); - - describe('enabling execution class with FLEX and WorkerType G_2X', () => { - beforeEach(() => { - job = new glue.Job(stack, 'Job', { - executable: glue.JobExecutable.pythonEtl({ - glueVersion: glue.GlueVersion.V3_0, - pythonVersion: glue.PythonVersion.THREE, - script, 
- }), - executionClass: glue.ExecutionClass.FLEX, - workerType: glue.WorkerType.G_2X, - workerCount: 10, - }); - }); - - test('should set FLEX', () => { - Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { - ExecutionClass: 'FLEX', - WorkerType: 'G.2X', - }); - }); - }); - - describe('enabling execution class with STANDARD', () => { - beforeEach(() => { - job = new glue.Job(stack, 'Job', { - executable: glue.JobExecutable.pythonEtl({ - glueVersion: glue.GlueVersion.V3_0, - pythonVersion: glue.PythonVersion.THREE, - script, - }), - executionClass: glue.ExecutionClass.STANDARD, - }); - }); - - test('should set STANDARD', () => { - Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { - ExecutionClass: 'STANDARD', - }); - }); - }); - - describe('errors for execution class with FLEX', () => { - test('job type except JobType.ETL should throw', () => { - expect(() => new glue.Job(stack, 'Job', { - executable: glue.JobExecutable.pythonShell({ - glueVersion: glue.GlueVersion.V2_0, - pythonVersion: glue.PythonVersion.THREE, - script, - }), - executionClass: glue.ExecutionClass.FLEX, - })).toThrow('FLEX ExecutionClass is only available for JobType.ETL jobs'); - }); - - test('with glue version 0.9 should throw', () => { - expect(() => new glue.Job(stack, 'Job', { - executable: glue.JobExecutable.pythonEtl({ - glueVersion: glue.GlueVersion.V0_9, - pythonVersion: glue.PythonVersion.THREE, - script, - }), - executionClass: glue.ExecutionClass.FLEX, - })).toThrow('FLEX ExecutionClass is only available for GlueVersion 3.0 or later'); - }); - - test('with glue version 1.0 should throw', () => { - expect(() => new glue.Job(stack, 'Job', { - executable: glue.JobExecutable.pythonEtl({ - glueVersion: glue.GlueVersion.V1_0, - pythonVersion: glue.PythonVersion.THREE, - script, - }), - executionClass: glue.ExecutionClass.FLEX, - })).toThrow('FLEX ExecutionClass is only available for GlueVersion 3.0 or later'); - }); - - test('with glue version 2.0 should 
throw', () => { - expect(() => new glue.Job(stack, 'Job', { - executable: glue.JobExecutable.pythonEtl({ - glueVersion: glue.GlueVersion.V2_0, - pythonVersion: glue.PythonVersion.THREE, - script, - }), - executionClass: glue.ExecutionClass.FLEX, - })).toThrow('FLEX ExecutionClass is only available for GlueVersion 3.0 or later'); - }); - - test('with G_025X as worker type that is neither G_1X nor G_2X should throw', () => { - expect(() => new glue.Job(stack, 'Job', { - executable: glue.JobExecutable.pythonEtl({ - glueVersion: glue.GlueVersion.V3_0, - pythonVersion: glue.PythonVersion.THREE, - script, - }), - workerType: glue.WorkerType.G_025X, - workerCount: 2, - executionClass: glue.ExecutionClass.FLEX, - })).toThrow('FLEX ExecutionClass is only available for WorkerType G_1X or G_2X'); - }); - - test('with G_4X as worker type that is neither G_1X nor G_2X should throw', () => { - expect(() => new glue.Job(stack, 'Job', { - executable: glue.JobExecutable.pythonEtl({ - glueVersion: glue.GlueVersion.V3_0, - pythonVersion: glue.PythonVersion.THREE, - script, - }), - workerType: glue.WorkerType.G_4X, - workerCount: 10, - executionClass: glue.ExecutionClass.FLEX, - })).toThrow('FLEX ExecutionClass is only available for WorkerType G_1X or G_2X'); - }); - }); - }); - - describe('enabling spark ui', () => { - describe('with no bucket or path provided', () => { - beforeEach(() => { - job = new glue.Job(stack, 'Job', { - ...defaultProps, - sparkUI: { enabled: true }, - }); - }); - - test('should create spark ui bucket', () => { - Template.fromStack(stack).resourceCountIs('AWS::S3::Bucket', 1); - }); - - test('should grant the role read/write permissions to the spark ui bucket', () => { - Template.fromStack(stack).hasResourceProperties('AWS::IAM::Policy', { - PolicyDocument: { - Statement: [ - { - Action: [ - 's3:GetObject*', - 's3:GetBucket*', - 's3:List*', - 's3:DeleteObject*', - 's3:PutObject', - 's3:PutObjectLegalHold', - 's3:PutObjectRetention', - 's3:PutObjectTagging', - 
's3:PutObjectVersionTagging', - 's3:Abort*', - ], - Effect: 'Allow', - Resource: [ - { - 'Fn::GetAtt': [ - 'JobSparkUIBucket8E6A0139', - 'Arn', - ], - }, - { - 'Fn::Join': [ - '', - [ - { - 'Fn::GetAtt': [ - 'JobSparkUIBucket8E6A0139', - 'Arn', - ], - }, - '/*', - ], - ], - }, - ], - }, - codeBucketAccessStatement, - ], - Version: '2012-10-17', - }, - PolicyName: 'JobServiceRoleDefaultPolicy03F68F9D', - Roles: [ - { - Ref: 'JobServiceRole4F432993', - }, - ], - }); - }); - - test('should set spark arguments on the job', () => { - Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { - DefaultArguments: { - '--enable-spark-ui': 'true', - '--spark-event-logs-path': { - 'Fn::Join': [ - '', - [ - 's3://', - { - Ref: 'JobSparkUIBucket8E6A0139', - }, - '/', - ], - ], - }, - }, - }); - }); - }); - - describe('with bucket provided', () => { - const sparkUIBucketName = 'sparkbucketname'; - let sparkUIBucket: s3.IBucket; - - beforeEach(() => { - sparkUIBucket = s3.Bucket.fromBucketName(stack, 'SparkBucketId', sparkUIBucketName); - job = new glue.Job(stack, 'Job', { - ...defaultProps, - sparkUI: { - enabled: true, - bucket: sparkUIBucket, - }, - }); - }); - - test('should grant the role read/write permissions to the provided spark ui bucket', () => { - Template.fromStack(stack).hasResourceProperties('AWS::IAM::Policy', { - PolicyDocument: { - Statement: [ - { - Action: [ - 's3:GetObject*', - 's3:GetBucket*', - 's3:List*', - 's3:DeleteObject*', - 's3:PutObject', - 's3:PutObjectLegalHold', - 's3:PutObjectRetention', - 's3:PutObjectTagging', - 's3:PutObjectVersionTagging', - 's3:Abort*', - ], - Effect: 'Allow', - Resource: [ - { - 'Fn::Join': [ - '', - [ - 'arn:', - { - Ref: 'AWS::Partition', - }, - ':s3:::sparkbucketname', - ], - ], - }, - { - 'Fn::Join': [ - '', - [ - 'arn:', - { - Ref: 'AWS::Partition', - }, - ':s3:::sparkbucketname/*', - ], - ], - }, - ], - }, - codeBucketAccessStatement, - ], - }, - Roles: [ - { - Ref: 'JobServiceRole4F432993', - }, - ], - }); 
- }); - - test('should set spark arguments on the job', () => { - Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { - DefaultArguments: { - '--enable-spark-ui': 'true', - '--spark-event-logs-path': `s3://${sparkUIBucketName}/`, - }, - }); - }); - }); - describe('with bucket and path provided', () => { - const sparkUIBucketName = 'sparkbucketname'; - const prefix = 'foob/bart/'; - const badPrefix = '/foob/bart'; - let sparkUIBucket: s3.IBucket; - - const expectedErrors = [ - `Invalid prefix format (value: ${badPrefix})`, - 'Prefix must not begin with \'/\'', - 'Prefix must end with \'/\'', - ].join(EOL); - it('fails if path is mis-formatted', () => { - expect(() => new glue.Job(stack, 'BadPrefixJob', { - ...defaultProps, - sparkUI: { - enabled: true, - bucket: sparkUIBucket, - prefix: badPrefix, - }, - })).toThrow(expectedErrors); - }); - - beforeEach(() => { - sparkUIBucket = s3.Bucket.fromBucketName(stack, 'BucketId', sparkUIBucketName); - job = new glue.Job(stack, 'Job', { - ...defaultProps, - sparkUI: { - enabled: true, - bucket: sparkUIBucket, - prefix: prefix, - }, - }); - }); - - it('should grant the role read/write permissions spark ui bucket prefixed folder', () => { - Template.fromStack(stack).hasResourceProperties('AWS::IAM::Policy', { - PolicyDocument: { - Statement: [ - { - Action: [ - 's3:GetObject*', - 's3:GetBucket*', - 's3:List*', - 's3:DeleteObject*', - 's3:PutObject', - 's3:PutObjectLegalHold', - 's3:PutObjectRetention', - 's3:PutObjectTagging', - 's3:PutObjectVersionTagging', - 's3:Abort*', - ], - Effect: 'Allow', - Resource: [ - { - 'Fn::Join': [ - '', - [ - 'arn:', - { Ref: 'AWS::Partition' }, - ':s3:::sparkbucketname', - ], - ], - }, - { - 'Fn::Join': [ - '', - [ - 'arn:', - { Ref: 'AWS::Partition' }, - `:s3:::sparkbucketname/${prefix}*`, - ], - ], - }, - ], - }, - codeBucketAccessStatement, - ], - Version: '2012-10-17', - }, - PolicyName: 'JobServiceRoleDefaultPolicy03F68F9D', - Roles: [{ Ref: 'JobServiceRole4F432993' }], - 
}); - }); - - it('should set spark arguments on the job', () => { - Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { - DefaultArguments: { - '--enable-spark-ui': 'true', - '--spark-event-logs-path': `s3://${sparkUIBucketName}/${prefix}`, - }, - }); - }); - }); - }); - - describe('with extended props', () => { - beforeEach(() => { - job = new glue.Job(stack, 'Job', { - ...defaultProps, - jobName, - description: 'test job', - workerType: glue.WorkerType.G_2X, - workerCount: 10, - maxConcurrentRuns: 2, - maxRetries: 2, - timeout: cdk.Duration.minutes(5), - notifyDelayAfter: cdk.Duration.minutes(1), - defaultArguments: { - arg1: 'value1', - arg2: 'value2', - }, - connections: [glue.Connection.fromConnectionName(stack, 'ImportedConnection', 'ConnectionName')], - securityConfiguration: glue.SecurityConfiguration.fromSecurityConfigurationName(stack, 'ImportedSecurityConfiguration', 'SecurityConfigurationName'), - enableProfilingMetrics: true, - tags: { - key: 'value', - }, - }); - }); - - test('should synthesize correctly', () => { - Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { - Command: { - Name: 'glueetl', - ScriptLocation: 's3://bucketname/script', - }, - Role: { - 'Fn::GetAtt': [ - 'JobServiceRole4F432993', - 'Arn', - ], - }, - DefaultArguments: { - '--job-language': 'scala', - '--class': 'com.amazon.test.ClassName', - '--enable-metrics': '', - 'arg1': 'value1', - 'arg2': 'value2', - }, - Description: 'test job', - ExecutionProperty: { - MaxConcurrentRuns: 2, - }, - GlueVersion: '2.0', - MaxRetries: 2, - Name: 'test-job', - NotificationProperty: { - NotifyDelayAfter: 1, - }, - NumberOfWorkers: 10, - Tags: { - key: 'value', - }, - Timeout: 5, - WorkerType: 'G.2X', - Connections: { - Connections: [ - 'ConnectionName', - ], - }, - SecurityConfiguration: 'SecurityConfigurationName', - }); - }); - }); - - test('with reserved args should throw', () => { - ['--debug', '--mode', '--JOB_NAME'].forEach((arg, index) => { - const 
defaultArguments: {[key: string]: string} = {}; - defaultArguments[arg] = 'random value'; - - expect(() => new glue.Job(stack, `Job${index}`, { - executable: glue.JobExecutable.scalaEtl({ - glueVersion: glue.GlueVersion.V2_0, - className, - script, - }), - defaultArguments, - })).toThrow(/argument is reserved by Glue/); - }); - }); - - describe('shell job', () => { - test('with unsupported glue version should throw', () => { - expect(() => new glue.Job(stack, 'Job', { - executable: glue.JobExecutable.pythonShell({ - glueVersion: glue.GlueVersion.V0_9, - pythonVersion: glue.PythonVersion.TWO, - script, - }), - })).toThrow('Specified GlueVersion 0.9 does not support Python Shell'); - }); - - test('with unsupported Spark UI prop should throw', () => { - expect(() => new glue.Job(stack, 'Job', { - executable: glue.JobExecutable.pythonShell({ - glueVersion: glue.GlueVersion.V1_0, - pythonVersion: glue.PythonVersion.THREE, - script, - }), - sparkUI: { enabled: true }, - })).toThrow('Spark UI is not available for JobType.PYTHON_SHELL'); - }); - }); - - describe('ray job', () => { - test('with unsupported glue version should throw', () => { - expect(() => new glue.Job(stack, 'Job', { - executable: glue.JobExecutable.pythonRay({ - glueVersion: glue.GlueVersion.V3_0, - pythonVersion: glue.PythonVersion.THREE_NINE, - runtime: glue.Runtime.RAY_TWO_FOUR, - script, - }), - workerType: glue.WorkerType.Z_2X, - workerCount: 2, - })).toThrow('Specified GlueVersion 3.0 does not support Ray'); - }); - - test('with unsupported Spark UI prop should throw', () => { - expect(() => new glue.Job(stack, 'Job', { - executable: glue.JobExecutable.pythonRay({ - glueVersion: glue.GlueVersion.V4_0, - pythonVersion: glue.PythonVersion.THREE_NINE, - runtime: glue.Runtime.RAY_TWO_FOUR, - script, - }), - workerType: glue.WorkerType.Z_2X, - workerCount: 2, - sparkUI: { enabled: true }, - })).toThrow('Spark UI is not available for JobType.RAY'); - }); - - test('without runtime should throw', () => { - 
expect(() => new glue.Job(stack, 'Job', { - executable: glue.JobExecutable.pythonRay({ - glueVersion: glue.GlueVersion.V4_0, - pythonVersion: glue.PythonVersion.THREE_NINE, - script, - }), - workerType: glue.WorkerType.Z_2X, - workerCount: 2, - })).toThrow('Runtime is required for Ray jobs'); - }); - }); - - test('etl job with all props should synthesize correctly', () => { - new glue.Job(stack, 'Job', { - executable: glue.JobExecutable.pythonEtl({ - glueVersion: glue.GlueVersion.V2_0, - pythonVersion: glue.PythonVersion.THREE, - extraJarsFirst: true, - script, - extraPythonFiles, - extraJars, - extraFiles, - }), - }); - - Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { - GlueVersion: '2.0', - Command: { - Name: 'glueetl', - ScriptLocation: 's3://bucketname/script', - PythonVersion: '3', - }, - Role: { - 'Fn::GetAtt': [ - 'JobServiceRole4F432993', - 'Arn', - ], - }, - DefaultArguments: { - '--job-language': 'python', - '--extra-jars': 's3://bucketname/file1.jar,s3://bucketname/file2.jar', - '--extra-py-files': 's3://bucketname/file1.py,s3://bucketname/file2.py', - '--extra-files': 's3://bucketname/file1.txt,s3://bucketname/file2.txt', - '--user-jars-first': 'true', - }, - }); - }); - - test('streaming job with all props should synthesize correctly', () => { - new glue.Job(stack, 'Job', { - executable: glue.JobExecutable.scalaStreaming({ - glueVersion: glue.GlueVersion.V2_0, - extraJarsFirst: true, - className, - script, - extraJars, - extraFiles, - }), - }); - - Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { - GlueVersion: '2.0', - Command: { - Name: 'gluestreaming', - ScriptLocation: 's3://bucketname/script', - }, - Role: { - 'Fn::GetAtt': [ - 'JobServiceRole4F432993', - 'Arn', - ], - }, - DefaultArguments: { - '--job-language': 'scala', - '--class': 'com.amazon.test.ClassName', - '--extra-jars': 's3://bucketname/file1.jar,s3://bucketname/file2.jar', - '--extra-files': 's3://bucketname/file1.txt,s3://bucketname/file2.txt', 
- '--user-jars-first': 'true', - }, - }); - }); - - describe('event rules and rule-based metrics', () => { - beforeEach(() => { - job = new glue.Job(stack, 'Job', { - executable: glue.JobExecutable.scalaEtl({ - glueVersion: glue.GlueVersion.V2_0, - className, - script, - }), - }); - }); - - test('.onEvent() should create the expected event rule', () => { - job.onEvent('eventId', {}); - - Template.fromStack(stack).hasResourceProperties('AWS::Events::Rule', { - EventPattern: { - 'source': [ - 'aws.glue', - ], - 'detail-type': [ - 'Glue Job State Change', - 'Glue Job Run Status', - ], - 'detail': { - jobName: [ - { - Ref: 'JobB9D00F9F', - }, - ], - }, - }, - State: 'ENABLED', - }); - }); - - [ - { name: 'onSuccess()', invoke: (testJob: glue.Job) => testJob.onSuccess('SuccessRule'), state: 'SUCCEEDED' }, - { name: 'onFailure()', invoke: (testJob: glue.Job) => testJob.onFailure('FailureRule'), state: 'FAILED' }, - { name: 'onTimeout()', invoke: (testJob: glue.Job) => testJob.onTimeout('TimeoutRule'), state: 'TIMEOUT' }, - ].forEach((testCase) => { - test(`${testCase.name} should create a rule with correct properties`, () => { - testCase.invoke(job); - - Template.fromStack(stack).hasResourceProperties('AWS::Events::Rule', { - Description: { - 'Fn::Join': [ - '', - [ - 'Rule triggered when Glue job ', - { - Ref: 'JobB9D00F9F', - }, - ` is in ${testCase.state} state`, - ], - ], - }, - EventPattern: { - 'source': [ - 'aws.glue', - ], - 'detail-type': [ - 'Glue Job State Change', - 'Glue Job Run Status', - ], - 'detail': { - state: [ - testCase.state, - ], - jobName: [ - { - Ref: 'JobB9D00F9F', - }, - ], - }, - }, - State: 'ENABLED', - }); - }); - }); - - [ - { name: '.metricSuccess()', invoke: (testJob: glue.Job) => testJob.metricSuccess(), state: 'SUCCEEDED', ruleId: 'SuccessMetricRule' }, - { name: '.metricFailure()', invoke: (testJob: glue.Job) => testJob.metricFailure(), state: 'FAILED', ruleId: 'FailureMetricRule' }, - { name: '.metricTimeout()', invoke: (testJob: 
glue.Job) => testJob.metricTimeout(), state: 'TIMEOUT', ruleId: 'TimeoutMetricRule' }, - ].forEach((testCase) => { - test(`${testCase.name} should create the expected singleton event rule and corresponding metric`, () => { - const metric = testCase.invoke(job); - testCase.invoke(job); - - expect(metric).toEqual(new cloudwatch.Metric({ - dimensionsMap: { - RuleName: (job.node.findChild(testCase.ruleId) as events.Rule).ruleName, - }, - metricName: 'TriggeredRules', - namespace: 'AWS/Events', - statistic: 'Sum', - })); - - Template.fromStack(stack).resourceCountIs('AWS::Events::Rule', 1); - Template.fromStack(stack).hasResourceProperties('AWS::Events::Rule', { - Description: { - 'Fn::Join': [ - '', - [ - 'Rule triggered when Glue job ', - { - Ref: 'JobB9D00F9F', - }, - ` is in ${testCase.state} state`, - ], - ], - }, - EventPattern: { - 'source': [ - 'aws.glue', - ], - 'detail-type': [ - 'Glue Job State Change', - 'Glue Job Run Status', - ], - 'detail': { - state: [ - testCase.state, - ], - jobName: [ - { - Ref: 'JobB9D00F9F', - }, - ], - }, - }, - State: 'ENABLED', - }); - }); - }); - }); - - describe('.metric()', () => { - - test('with MetricType.COUNT should create a count sum metric', () => { - const metricName = 'glue.driver.aggregate.bytesRead'; - const props = { statistic: cloudwatch.Statistic.SUM }; - - expect(job.metric(metricName, glue.MetricType.COUNT, props)).toEqual(new cloudwatch.Metric({ - metricName, - statistic: 'Sum', - namespace: 'Glue', - dimensionsMap: { - JobName: job.jobName, - JobRunId: 'ALL', - Type: 'count', - }, - })); - }); - - test('with MetricType.GAUGE should create a gauge average metric', () => { - const metricName = 'glue.driver.BlockManager.disk.diskSpaceUsed_MB'; - const props = { statistic: cloudwatch.Statistic.AVERAGE }; - - expect(job.metric(metricName, glue.MetricType.GAUGE, props)).toEqual(new cloudwatch.Metric({ - metricName, - statistic: 'Average', - namespace: 'Glue', - dimensionsMap: { - JobName: job.jobName, - JobRunId: 
'ALL', - Type: 'gauge', - }, - })); - }); - }); - - describe('validation for maxCapacity and workerType', () => { - test('maxCapacity with workerType and workerCount should throw', () => { - expect(() => new glue.Job(stack, 'Job', { - executable: glue.JobExecutable.pythonEtl({ - glueVersion: glue.GlueVersion.V1_0, - pythonVersion: glue.PythonVersion.THREE, - script, - }), - maxCapacity: 10, - workerType: glue.WorkerType.G_1X, - workerCount: 10, - })).toThrow('maxCapacity cannot be used when setting workerType and workerCount'); - }); - - test('maxCapacity with GlueVersion 2.0 or later should throw', () => { - expect(() => new glue.Job(stack, 'Job', { - executable: glue.JobExecutable.pythonEtl({ - glueVersion: glue.GlueVersion.V2_0, - pythonVersion: glue.PythonVersion.THREE, - script, - }), - maxCapacity: 10, - })).toThrow('maxCapacity cannot be used when GlueVersion 2.0 or later'); - }); - - test('maxCapacity with Python Shell jobs validation', () => { - expect(() => new glue.Job(stack, 'Job', { - executable: glue.JobExecutable.pythonShell({ - glueVersion: glue.GlueVersion.V2_0, - pythonVersion: glue.PythonVersion.THREE, - script, - }), - maxCapacity: 10, - })).toThrow(/maxCapacity value must be either 0.0625 or 1 for JobType.PYTHON_SHELL jobs/); - }); - - test('workerType without workerCount should throw', () => { - expect(() => new glue.Job(stack, 'Job', { - executable: glue.JobExecutable.pythonEtl({ - glueVersion: glue.GlueVersion.V2_0, - pythonVersion: glue.PythonVersion.THREE, - script, - }), - workerType: glue.WorkerType.G_1X, - })).toThrow('Both workerType and workerCount must be set'); - }); - - test('workerCount without workerType should throw', () => { - expect(() => new glue.Job(stack, 'Job', { - executable: glue.JobExecutable.pythonEtl({ - glueVersion: glue.GlueVersion.V2_0, - pythonVersion: glue.PythonVersion.THREE, - script, - }), - workerCount: 10, - })).toThrow('Both workerType and workerCount must be set'); - }); - }); - }); -}); From 
a17a4df78b073167b6ce77ba4a38a48266ca85b7 Mon Sep 17 00:00:00 2001 From: "Janardhan (Janny) Molumuri" Date: Thu, 11 Jul 2024 23:31:34 +0000 Subject: [PATCH 37/51] Updated snapshots for the jobs integ tests --- .../aws-glue-job-pysparkflex-etl.assets.json | 4 ++-- .../aws-glue-job-pysparkflex-etl.template.json | 2 ++ .../test/integ.job-pyspark-flex-etl.js.snapshot/manifest.json | 2 +- .../test/integ.job-pyspark-flex-etl.js.snapshot/tree.json | 2 ++ .../aws-glue-job-pyspark-streaming.assets.json | 4 ++-- .../aws-glue-job-pyspark-streaming.template.json | 2 ++ .../integ.job-pyspark-streaming.js.snapshot/manifest.json | 2 +- .../test/integ.job-pyspark-streaming.js.snapshot/tree.json | 2 ++ .../aws-glue-job-scalasparkflex-etl.assets.json | 4 ++-- .../aws-glue-job-scalasparkflex-etl.template.json | 2 ++ .../integ.job-scalaspark-flex-etl.js.snapshot/manifest.json | 2 +- .../test/integ.job-scalaspark-flex-etl.js.snapshot/tree.json | 2 ++ .../aws-glue-job-scalaspark-streaming.assets.json | 4 ++-- .../aws-glue-job-scalaspark-streaming.template.json | 2 ++ .../integ.job-scalaspark-streaming.js.snapshot/manifest.json | 2 +- .../test/integ.job-scalaspark-streaming.js.snapshot/tree.json | 2 ++ .../integ.ray-job.js.snapshot/aws-glue-ray-job.assets.json | 4 ++-- .../integ.ray-job.js.snapshot/aws-glue-ray-job.template.json | 2 ++ .../test/integ.ray-job.js.snapshot/manifest.json | 2 +- .../aws-glue-alpha/test/integ.ray-job.js.snapshot/tree.json | 2 ++ 20 files changed, 35 insertions(+), 15 deletions(-) diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-flex-etl.js.snapshot/aws-glue-job-pysparkflex-etl.assets.json b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-flex-etl.js.snapshot/aws-glue-job-pysparkflex-etl.assets.json index 69ac81cf61856..15de0107fd134 100644 --- a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-flex-etl.js.snapshot/aws-glue-job-pysparkflex-etl.assets.json +++ 
b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-flex-etl.js.snapshot/aws-glue-job-pysparkflex-etl.assets.json @@ -14,7 +14,7 @@ } } }, - "526212322ba7ab66ae5aab010091baff528275b87d212460e3ecff513c0b2eff": { + "193dcee820d44a5de2c48d3e455195e1b19d1d4b1dea979dbacb4d90ecee8aec": { "source": { "path": "aws-glue-job-pysparkflex-etl.template.json", "packaging": "file" @@ -22,7 +22,7 @@ "destinations": { "current_account-current_region": { "bucketName": "cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}", - "objectKey": "526212322ba7ab66ae5aab010091baff528275b87d212460e3ecff513c0b2eff.json", + "objectKey": "193dcee820d44a5de2c48d3e455195e1b19d1d4b1dea979dbacb4d90ecee8aec.json", "assumeRoleArn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-file-publishing-role-${AWS::AccountId}-${AWS::Region}" } } diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-flex-etl.js.snapshot/aws-glue-job-pysparkflex-etl.template.json b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-flex-etl.js.snapshot/aws-glue-job-pysparkflex-etl.template.json index 971a1cb357e36..7b4cd1bbfdb80 100644 --- a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-flex-etl.js.snapshot/aws-glue-job-pysparkflex-etl.template.json +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-flex-etl.js.snapshot/aws-glue-job-pysparkflex-etl.template.json @@ -109,6 +109,7 @@ }, "DefaultArguments": { "--job-language": "python", + "--enable-continuous-cloudwatch-log": "true", "--enable-metrics": "", "--enable-observability-metrics": "true" }, @@ -145,6 +146,7 @@ }, "DefaultArguments": { "--job-language": "python", + "--enable-continuous-cloudwatch-log": "true", "--enable-metrics": "", "--enable-observability-metrics": "true", "arg1": "value1", diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-flex-etl.js.snapshot/manifest.json b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-flex-etl.js.snapshot/manifest.json index 
8fb3110de8188..197580f722ebb 100644 --- a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-flex-etl.js.snapshot/manifest.json +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-flex-etl.js.snapshot/manifest.json @@ -18,7 +18,7 @@ "validateOnSynth": false, "assumeRoleArn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-deploy-role-${AWS::AccountId}-${AWS::Region}", "cloudFormationExecutionRoleArn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-cfn-exec-role-${AWS::AccountId}-${AWS::Region}", - "stackTemplateAssetObjectUrl": "s3://cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}/526212322ba7ab66ae5aab010091baff528275b87d212460e3ecff513c0b2eff.json", + "stackTemplateAssetObjectUrl": "s3://cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}/193dcee820d44a5de2c48d3e455195e1b19d1d4b1dea979dbacb4d90ecee8aec.json", "requiresBootstrapStackVersion": 6, "bootstrapStackVersionSsmParameter": "/cdk-bootstrap/hnb659fds/version", "additionalDependencies": [ diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-flex-etl.js.snapshot/tree.json b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-flex-etl.js.snapshot/tree.json index d3ebe4e6fb7ec..0ae8ba0cdb4a2 100644 --- a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-flex-etl.js.snapshot/tree.json +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-flex-etl.js.snapshot/tree.json @@ -194,6 +194,7 @@ }, "defaultArguments": { "--job-language": "python", + "--enable-continuous-cloudwatch-log": "true", "--enable-metrics": "", "--enable-observability-metrics": "true" }, @@ -248,6 +249,7 @@ }, "defaultArguments": { "--job-language": "python", + "--enable-continuous-cloudwatch-log": "true", "--enable-metrics": "", "--enable-observability-metrics": "true", "arg1": "value1", diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-streaming.js.snapshot/aws-glue-job-pyspark-streaming.assets.json 
b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-streaming.js.snapshot/aws-glue-job-pyspark-streaming.assets.json index 026c7f43a5905..241c8c0ce1b5e 100644 --- a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-streaming.js.snapshot/aws-glue-job-pyspark-streaming.assets.json +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-streaming.js.snapshot/aws-glue-job-pyspark-streaming.assets.json @@ -14,7 +14,7 @@ } } }, - "366e48db9f32a5807817c16accd9f4363d2ee0a5fc43c2a11c70bb0bbec71fe1": { + "f4cee6cf3c3f4fb0c83791808642b0391d7a1bd7c1aaa0fe0a8da2168bc0dd85": { "source": { "path": "aws-glue-job-pyspark-streaming.template.json", "packaging": "file" @@ -22,7 +22,7 @@ "destinations": { "current_account-current_region": { "bucketName": "cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}", - "objectKey": "366e48db9f32a5807817c16accd9f4363d2ee0a5fc43c2a11c70bb0bbec71fe1.json", + "objectKey": "f4cee6cf3c3f4fb0c83791808642b0391d7a1bd7c1aaa0fe0a8da2168bc0dd85.json", "assumeRoleArn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-file-publishing-role-${AWS::AccountId}-${AWS::Region}" } } diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-streaming.js.snapshot/aws-glue-job-pyspark-streaming.template.json b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-streaming.js.snapshot/aws-glue-job-pyspark-streaming.template.json index acf446817f955..b73eab962841f 100644 --- a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-streaming.js.snapshot/aws-glue-job-pyspark-streaming.template.json +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-streaming.js.snapshot/aws-glue-job-pyspark-streaming.template.json @@ -109,6 +109,7 @@ }, "DefaultArguments": { "--job-language": "python", + "--enable-continuous-cloudwatch-log": "true", "--enable-metrics": "", "--enable-observability-metrics": "true" }, @@ -144,6 +145,7 @@ }, "DefaultArguments": { "--job-language": "python", + "--enable-continuous-cloudwatch-log": 
"true", "--enable-metrics": "", "--enable-observability-metrics": "true", "arg1": "value1", diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-streaming.js.snapshot/manifest.json b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-streaming.js.snapshot/manifest.json index 1c424568eeffe..70cb8893036f3 100644 --- a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-streaming.js.snapshot/manifest.json +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-streaming.js.snapshot/manifest.json @@ -18,7 +18,7 @@ "validateOnSynth": false, "assumeRoleArn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-deploy-role-${AWS::AccountId}-${AWS::Region}", "cloudFormationExecutionRoleArn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-cfn-exec-role-${AWS::AccountId}-${AWS::Region}", - "stackTemplateAssetObjectUrl": "s3://cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}/366e48db9f32a5807817c16accd9f4363d2ee0a5fc43c2a11c70bb0bbec71fe1.json", + "stackTemplateAssetObjectUrl": "s3://cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}/f4cee6cf3c3f4fb0c83791808642b0391d7a1bd7c1aaa0fe0a8da2168bc0dd85.json", "requiresBootstrapStackVersion": 6, "bootstrapStackVersionSsmParameter": "/cdk-bootstrap/hnb659fds/version", "additionalDependencies": [ diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-streaming.js.snapshot/tree.json b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-streaming.js.snapshot/tree.json index e5408f8b26d3c..05cbf25732dde 100644 --- a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-streaming.js.snapshot/tree.json +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-pyspark-streaming.js.snapshot/tree.json @@ -194,6 +194,7 @@ }, "defaultArguments": { "--job-language": "python", + "--enable-continuous-cloudwatch-log": "true", "--enable-metrics": "", "--enable-observability-metrics": "true" }, @@ -247,6 +248,7 @@ }, "defaultArguments": { "--job-language": "python", 
+ "--enable-continuous-cloudwatch-log": "true", "--enable-metrics": "", "--enable-observability-metrics": "true", "arg1": "value1", diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-flex-etl.js.snapshot/aws-glue-job-scalasparkflex-etl.assets.json b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-flex-etl.js.snapshot/aws-glue-job-scalasparkflex-etl.assets.json index 246e028be6d6f..034678624b0c0 100644 --- a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-flex-etl.js.snapshot/aws-glue-job-scalasparkflex-etl.assets.json +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-flex-etl.js.snapshot/aws-glue-job-scalasparkflex-etl.assets.json @@ -14,7 +14,7 @@ } } }, - "ff1b08d04e7d65e42ead8e33a88a380c6678218b733d0b350cd0bea32ec2944f": { + "8eb4431dd31801d6750894521b469099ec12fdf088e934030d0e8f4775aef416": { "source": { "path": "aws-glue-job-scalasparkflex-etl.template.json", "packaging": "file" @@ -22,7 +22,7 @@ "destinations": { "current_account-current_region": { "bucketName": "cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}", - "objectKey": "ff1b08d04e7d65e42ead8e33a88a380c6678218b733d0b350cd0bea32ec2944f.json", + "objectKey": "8eb4431dd31801d6750894521b469099ec12fdf088e934030d0e8f4775aef416.json", "assumeRoleArn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-file-publishing-role-${AWS::AccountId}-${AWS::Region}" } } diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-flex-etl.js.snapshot/aws-glue-job-scalasparkflex-etl.template.json b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-flex-etl.js.snapshot/aws-glue-job-scalasparkflex-etl.template.json index f046068d73e7c..44a994406b023 100644 --- a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-flex-etl.js.snapshot/aws-glue-job-scalasparkflex-etl.template.json +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-flex-etl.js.snapshot/aws-glue-job-scalasparkflex-etl.template.json @@ -109,6 
+109,7 @@ "DefaultArguments": { "--job-language": "scala", "--class": "com.example.HelloWorld", + "--enable-continuous-cloudwatch-log": "true", "--enable-metrics": "", "--enable-observability-metrics": "true" }, @@ -145,6 +146,7 @@ "DefaultArguments": { "--job-language": "scala", "--class": "com.example.HelloWorld", + "--enable-continuous-cloudwatch-log": "true", "--enable-metrics": "", "--enable-observability-metrics": "true", "arg1": "value1", diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-flex-etl.js.snapshot/manifest.json b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-flex-etl.js.snapshot/manifest.json index 76778efc60610..8b991a073dd5a 100644 --- a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-flex-etl.js.snapshot/manifest.json +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-flex-etl.js.snapshot/manifest.json @@ -18,7 +18,7 @@ "validateOnSynth": false, "assumeRoleArn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-deploy-role-${AWS::AccountId}-${AWS::Region}", "cloudFormationExecutionRoleArn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-cfn-exec-role-${AWS::AccountId}-${AWS::Region}", - "stackTemplateAssetObjectUrl": "s3://cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}/ff1b08d04e7d65e42ead8e33a88a380c6678218b733d0b350cd0bea32ec2944f.json", + "stackTemplateAssetObjectUrl": "s3://cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}/8eb4431dd31801d6750894521b469099ec12fdf088e934030d0e8f4775aef416.json", "requiresBootstrapStackVersion": 6, "bootstrapStackVersionSsmParameter": "/cdk-bootstrap/hnb659fds/version", "additionalDependencies": [ diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-flex-etl.js.snapshot/tree.json b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-flex-etl.js.snapshot/tree.json index c8dc5ada88490..ed0b43e367bcd 100644 --- 
a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-flex-etl.js.snapshot/tree.json +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-flex-etl.js.snapshot/tree.json @@ -194,6 +194,7 @@ "defaultArguments": { "--job-language": "scala", "--class": "com.example.HelloWorld", + "--enable-continuous-cloudwatch-log": "true", "--enable-metrics": "", "--enable-observability-metrics": "true" }, @@ -248,6 +249,7 @@ "defaultArguments": { "--job-language": "scala", "--class": "com.example.HelloWorld", + "--enable-continuous-cloudwatch-log": "true", "--enable-metrics": "", "--enable-observability-metrics": "true", "arg1": "value1", diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-streaming.js.snapshot/aws-glue-job-scalaspark-streaming.assets.json b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-streaming.js.snapshot/aws-glue-job-scalaspark-streaming.assets.json index f459de5343980..70bd2cbe00c89 100644 --- a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-streaming.js.snapshot/aws-glue-job-scalaspark-streaming.assets.json +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-streaming.js.snapshot/aws-glue-job-scalaspark-streaming.assets.json @@ -14,7 +14,7 @@ } } }, - "c3b8ab6feb5555c17f340cd7ad1615f3d3ae8d9a61d1966df25b95256a6da475": { + "34ed620f765a71adfb1015fa87746014460ecb440ed6bbba8cf4ddcec0f5104e": { "source": { "path": "aws-glue-job-scalaspark-streaming.template.json", "packaging": "file" @@ -22,7 +22,7 @@ "destinations": { "current_account-current_region": { "bucketName": "cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}", - "objectKey": "c3b8ab6feb5555c17f340cd7ad1615f3d3ae8d9a61d1966df25b95256a6da475.json", + "objectKey": "34ed620f765a71adfb1015fa87746014460ecb440ed6bbba8cf4ddcec0f5104e.json", "assumeRoleArn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-file-publishing-role-${AWS::AccountId}-${AWS::Region}" } } diff --git 
a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-streaming.js.snapshot/aws-glue-job-scalaspark-streaming.template.json b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-streaming.js.snapshot/aws-glue-job-scalaspark-streaming.template.json index d44b72c813adb..71f0886daa41a 100644 --- a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-streaming.js.snapshot/aws-glue-job-scalaspark-streaming.template.json +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-streaming.js.snapshot/aws-glue-job-scalaspark-streaming.template.json @@ -109,6 +109,7 @@ "DefaultArguments": { "--job-language": "scala", "--class": "com.example.HelloWorld", + "--enable-continuous-cloudwatch-log": "true", "--enable-metrics": "", "--enable-observability-metrics": "true" }, @@ -144,6 +145,7 @@ "DefaultArguments": { "--job-language": "scala", "--class": "com.example.HelloWorld", + "--enable-continuous-cloudwatch-log": "true", "--enable-metrics": "", "--enable-observability-metrics": "true", "arg1": "value1", diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-streaming.js.snapshot/manifest.json b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-streaming.js.snapshot/manifest.json index 18e6da3fb4c07..c59b801fdf45b 100644 --- a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-streaming.js.snapshot/manifest.json +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-streaming.js.snapshot/manifest.json @@ -18,7 +18,7 @@ "validateOnSynth": false, "assumeRoleArn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-deploy-role-${AWS::AccountId}-${AWS::Region}", "cloudFormationExecutionRoleArn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-cfn-exec-role-${AWS::AccountId}-${AWS::Region}", - "stackTemplateAssetObjectUrl": "s3://cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}/c3b8ab6feb5555c17f340cd7ad1615f3d3ae8d9a61d1966df25b95256a6da475.json", + 
"stackTemplateAssetObjectUrl": "s3://cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}/34ed620f765a71adfb1015fa87746014460ecb440ed6bbba8cf4ddcec0f5104e.json", "requiresBootstrapStackVersion": 6, "bootstrapStackVersionSsmParameter": "/cdk-bootstrap/hnb659fds/version", "additionalDependencies": [ diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-streaming.js.snapshot/tree.json b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-streaming.js.snapshot/tree.json index 896cee852e070..6e4736b728178 100644 --- a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-streaming.js.snapshot/tree.json +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-scalaspark-streaming.js.snapshot/tree.json @@ -194,6 +194,7 @@ "defaultArguments": { "--job-language": "scala", "--class": "com.example.HelloWorld", + "--enable-continuous-cloudwatch-log": "true", "--enable-metrics": "", "--enable-observability-metrics": "true" }, @@ -247,6 +248,7 @@ "defaultArguments": { "--job-language": "scala", "--class": "com.example.HelloWorld", + "--enable-continuous-cloudwatch-log": "true", "--enable-metrics": "", "--enable-observability-metrics": "true", "arg1": "value1", diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.ray-job.js.snapshot/aws-glue-ray-job.assets.json b/packages/@aws-cdk/aws-glue-alpha/test/integ.ray-job.js.snapshot/aws-glue-ray-job.assets.json index 263566b22517d..3b876e16c7915 100644 --- a/packages/@aws-cdk/aws-glue-alpha/test/integ.ray-job.js.snapshot/aws-glue-ray-job.assets.json +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.ray-job.js.snapshot/aws-glue-ray-job.assets.json @@ -14,7 +14,7 @@ } } }, - "19150b29a8e6a63aa62cb365cef33a8598dd853733842bde8c714f2193ca3e64": { + "88c38c39c4e4154ff32d6a619436c3605447e88e9f7b2917c0a4bdbec101913e": { "source": { "path": "aws-glue-ray-job.template.json", "packaging": "file" @@ -22,7 +22,7 @@ "destinations": { "current_account-current_region": { "bucketName": 
"cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}", - "objectKey": "19150b29a8e6a63aa62cb365cef33a8598dd853733842bde8c714f2193ca3e64.json", + "objectKey": "88c38c39c4e4154ff32d6a619436c3605447e88e9f7b2917c0a4bdbec101913e.json", "assumeRoleArn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-file-publishing-role-${AWS::AccountId}-${AWS::Region}" } } diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.ray-job.js.snapshot/aws-glue-ray-job.template.json b/packages/@aws-cdk/aws-glue-alpha/test/integ.ray-job.js.snapshot/aws-glue-ray-job.template.json index 39957f8f30da6..1449533215f50 100644 --- a/packages/@aws-cdk/aws-glue-alpha/test/integ.ray-job.js.snapshot/aws-glue-ray-job.template.json +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.ray-job.js.snapshot/aws-glue-ray-job.template.json @@ -108,6 +108,7 @@ } }, "DefaultArguments": { + "--enable-continuous-cloudwatch-log": "true", "--enable-metrics": "", "--enable-observability-metrics": "true" }, @@ -144,6 +145,7 @@ "DefaultArguments": { "arg1": "value1", "arg2": "value2", + "--enable-continuous-cloudwatch-log": "true", "--enable-metrics": "", "--enable-observability-metrics": "true" }, diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.ray-job.js.snapshot/manifest.json b/packages/@aws-cdk/aws-glue-alpha/test/integ.ray-job.js.snapshot/manifest.json index b840069f6db88..87e43681bf422 100644 --- a/packages/@aws-cdk/aws-glue-alpha/test/integ.ray-job.js.snapshot/manifest.json +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.ray-job.js.snapshot/manifest.json @@ -18,7 +18,7 @@ "validateOnSynth": false, "assumeRoleArn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-deploy-role-${AWS::AccountId}-${AWS::Region}", "cloudFormationExecutionRoleArn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-cfn-exec-role-${AWS::AccountId}-${AWS::Region}", - "stackTemplateAssetObjectUrl": 
"s3://cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}/19150b29a8e6a63aa62cb365cef33a8598dd853733842bde8c714f2193ca3e64.json", + "stackTemplateAssetObjectUrl": "s3://cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}/88c38c39c4e4154ff32d6a619436c3605447e88e9f7b2917c0a4bdbec101913e.json", "requiresBootstrapStackVersion": 6, "bootstrapStackVersionSsmParameter": "/cdk-bootstrap/hnb659fds/version", "additionalDependencies": [ diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.ray-job.js.snapshot/tree.json b/packages/@aws-cdk/aws-glue-alpha/test/integ.ray-job.js.snapshot/tree.json index 29df1252455be..9d05cf81c41f3 100644 --- a/packages/@aws-cdk/aws-glue-alpha/test/integ.ray-job.js.snapshot/tree.json +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.ray-job.js.snapshot/tree.json @@ -193,6 +193,7 @@ "runtime": "Ray2.4" }, "defaultArguments": { + "--enable-continuous-cloudwatch-log": "true", "--enable-metrics": "", "--enable-observability-metrics": "true" }, @@ -247,6 +248,7 @@ "defaultArguments": { "arg1": "value1", "arg2": "value2", + "--enable-continuous-cloudwatch-log": "true", "--enable-metrics": "", "--enable-observability-metrics": "true" }, From 28c97fdf1e59278fde09426faa4ffc82da510a38 Mon Sep 17 00:00:00 2001 From: Natalie White Date: Thu, 11 Jul 2024 23:47:44 +0000 Subject: [PATCH 38/51] Final README update --- packages/@aws-cdk/aws-glue-alpha/README.md | 1638 ++++++++++++++------ 1 file changed, 1190 insertions(+), 448 deletions(-) diff --git a/packages/@aws-cdk/aws-glue-alpha/README.md b/packages/@aws-cdk/aws-glue-alpha/README.md index 051044a74c8ff..9cb76fb062f59 100644 --- a/packages/@aws-cdk/aws-glue-alpha/README.md +++ b/packages/@aws-cdk/aws-glue-alpha/README.md @@ -17,580 +17,1322 @@ This module is part of the [AWS Cloud Development Kit](https://github.com/aws/aws-cdk) project. -## Job - -A `Job` encapsulates a script that connects to data sources, processes them, and then writes output to a data target. 
- -There are 3 types of jobs supported by AWS Glue: Spark ETL, Spark Streaming, and Python Shell jobs. - -The `glue.JobExecutable` allows you to specify the type of job, the language to use and the code assets required by the job. - -`glue.Code` allows you to refer to the different code assets required by the job, either from an existing S3 location or from a local file path. - -`glue.ExecutionClass` allows you to specify `FLEX` or `STANDARD`. `FLEX` is appropriate for non-urgent jobs such as pre-production jobs, testing, and one-time data loads. +## README + +[AWS Glue](https://aws.amazon.com/glue/) is a serverless data integration +service that makes it easier to discover, prepare, move, and integrate data +from multiple sources for analytics, machine learning (ML), and application +development. + +Without an L2 construct, developers define Glue data sources, connections, +jobs, and workflows for their data and ETL solutions via the AWS console, +the AWS CLI, and Infrastructure as Code tools like CloudFormation and the +CDK. However, there are several challenges to defining Glue resources at +scale that an L2 construct can resolve. First, developers must reference +documentation to determine the valid combinations of job type, Glue version, +worker type, language versions, and other parameters that are required for specific +job types. Additionally, developers must already know or look up the +networking constraints for data source connections, and there is ambiguity +around how to securely store secrets for JDBC connections. Finally, +developers want prescriptive guidance via best practice defaults for +throughput parameters like number of workers and batching. + +The Glue L2 construct has convenience methods working backwards from common +use cases and sets required parameters to defaults that align with recommended +best practices for each job type.
It also provides customers with a balance +between flexibility via optional parameter overrides, and opinionated +interfaces that discourage anti-patterns, resulting in reduced time to develop +and deploy new resources. + +### References + +* [Glue Launch Announcement](https://aws.amazon.com/blogs/aws/launch-aws-glue-now-generally-available/) +* [Glue Documentation](https://docs.aws.amazon.com/glue/index.html) +* [Glue L1 (CloudFormation) Constructs](https://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/AWS_Glue.html) +* Prior version of the [@aws-cdk/aws-glue-alpha module](https://github.com/aws/aws-cdk/blob/v2.51.1/packages/%40aws-cdk/aws-glue/README.md) + +## Create a Glue Job + +A Job encapsulates a script that connects to data sources, processes +them, and then writes output to a data target. There are four types of Glue +Jobs: Spark (ETL and Streaming), Python Shell, Ray, and Flex Jobs. Most +of the required parameters for these jobs are common across all types, +but there are a few differences depending on the languages supported +and features provided by each type. For all job types, the L2 defaults +to AWS best practice recommendations, such as: + +* Use of Secrets Manager for Connection JDBC strings +* Glue job autoscaling +* Default parameter values for Glue job creation + +This iteration of the L2 construct introduces breaking changes to +the existing glue-alpha-module, but these changes streamline the developer +experience, introduce new constants for defaults, and replace synth-time +validations with interface contracts for enforcement of the parameter combinations +that Glue supports. As an opinionated construct, the Glue L2 construct does +not allow developers to create resources that use non-current versions +of Glue or deprecated language dependencies (e.g. deprecated versions of Python). +As always, L1s allow you to specify a wider range of parameters if you need +or want to use alternative configurations.
+ +Optional and required parameters for each job are enforced via interface +rather than validation; see [Glue's public documentation](https://docs.aws.amazon.com/glue/latest/dg/aws-glue-api.html) +for more granular details. ### Spark Jobs -These jobs run in an Apache Spark environment managed by AWS Glue. +1. **ETL Jobs** -#### ETL Jobs - -An ETL job processes data in batches using Apache Spark. +ETL jobs support pySpark and Scala languages, for which there are separate but +similar constructors. ETL jobs default to the G2 worker type, but you can +override this default with other supported worker type values (G1, G2, G4 +and G8). ETL jobs default to Glue version 4.0, which you can override to 3.0. +The following ETL features are enabled by default: +`--enable-metrics, --enable-spark-ui, --enable-continuous-cloudwatch-log.` +You can find more details about version, worker type and other features in +[Glue's public documentation](https://docs.aws.amazon.com/glue/latest/dg/aws-glue-api-jobs-job.html). ```ts -declare const bucket: s3.Bucket; -new glue.Job(this, 'ScalaSparkEtlJob', { - executable: glue.JobExecutable.scalaEtl({ - glueVersion: glue.GlueVersion.V4_0, - script: glue.Code.fromBucket(bucket, 'src/com/example/HelloWorld.scala'), +glue.ScalaSparkEtlJob(this, 'ScalaSparkEtlJob', { + script: glue.Code.fromBucket('bucket-name', 's3prefix/path-to-scala-jar'), className: 'com.example.HelloWorld', - extraJars: [glue.Code.fromBucket(bucket, 'jars/HelloWorld.jar')], - }), - workerType: glue.WorkerType.G_8X, - description: 'an example Scala ETL job', + role: iam.IRole, }); -``` -#### Streaming Jobs +glue.pySparkEtlJob(this, 'pySparkEtlJob', { + script: glue.Code.fromBucket('bucket-name', 's3prefix/path-to-python-script'), + role: iam.IRole, +}); +``` -A Streaming job is similar to an ETL job, except that it performs ETL on data streams. It uses the Apache Spark Structured Streaming framework. Some Spark job features are not available to streaming ETL jobs.
+Optional override examples: ```ts -new glue.Job(this, 'PythonSparkStreamingJob', { - executable: glue.JobExecutable.pythonStreaming({ - glueVersion: glue.GlueVersion.V4_0, - pythonVersion: glue.PythonVersion.THREE, - script: glue.Code.fromAsset(path.join(__dirname, 'job-script', 'hello_world.py')), - }), - description: 'an example Python Streaming job', +glue.ScalaSparkEtlJob(this, 'ScalaSparkEtlJob', { + glueVersion: glue.GlueVersion.V3_0, + script: glue.Code.fromBucket('bucket-name', 's3prefix/path-to-scala-script'), + className: 'com.example.HelloWorld', + extraJars: [glue.Code.fromBucket('bucket-name', 'path-to-extra-jars'),], + description: 'an example Scala Spark ETL job', + numberOfWorkers: 20, + workerType: glue.WorkerType.G8X, + timeout: cdk.Duration.minutes(15), + role: iam.IRole, }); -``` - -### Python Shell Jobs -A Python shell job runs Python scripts as a shell and supports a Python version that depends on the AWS Glue version you are using. -This can be used to schedule and run tasks that don't require an Apache Spark environment. 
Currently, three flavors are supported: +glue.pySparkEtlJob(this, 'pySparkEtlJob', { + jobType: glue.JobType.ETL, + glueVersion: glue.GlueVersion.V3_0, + pythonVersion: glue.PythonVersion.3_9, + script: glue.Code.fromBucket('bucket-name', 's3prefix/path-to-python-script'), + description: 'an example pySpark ETL job', + numberOfWorkers: 20, + workerType: glue.WorkerType.G8X, + timeout: cdk.Duration.minutes(15), + role: iam.IRole, +}); +``` -* PythonVersion.TWO (2.7; EOL) -* PythonVersion.THREE (3.6) -* PythonVersion.THREE_NINE (3.9) +Scala Spark ETL Job Property Interface: ```ts -declare const bucket: s3.Bucket; -new glue.Job(this, 'PythonShellJob', { - executable: glue.JobExecutable.pythonShell({ - glueVersion: glue.GlueVersion.V1_0, - pythonVersion: glue.PythonVersion.THREE, - script: glue.Code.fromBucket(bucket, 'script.py'), - }), - description: 'an example Python Shell job', -}); +ScalaSparkEtlJobProps{ + /** + * Script Code Location (required) + * Script to run when the Glue Job executes. 
Can be uploaded + * from the local directory structure using fromAsset + * or referenced via S3 location using fromBucket + * */ + script: glue.Code; + + /** + * Class name (required for Scala) + * Package and class name for the entry point of Glue job execution for + * Java scripts + * */ + className: string; + + /** + * Extra Jars S3 URL (optional) + * S3 URL where additional jar dependencies are located + */ + extraJars?: string[]; + + /** + * IAM Role (required) + * IAM Role to use for Glue job execution + * Must be specified by the developer because the L2 doesn't have visibility + * into the actions the script(s) take during the job execution + * */ + role: iam.IRole; + + /** + * Name of the Glue job (optional) + * Developer-specified name of the Glue job + * */ + name?: string; + + /** + * Description (optional) + * Developer-specified description of the Glue job + * */ + description?: string; + + /** + * Number of Workers (optional) + * Number of workers for Glue to use during job execution + * @default 10 + * */ + numberOrWorkers?: int; + + /** + * Worker Type (optional) + * Type of Worker for Glue to use during job execution + * Enum options: Standard, G_1X, G_2X, G_025X. G_4X, G_8X, Z_2X + * @default G_2X + * */ + workerType?: glue.WorkerType; + + /** + * Max Concurrent Runs (optional) + * The maximum number of runs this Glue job can concurrently run + * @default 1 + * */ + maxConcurrentRuns?: int; + + /** + * Default Arguments (optional) + * The default arguments for every run of this Glue job, + * specified as name-value pairs. 
+ * */ + defaultArguments?: {[key: string], string }[]; + + /** + * Connections (optional) + * List of connections to use for this Glue job + * */ + connections?: IConnection[]; + + /** + * Max Retries (optional) + * Maximum number of retry attempts Glue performs + * if the job fails + * @default 0 + * */ + maxRetries?: int; + + /** + * Timeout (optional) + * Timeout for the Glue job, specified in minutes + * @default 2880 (2 days for non-streaming) + * */ + timeout?: cdk.Duration; + + /** + * Security Configuration (optional) + * Defines the encryption options for the Glue job + * */ + securityConfiguration?: ISecurityConfiguration; + + /** + * Tags (optional) + * A list of key:value pairs of tags to apply to this Glue job resource + * */ + tags?: {[key: string], string }[]; + + /** + * Glue Version + * The version of Glue to use to execute this job + * @default 3.0 for ETL + * */ + glueVersion?: glue.GlueVersion; +} ``` -### Ray Jobs - -These jobs run in a Ray environment managed by AWS Glue. +pySpark ETL Job Property Interface: ```ts -new glue.Job(this, 'RayJob', { - executable: glue.JobExecutable.pythonRay({ - glueVersion: glue.GlueVersion.V4_0, - pythonVersion: glue.PythonVersion.THREE_NINE, - runtime: glue.Runtime.RAY_TWO_FOUR, - script: glue.Code.fromAsset(path.join(__dirname, 'job-script', 'hello_world.py')), - }), - workerType: glue.WorkerType.Z_2X, - workerCount: 2, - description: 'an example Ray job' -}); +pySparkEtlJobProps{ + /** + * Script Code Location (required) + * Script to run when the Glue job executes. 
Can be uploaded + * from the local directory structure using fromAsset + * or referenced via S3 location using fromBucket + * */ + script: glue.Code; + + /** + * IAM Role (required) + * IAM Role to use for Glue job execution + * Must be specified by the developer because the L2 doesn't have visibility + * into the actions the script(s) takes during the job execution + * */ + role: iam.IRole; + + /** + * Name of the Glue job (optional) + * Developer-specified name of the Glue job + * */ + name?: string; + + /** + * Description (optional) + * Developer-specified description of the Glue job + * */ + description?: string; + + /** + * Extra Jars S3 URL (optional) + * S3 URL where additional jar dependencies are located + */ + extraJars?: string[]; + + /** + * Number of Workers (optional) + * Number of workers for Glue to use during job execution + * @default 10 + * */ + numberOrWorkers?: int; + + /** + * Worker Type (optional) + * Type of Worker for Glue to use during job execution + * Enum options: Standard, G_1X, G_2X, G_025X. G_4X, G_8X, Z_2X + * @default G_2X + * */ + workerType?: glue.WorkerType; + + /** + * Max Concurrent Runs (optional) + * The maximum number of runs this Glue job can concurrently run + * @default 1 + * */ + maxConcurrentRuns?: int; + + /** + * Default Arguments (optional) + * The default arguments for every run of this Glue job, + * specified as name-value pairs. 
+ * */ + defaultArguments?: {[key: string], string }[]; + + /** + * Connections (optional) + * List of connections to use for this Glue job + * */ + connections?: IConnection[]; + + /** + * Max Retries (optional) + * Maximum number of retry attempts Glue performs + * if the job fails + * @default 0 + * */ + maxRetries?: int; + + /** + * Timeout (optional) + * Timeout for the Glue job, specified in minutes + * @default 2880 (2 days for non-streaming) + * */ + timeout?: cdk.Duration; + + /** + * Security Configuration (optional) + * Defines the encryption options for the Glue job + * */ + securityConfiguration?: ISecurityConfiguration; + + /** + * Tags (optional) + * A list of key:value pairs of tags to apply to this Glue job resource + * */ + tags?: {[key: string], string }[]; + + /** + * Glue Version + * The version of Glue to use to execute this job + * @default 3.0 for ETL + * */ + glueVersion?: glue.GlueVersion; +} ``` -### Enable Spark UI +2. **Streaming Jobs** -Enable Spark UI setting the `sparkUI` property. +Streaming jobs are similar to ETL jobs, except that they perform ETL on data +streams using the Apache Spark Structured Streaming framework. Some Spark +job features are not available to Streaming ETL jobs. They support Scala +and pySpark languages. PySpark streaming jobs default to Python 3.9, +which you can override with any non-deprecated version of Python. They +default to the G2 worker type and Glue 4.0, both of which you can override. +The following best practice features are enabled by default: +`--enable-metrics, --enable-spark-ui, --enable-continuous-cloudwatch-log`.
```ts -new glue.Job(this, 'EnableSparkUI', { - jobName: 'EtlJobWithSparkUIPrefix', - sparkUI: { - enabled: true, - }, - executable: glue.JobExecutable.pythonEtl({ - glueVersion: glue.GlueVersion.V3_0, - pythonVersion: glue.PythonVersion.THREE, - script: glue.Code.fromAsset(path.join(__dirname, 'job-script', 'hello_world.py')), - }), +new glue.pySparkStreamingJob(this, 'pySparkStreamingJob', { + script: glue.Code.fromBucket('bucket-name', 's3prefix/path-to-python-script'), + role: iam.IRole, }); -``` -The `sparkUI` property also allows the specification of an s3 bucket and a bucket prefix. -See [documentation](https://docs.aws.amazon.com/glue/latest/dg/add-job.html) for more information on adding jobs in Glue. +new glue.ScalaSparkStreamingJob(this, 'ScalaSparkStreamingJob', { + script: glue.Code.fromBucket('bucket-name', 's3prefix/path-to-scala-jar'), + className: 'com.example.HelloWorld', + role: iam.IRole, +}); -## Connection +``` -A `Connection` allows Glue jobs, crawlers and development endpoints to access certain types of data stores. 
For example, to create a network connection to connect to a data source within a VPC: +Optional override examples: ```ts -declare const securityGroup: ec2.SecurityGroup; -declare const subnet: ec2.Subnet; -new glue.Connection(this, 'MyConnection', { - type: glue.ConnectionType.NETWORK, - // The security groups granting AWS Glue inbound access to the data source within the VPC - securityGroups: [securityGroup], - // The VPC subnet which contains the data source - subnet, +new glue.pySparkStreamingJob(this, 'pySparkStreamingJob', { + glueVersion: glue.GlueVersion.V3_0, + pythonVersion: glue.PythonVersion.3_9, + script: glue.Code.fromBucket('bucket-name', 's3prefix/path-to-python-script'), + description: 'an example Python Streaming job', + numberOfWorkers: 20, + workerType: glue.WorkerType.G8X, + timeout: cdk.Duration.minutes(15), + role: iam.IRole, +}); + +new glue.ScalaSparkStreamingJob(this, 'ScalaSparkStreamingJob', { + glueVersion: glue.GlueVersion.V3_0, + pythonVersion: glue.PythonVersion.3_9, + script: glue.Code.fromBucket('bucket-name', 's3prefix/path-to-scala-script'), + extraJars: [glue.Code.fromBucket('bucket-name', 'path-to-extra-jars'),], + className: 'com.example.HelloWorld', + description: 'an example Python Streaming job', + numberOfWorkers: 20, + workerType: glue.WorkerType.G8X, + timeout: cdk.Duration.minutes(15), + role: iam.IRole, }); ``` -For RDS `Connection` by JDBC, it is recommended to manage credentials using AWS Secrets Manager. To use Secret, specify `SECRET_ID` in `properties` like the following code. Note that in this case, the subnet must have a route to the AWS Secrets Manager VPC endpoint or to the AWS Secrets Manager endpoint through a NAT gateway. 
+Scala Spark Streaming Job Property Interface: ```ts -declare const securityGroup: ec2.SecurityGroup; -declare const subnet: ec2.Subnet; -declare const db: rds.DatabaseCluster; -new glue.Connection(this, "RdsConnection", { - type: glue.ConnectionType.JDBC, - securityGroups: [securityGroup], - subnet, - properties: { - JDBC_CONNECTION_URL: `jdbc:mysql://${db.clusterEndpoint.socketAddress}/databasename`, - JDBC_ENFORCE_SSL: "false", - SECRET_ID: db.secret!.secretName, - }, -}); +ScalaSparkStreamingJobProps{ + /** + * Script Code Location (required) + * Script to run when the Glue job executes. Can be uploaded + * from the local directory structure using fromAsset + * or referenced via S3 location using fromBucket + * */ + script: glue.Code; + + /** + * Class name (required for Scala scripts) + * Package and class name for the entry point of Glue job execution for + * Java scripts + * */ + className: string; + + /** + * IAM Role (required) + * IAM Role to use for Glue job execution + * Must be specified by the developer because the L2 doesn't have visibility + * into the actions the script(s) take during the job execution + * */ + role: iam.IRole; + + /** + * Name of the Glue job (optional) + * Developer-specified name of the Glue job + * */ + name?: string; + + /** + * Extra Jars S3 URL (optional) + * S3 URL where additional jar dependencies are located + */ + extraJars?: string[]; + + /** + * Description (optional) + * Developer-specified description of the Glue job + * */ + description?: string; + + /** + * Number of Workers (optional) + * Number of workers for Glue to use during job execution + * @default 10 + * */ + numberOrWorkers?: int; + + /** + * Worker Type (optional) + * Type of Worker for Glue to use during job execution + * Enum options: Standard, G_1X, G_2X, G_025X. 
G_4X, G_8X, Z_2X + * @default G_2X + * */ + workerType?: glue.WorkerType; + + /** + * Max Concurrent Runs (optional) + * The maximum number of runs this Glue job can concurrently run + * @default 1 + * */ + maxConcurrentRuns?: int; + + /** + * Default Arguments (optional) + * The default arguments for every run of this Glue job, + * specified as name-value pairs. + * */ + defaultArguments?: {[key: string], string }[]; + + /** + * Connections (optional) + * List of connections to use for this Glue job + * */ + connections?: IConnection[]; + + /** + * Max Retries (optional) + * Maximum number of retry attempts Glue performs + * if the job fails + * @default 0 + * */ + maxRetries?: int; + + /** + * Timeout (optional) + * Timeout for the Glue job, specified in minutes + * */ + timeout?: cdk.Duration; + + /** + * Security Configuration (optional) + * Defines the encryption options for the Glue job + * */ + securityConfiguration?: ISecurityConfiguration; + + /** + * Tags (optional) + * A list of key:value pairs of tags to apply to this Glue job resource + * */ + tags?: {[key: string], string }[]; + + /** + * Glue Version + * The version of Glue to use to execute this job + * @default 3.0 + * */ + glueVersion?: glue.GlueVersion; +} ``` -If you need to use a connection type that doesn't exist as a static member on `ConnectionType`, you can instantiate a `ConnectionType` object, e.g: `new glue.ConnectionType('NEW_TYPE')`. +pySpark Streaming Job Property Interface: -See [Adding a Connection to Your Data Store](https://docs.aws.amazon.com/glue/latest/dg/populate-add-connection.html) and [Connection Structure](https://docs.aws.amazon.com/glue/latest/dg/aws-glue-api-catalog-connections.html#aws-glue-api-catalog-connections-Connection) documentation for more information on the supported data stores and their configurations. +```ts +pySparkStreamingJobProps{ + /** + * Script Code Location (required) + * Script to run when the Glue job executes. 
Can be uploaded + * from the local directory structure using fromAsset + * or referenced via S3 location using fromBucket + * */ + script: glue.Code; + + /** + * IAM Role (required) + * IAM Role to use for Glue job execution + * Must be specified by the developer because the L2 doesn't have visibility + * into the actions the script(s) take during the job execution + * */ + role: iam.IRole; + + /** + * Name of the Glue job (optional) + * Developer-specified name of the Glue job + * */ + name?: string; + + /** + * Description (optional) + * Developer-specified description of the Glue job + * */ + description?: string; + + /** + * Number of Workers (optional) + * Number of workers for Glue to use during job execution + * @default 10 + * */ + numberOrWorkers?: int; + + /** + * Worker Type (optional) + * Type of Worker for Glue to use during job execution + * Enum options: Standard, G_1X, G_2X, G_025X. G_4X, G_8X, Z_2X + * @default G_2X + * */ + workerType?: glue.WorkerType; + + /** + * Max Concurrent Runs (optional) + * The maximum number of runs this Glue job can concurrently run + * @default 1 + * */ + maxConcurrentRuns?: int; + + /** + * Default Arguments (optional) + * The default arguments for every run of this Glue job, + * specified as name-value pairs. 
+ * */ + defaultArguments?: {[key: string], string }[]; + + /** + * Connections (optional) + * List of connections to use for this Glue job + * */ + connections?: IConnection[]; + + /** + * Max Retries (optional) + * Maximum number of retry attempts Glue perform + * if the job fails + * @default 0 + * */ + maxRetries?: int; + + /** + * Timeout (optional) + * Timeout for the Glue job, specified in minutes + * */ + timeout?: cdk.Duration; + + /** + * Security Configuration (optional) + * Defines the encryption options for the Glue job + * */ + securityConfiguration?: ISecurityConfiguration; + + /** + * Tags (optional) + * A list of key:value pairs of tags to apply to this Glue job resource + * */ + tags?: {[key: string], string }[]; + + /** + * Glue Version + * The version of Glue to use to execute this job + * @default 3.0 + * */ + glueVersion?: glue.GlueVersion; +} +``` -## SecurityConfiguration +3. **Flex Jobs** -A `SecurityConfiguration` is a set of security properties that can be used by AWS Glue to encrypt data at rest. +The flexible execution class is appropriate for non-urgent jobs such as +pre-production jobs, testing, and one-time data loads. Flexible jobs default +to Glue version 3.0 and worker type `G_2X`. The following best practice +features are enabled by default: +`--enable-metrics, --enable-spark-ui, --enable-continuous-cloudwatch-log`. ```ts -new glue.SecurityConfiguration(this, 'MySecurityConfiguration', { - cloudWatchEncryption: { - mode: glue.CloudWatchEncryptionMode.KMS, - }, - jobBookmarksEncryption: { - mode: glue.JobBookmarksEncryptionMode.CLIENT_SIDE_KMS, - }, - s3Encryption: { - mode: glue.S3EncryptionMode.KMS, - }, +glue.ScalaSparkFlexEtlJob(this, 'ScalaSparkFlexEtlJob', { + script: glue.Code.fromBucket('bucket-name', 's3prefix/path-to-scala-jar'), + className: 'com.example.HelloWorld', + role: iam.IRole, }); -``` - -By default, a shared KMS key is created for use with the encryption configurations that require one.
You can also supply your own key for each encryption config, for example, for CloudWatch encryption: -```ts -declare const key: kms.Key; -new glue.SecurityConfiguration(this, 'MySecurityConfiguration', { - cloudWatchEncryption: { - mode: glue.CloudWatchEncryptionMode.KMS, - kmsKey: key, - }, +glue.pySparkFlexEtlJob(this, 'pySparkFlexEtlJob', { + script: glue.Code.fromBucket('bucket-name', 's3prefix/path-to-python-script'), + role: iam.IRole, }); ``` -See [documentation](https://docs.aws.amazon.com/glue/latest/dg/encryption-security-configuration.html) for more info for Glue encrypting data written by Crawlers, Jobs, and Development Endpoints. - -## Database - -A `Database` is a logical grouping of `Tables` in the Glue Catalog. +Optional override examples: ```ts -new glue.Database(this, 'MyDatabase', { - databaseName: 'my_database', - description: 'my_database_description', +glue.ScalaSparkFlexEtlJob(this, 'ScalaSparkFlexEtlJob', { + glueVersion: glue.GlueVersion.V3_0, + script: glue.Code.fromBucket('bucket-name', 's3prefix/path-to-scala-script'), + className: 'com.example.HelloWorld', + extraJars: [glue.Code.fromBucket('bucket-name', 'path-to-extra-jars')], + description: 'an example pySpark ETL job', + numberOfWorkers: 20, + workerType: glue.WorkerType.G8X, + timeout: cdk.Duration.minutes(15), + role: iam.IRole, }); -``` -## Table +new glue.pySparkFlexEtlJob(this, 'pySparkFlexEtlJob', { + glueVersion: glue.GlueVersion.V3_0, + pythonVersion: glue.PythonVersion.3_9, + script: glue.Code.fromBucket('bucket-name', 's3prefix/path-to-python-script'), + description: 'an example Flex job', + numberOfWorkers: 20, + workerType: glue.WorkerType.G8X, + timeout: cdk.Duration.minutes(15), + role: iam.IRole, +}); +``` -A Glue table describes a table of data in S3: its structure (column names and types), location of data (S3 objects with a common prefix in a S3 bucket), and format for the files (Json, Avro, Parquet, etc.): +Scala Spark Flex Job Property Interface: ```ts -declare 
const myDatabase: glue.Database; -new glue.S3Table(this, 'MyTable', { - database: myDatabase, - columns: [{ - name: 'col1', - type: glue.Schema.STRING, - }, { - name: 'col2', - type: glue.Schema.array(glue.Schema.STRING), - comment: 'col2 is an array of strings' // comment is optional - }], - dataFormat: glue.DataFormat.JSON, -}); +ScalaSparkFlexJobProps{ + /** + * Script Code Location (required) + * Script to run when the Glue job executes. Can be uploaded + * from the local directory structure using fromAsset + * or referenced via S3 location using fromBucket + * */ + script: glue.Code; + + /** + * Class name (required for Scala scripts) + * Package and class name for the entry point of Glue job execution for + * Java scripts + * */ + className: string; + + /** + * Extra Jars S3 URL (optional) + * S3 URL where additional jar dependencies are located + */ + extraJars?: string[]; + + /** + * IAM Role (required) + * IAM Role to use for Glue job execution + * Must be specified by the developer because the L2 doesn't have visibility + * into the actions the script(s) take during the job execution + * */ + role: iam.IRole; + + /** + * Name of the Glue job (optional) + * Developer-specified name of the Glue job + * */ + name?: string; + + /** + * Description (optional) + * Developer-specified description of the Glue job + * */ + description?: string; + + /** + * Number of Workers (optional) + * Number of workers for Glue to use during job execution + * @default 10 + * */ + numberOrWorkers?: int; + + /** + * Worker Type (optional) + * Type of Worker for Glue to use during job execution + * Enum options: Standard, G_1X, G_2X, G_025X. 
G_4X, G_8X, Z_2X + * @default G_2X + * */ + workerType?: glue.WorkerType; + + /** + * Max Concurrent Runs (optional) + * The maximum number of runs this Glue job can concurrently run + * @default 1 + * */ + maxConcurrentRuns?: int; + + /** + * Default Arguments (optional) + * The default arguments for every run of this Glue job, + * specified as name-value pairs. + * */ + defaultArguments?: {[key: string], string }[]; + + /** + * Connections (optional) + * List of connections to use for this Glue job + * */ + connections?: IConnection[]; + + /** + * Max Retries (optional) + * Maximum number of retry attempts Glue perform + * if the job fails + * @default 0 + * */ + maxRetries?: int; + + /** + * Timeout (optional) + * Timeout for the Glue job, specified in minutes + * @default 2880 (2 days for non-streaming) + * */ + timeout?: cdk.Duration; + + /** + * Security Configuration (optional) + * Defines the encryption options for the Glue job + * */ + securityConfiguration?: ISecurityConfiguration; + + /** + * Tags (optional) + * A list of key:value pairs of tags to apply to this Glue job resource + * */ + tags?: {[key: string], string }[]; + + /** + * Glue Version + * The version of Glue to use to execute this job + * @default 3.0 + * */ + glueVersion?: glue.GlueVersion; +} ``` -By default, a S3 bucket will be created to store the table's data but you can manually pass the `bucket` and `s3Prefix`: +pySpark Flex Job Property Interface: ```ts -declare const myBucket: s3.Bucket; -declare const myDatabase: glue.Database; -new glue.S3Table(this, 'MyTable', { - bucket: myBucket, - s3Prefix: 'my-table/', - // ... - database: myDatabase, - columns: [{ - name: 'col1', - type: glue.Schema.STRING, - }], - dataFormat: glue.DataFormat.JSON, -}); +PySparkFlexJobProps{ + /** + * Script Code Location (required) + * Script to run when the Glue job executes. 
Can be uploaded + * from the local directory structure using fromAsset + * or referenced via S3 location using fromBucket + * */ + script: glue.Code; + + /** + * IAM Role (required) + * IAM Role to use for Glue job execution + * Must be specified by the developer because the L2 doesn't have visibility + * into the actions the script(s) take during the job execution + * */ + role: iam.IRole; + + /** + * Name of the Glue job (optional) + * Developer-specified name of the Glue job + * */ + name?: string; + + /** + * Description (optional) + * Developer-specified description of the Glue job + * */ + description?: string; + + /** + * Number of Workers (optional) + * Number of workers for Glue to use during job execution + * @default 10 + * */ + numberOrWorkers?: int; + + /** + * Worker Type (optional) + * Type of Worker for Glue to use during job execution + * Enum options: Standard, G_1X, G_2X, G_025X. G_4X, G_8X, Z_2X + * @default G_2X + * */ + workerType?: glue.WorkerType; + + /** + * Max Concurrent Runs (optional) + * The maximum number of runs this Glue job can concurrently run + * @default 1 + * */ + maxConcurrentRuns?: int; + + /** + * Default Arguments (optional) + * The default arguments for every run of this Glue job, + * specified as name-value pairs. 
+ * */ + defaultArguments?: {[key: string], string }[]; + + /** + * Connections (optional) + * List of connections to use for this Glue job + * */ + connections?: IConnection[]; + + /** + * Max Retries (optional) + * Maximum number of retry attempts Glue perform + * if the job fails + * @default 0 + * */ + maxRetries?: int; + + /** + * Timeout (optional) + * Timeout for the Glue job, specified in minutes + * @default 2880 (2 days for non-streaming) + * */ + timeout?: cdk.Duration; + + /** + * Security Configuration (optional) + * Defines the encryption options for the Glue job + * */ + securityConfiguration?: ISecurityConfiguration; + + /** + * Tags (optional) + * A list of key:value pairs of tags to apply to this Glue job resource + * */ + tags?: {[key: string], string }[]; + + /** + * Glue Version + * The version of Glue to use to execute this job + * @default 3.0 + * */ + glueVersion?: glue.GlueVersion; +} ``` -Glue tables can be configured to contain user-defined properties, to describe the physical storage of table data, through the `storageParameters` property: +### Python Shell Jobs + +Python shell jobs support a Python version that depends on the AWS Glue +version you use. These can be used to schedule and run tasks that don't +require an Apache Spark environment. Python shell jobs default to +Python 3.9 and a MaxCapacity of `0.0625`. Python 3.9 supports pre-loaded +analytics libraries using the `library-set=analytics` flag, which is +enabled by default. ```ts -declare const myDatabase: glue.Database; -new glue.S3Table(this, 'MyTable', { - storageParameters: [ - glue.StorageParameter.skipHeaderLineCount(1), - glue.StorageParameter.compressionType(glue.CompressionType.GZIP), - glue.StorageParameter.custom('separatorChar', ',') - ], - // ... 
- database: myDatabase, - columns: [{ - name: 'col1', - type: glue.Schema.STRING, - }], - dataFormat: glue.DataFormat.JSON, +new glue.PythonShellJob(this, 'PythonShellJob', { + script: glue.Code.fromBucket('bucket-name', 's3prefix/path-to-python-script'), + role: iam.IRole, }); ``` -Glue tables can also be configured to contain user-defined table properties through the [`parameters`](https://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/aws-properties-glue-table-tableinput.html#cfn-glue-table-tableinput-parameters) property: +Optional override examples: ```ts -declare const myDatabase: glue.Database; -new glue.S3Table(this, 'MyTable', { - parameters: { - key1: 'val1', - key2: 'val2', - }, - database: myDatabase, - columns: [{ - name: 'col1', - type: glue.Schema.STRING, - }], - dataFormat: glue.DataFormat.JSON, +new glue.PythonShellJob(this, 'PythonShellJob', { + glueVersion: glue.GlueVersion.V1_0, + pythonVersion: glue.PythonVersion.3_9, + script: glue.Code.fromBucket('bucket-name', 's3prefix/path-to-python-script'), + description: 'an example Python Shell job', + numberOfWorkers: 20, + workerType: glue.WorkerType.G8X, + timeout: cdk.Duration.minutes(15), + role: iam.IRole, }); ``` -### Partition Keys - -To improve query performance, a table can specify `partitionKeys` on which data is stored and queried separately. For example, you might partition a table by `year` and `month` to optimize queries based on a time window: +Python Shell Job Property Interface: ```ts -declare const myDatabase: glue.Database; -new glue.S3Table(this, 'MyTable', { - database: myDatabase, - columns: [{ - name: 'col1', - type: glue.Schema.STRING, - }], - partitionKeys: [{ - name: 'year', - type: glue.Schema.SMALL_INT, - }, { - name: 'month', - type: glue.Schema.SMALL_INT, - }], - dataFormat: glue.DataFormat.JSON, -}); +PythonShellJobProps{ + /** + * Script Code Location (required) + * Script to run when the Glue job executes. 
Can be uploaded + * from the local directory structure using fromAsset + * or referenced via S3 location using fromBucket + * */ + script: glue.Code; + + /** + * IAM Role (required) + * IAM Role to use for Glue job execution + * Must be specified by the developer because the L2 doesn't have visibility + * into the actions the script(s) take during the job execution + * */ + role: iam.IRole; + + /** + * Name of the Glue job (optional) + * Developer-specified name of the Glue job + * */ + name?: string; + + /** + * Description (optional) + * Developer-specified description of the Glue job + * */ + description?: string; + + /** + * Number of Workers (optional) + * Number of workers for Glue to use during job execution + * @default 10 + * */ + numberOrWorkers?: int; + + /** + * Worker Type (optional) + * Type of Worker for Glue to use during job execution + * Enum options: Standard, G_1X, G_2X, G_025X. G_4X, G_8X, Z_2X + * @default G_2X + * */ + workerType?: glue.WorkerType; + + /** + * Max Concurrent Runs (optional) + * The maximum number of runs this Glue job can concurrently run + * @default 1 + * */ + maxConcurrentRuns?: int; + + /** + * Default Arguments (optional) + * The default arguments for every run of this Glue job, + * specified as name-value pairs. 
+ * */ + defaultArguments?: {[key: string], string }[]; + + /** + * Connections (optional) + * List of connections to use for this Glue job + * */ + connections?: IConnection[]; + + /** + * Max Retries (optional) + * Maximum number of retry attempts Glue perform + * if the job fails + * @default 0 + * */ + maxRetries?: int; + + /** + * Timeout (optional) + * Timeout for the Glue job, specified in minutes + * @default 2880 (2 days for non-streaming) + * */ + timeout?: cdk.Duration; + + /** + * Security Configuration (optional) + * Defines the encryption options for the Glue job + * */ + securityConfiguration?: ISecurityConfiguration; + + /** + * Tags (optional) + * A list of key:value pairs of tags to apply to this Glue job resource + * */ + tags?: {[key: string], string }[]; + + /** + * Glue Version + * The version of Glue to use to execute this job + * @default 3.0 for ETL + * */ + glueVersion?: glue.GlueVersion; +} ``` -### Partition Indexes - -Another way to improve query performance is to specify partition indexes. If no partition indexes are -present on the table, AWS Glue loads all partitions of the table and filters the loaded partitions using -the query expression. The query takes more time to run as the number of partitions increase. With an -index, the query will try to fetch a subset of the partitions instead of loading all partitions of the -table. +### Ray Jobs -The keys of a partition index must be a subset of the partition keys of the table. You can have a -maximum of 3 partition indexes per table. To specify a partition index, you can use the `partitionIndexes` -property: +Glue Ray jobs use worker type Z.2X and Glue version 4.0. These are not +overridable since they are the only configurations that Glue Ray jobs +currently support. The runtime defaults to Ray2.4 and min workers defaults to 3.
```ts -declare const myDatabase: glue.Database; -new glue.S3Table(this, 'MyTable', { - database: myDatabase, - columns: [{ - name: 'col1', - type: glue.Schema.STRING, - }], - partitionKeys: [{ - name: 'year', - type: glue.Schema.SMALL_INT, - }, { - name: 'month', - type: glue.Schema.SMALL_INT, - }], - partitionIndexes: [{ - indexName: 'my-index', // optional - keyNames: ['year'], - }], // supply up to 3 indexes - dataFormat: glue.DataFormat.JSON, +new glue.GlueRayJob(this, 'GlueRayJob', { + script: glue.Code.fromBucket('bucket-name', 's3prefix/path-to-python-script'), + role: iam.IRole, }); ``` -Alternatively, you can call the `addPartitionIndex()` function on a table: +Optional override example: ```ts -declare const myTable: glue.Table; -myTable.addPartitionIndex({ - indexName: 'my-index', - keyNames: ['year'], +new glue.GlueRayJob(this, 'GlueRayJob', { + script: glue.Code.fromBucket('bucket-name', 's3prefix/path-to-python-script'), + numberOfWorkers: 50, + minWorkers: 25, + role: iam.IRole, }); ``` -### Partition Filtering - -If you have a table with a large number of partitions that grows over time, consider using AWS Glue partition indexing and filtering. +Ray Job Property Interface: ```ts -declare const myDatabase: glue.Database; -new glue.S3Table(this, 'MyTable', { - database: myDatabase, - columns: [{ - name: 'col1', - type: glue.Schema.STRING, - }], - partitionKeys: [{ - name: 'year', - type: glue.Schema.SMALL_INT, - }, { - name: 'month', - type: glue.Schema.SMALL_INT, - }], - dataFormat: glue.DataFormat.JSON, - enablePartitionFiltering: true, -}); +RayJobProps{ + /** + * Script Code Location (required) + * Script to run when the Glue job executes. 
Can be uploaded + * from the local directory structure using fromAsset + * or referenced via S3 location using fromBucket + * */ + script: glue.Code; + + /** + * IAM Role (required) + * IAM Role to use for Glue job execution + * Must be specified by the developer because the L2 doesn't have visibility + * into the actions the script(s) take during the job execution + * */ + role: iam.IRole; + + /** + * Name of the Glue job (optional) + * Developer-specified name of the Glue job + * */ + name?: string; + + /** + * Description (optional) + * Developer-specified description of the Glue job + * */ + description?: string; + + /** + * Number of Workers (optional) + * Number of workers for Glue to use during job execution + * @default 3 + * */ + numberOfWorkers?: int; + + /** + * Worker Type (optional) + * Type of Worker for Glue to use during job execution + * Enum options: Standard, G_1X, G_2X, G_025X, G_4X, G_8X, Z_2X + * @default Z_2X + * */ + workerType?: glue.WorkerType; + + /** + * Runtime (optional) + * Ray runtime version for Glue to use during job execution + * Enum options: Ray2_2, Ray2_3, Ray2_4 + * @default Ray2_4 + * */ + runtime?: glue.RayRuntime; + + /** + * Max Concurrent Runs (optional) + * The maximum number of runs this Glue job can concurrently run + * @default 1 + * */ + maxConcurrentRuns?: int; + + /** + * Default Arguments (optional) + * The default arguments for every run of this Glue job, + * specified as name-value pairs. 
+ * */ + defaultArguments?: {[key: string], string }[]; + + /** + * Connections (optional) + * List of connections to use for this Glue job + * */ + connections?: IConnection[]; + + /** + * Max Retries (optional) + * Maximum number of retry attempts Glue perform + * if the job fails + * @default 0 + * */ + maxRetries?: int; + + /** + * Timeout (optional) + * Timeout for the Glue job, specified in minutes + * @default 2880 (2 days for non-streaming) + * */ + timeout?: cdk.Duration; + + /** + * Security Configuration (optional) + * Defines the encryption options for the Glue job + * */ + securityConfiguration?: ISecurityConfiguration; + + /** + * Tags (optional) + * A list of key:value pairs of tags to apply to this Glue job resource + * */ + tags?: {[key: string], string }[]; + + /** + * Glue Version + * The version of Glue to use to execute this job + * @default 4.0 + * */ + glueVersion?: glue.GlueVersion; +} ``` -### Glue Connections - -Glue connections allow external data connections to third party databases and data warehouses. However, these connections can also be assigned to Glue Tables, allowing you to query external data sources using the Glue Data Catalog. +### Uploading scripts from the CDK app repository to S3 -Whereas `S3Table` will point to (and if needed, create) a bucket to store the tables' data, `ExternalTable` will point to an existing table in a data source. For example, to create a table in Glue that points to a table in Redshift: +Similar to other L2 constructs, the Glue L2 automates uploading / updating +scripts to S3 via an optional fromAsset parameter pointing to a script +in the local file structure. You provide the existing S3 bucket and +path to which you'd like the script to be uploaded. ```ts -declare const myConnection: glue.Connection; -declare const myDatabase: glue.Database; -new glue.ExternalTable(this, 'MyTable', { - connection: myConnection, - externalDataLocation: 'default_db_public_example', // A table in Redshift - // ... 
- database: myDatabase, - columns: [{ - name: 'col1', - type: glue.Schema.STRING, - }], - dataFormat: glue.DataFormat.JSON, +new glue.ScalaSparkEtlJob(this, 'ScalaSparkEtlJob', { + script: glue.Code.fromAsset('bucket-name', 'local/path/to/scala-jar'), + className: 'com.example.HelloWorld', }); ``` -## [Encryption](https://docs.aws.amazon.com/athena/latest/ug/encryption.html) +### Workflow Triggers -You can enable encryption on a Table's data: +You can use Glue workflows to create and visualize complex +extract, transform, and load (ETL) activities involving multiple crawlers, +jobs, and triggers. Standalone triggers are an anti-pattern, so you must +create triggers from within a workflow using the L2 construct. -* [S3Managed](https://docs.aws.amazon.com/AmazonS3/latest/dev/UsingServerSideEncryption.html) - (default) Server side encryption (`SSE-S3`) with an Amazon S3-managed key. +Within a workflow object, there are functions to create different +types of triggers with actions and predicates. You then add those triggers +to jobs. -```ts -declare const myDatabase: glue.Database; -new glue.S3Table(this, 'MyTable', { - encryption: glue.TableEncryption.S3_MANAGED, - // ... - database: myDatabase, - columns: [{ - name: 'col1', - type: glue.Schema.STRING, - }], - dataFormat: glue.DataFormat.JSON, -}); -``` +StartOnCreation defaults to true for all trigger types, but you can +override it if you prefer for your trigger not to start on creation. + +1. **On-Demand Triggers** -* [Kms](https://docs.aws.amazon.com/AmazonS3/latest/dev/UsingKMSEncryption.html) - Server-side encryption (`SSE-KMS`) with an AWS KMS Key managed by the account owner. +On-demand triggers can start glue jobs or crawlers. This construct provides +convenience functions to create on-demand crawler or job triggers. The constructor +takes an optional description parameter, but abstracts the requirement of an +actions list using the job or crawler objects using conditional types. 
```ts -declare const myDatabase: glue.Database; -// KMS key is created automatically -new glue.S3Table(this, 'MyTable', { - encryption: glue.TableEncryption.KMS, - // ... - database: myDatabase, - columns: [{ - name: 'col1', - type: glue.Schema.STRING, - }], - dataFormat: glue.DataFormat.JSON, +myWorkflow = new glue.Workflow(this, "GlueWorkflow", { + name: "MyWorkflow"; + description: "New Workflow"; + properties: {'key', 'value'}; }); -// with an explicit KMS key -new glue.S3Table(this, 'MyTable', { - encryption: glue.TableEncryption.KMS, - encryptionKey: new kms.Key(this, 'MyKey'), - // ... - database: myDatabase, - columns: [{ - name: 'col1', - type: glue.Schema.STRING, - }], - dataFormat: glue.DataFormat.JSON, +myWorkflow.onDemandTrigger(this, 'TriggerJobOnDemand', { + description: 'On demand run for ' + glue.JobExecutable.name, + actions: [jobOrCrawler: glue.JobExecutable | cdk.CfnCrawler?, ...] }); ``` -* [KmsManaged](https://docs.aws.amazon.com/AmazonS3/latest/dev/UsingKMSEncryption.html) - Server-side encryption (`SSE-KMS`), like `Kms`, except with an AWS KMS Key managed by the AWS Key Management Service. +1. **Scheduled Triggers** + +You can create scheduled triggers using cron expressions. This construct +provides daily, weekly, and monthly convenience functions, +as well as a custom function that allows you to create your own +custom timing using the [existing event Schedule class](https://docs.aws.amazon.com/cdk/api/v2/docs/aws-cdk-lib.aws_events.Schedule.html) +without having to build your own cron expressions. The L2 extracts +the expression that Glue requires from the Schedule object. The constructor +takes an optional description and a list of jobs or crawlers as actions. ```ts -declare const myDatabase: glue.Database; -new glue.S3Table(this, 'MyTable', { - encryption: glue.TableEncryption.KMS_MANAGED, - // ... 
- database: myDatabase, - columns: [{ - name: 'col1', - type: glue.Schema.STRING, - }], - dataFormat: glue.DataFormat.JSON, +// Create Daily Schedule at 00 UTC +myWorkflow.dailyScheduleTrigger(this, 'TriggerCrawlerOnDailySchedule', { + description: 'Scheduled run for ' + glue.JobExecutable.name, + actions: [ jobOrCrawler: glue.JobExecutable | cdk.CfnCrawler?, ...] }); -``` -* [ClientSideKms](https://docs.aws.amazon.com/AmazonS3/latest/dev/UsingClientSideEncryption.html#client-side-encryption-kms-managed-master-key-intro) - Client-side encryption (`CSE-KMS`) with an AWS KMS Key managed by the account owner. - -```ts -declare const myDatabase: glue.Database; -// KMS key is created automatically -new glue.S3Table(this, 'MyTable', { - encryption: glue.TableEncryption.CLIENT_SIDE_KMS, - // ... - database: myDatabase, - columns: [{ - name: 'col1', - type: glue.Schema.STRING, - }], - dataFormat: glue.DataFormat.JSON, +// Create Weekly schedule at 00 UTC on Sunday +myWorkflow.weeklyScheduleTrigger(this, 'TriggerJobOnWeeklySchedule', { + description: 'Scheduled run for ' + glue.JobExecutable.name, + actions: [jobOrCrawler: glue.JobExecutable | cdk.CfnCrawler?, ...] }); -// with an explicit KMS key -new glue.S3Table(this, 'MyTable', { - encryption: glue.TableEncryption.CLIENT_SIDE_KMS, - encryptionKey: new kms.Key(this, 'MyKey'), - // ... - database: myDatabase, - columns: [{ - name: 'col1', - type: glue.Schema.STRING, - }], - dataFormat: glue.DataFormat.JSON, +// Create Custom schedule, e.g. Monthly on the 7th day at 15:30 UTC +myWorkflow.customScheduleJobTrigger(this, 'TriggerCrawlerOnCustomSchedule', { + description: 'Scheduled run for ' + glue.JobExecutable.name, + actions: [jobOrCrawler: glue.JobExecutable | cdk.CfnCrawler?, ...] + schedule: events.Schedule.cron(day: '7', hour: '15', minute: '30') }); ``` -*Note: you cannot provide a `Bucket` when creating the `S3Table` if you wish to use server-side encryption (`KMS`, `KMS_MANAGED` or `S3_MANAGED`)*. 
- -## Types +#### **3. Notify Event Triggers** -A table's schema is a collection of columns, each of which have a `name` and a `type`. Types are recursive structures, consisting of primitive and complex types: +There are two types of notify event triggers: batching and non-batching. +For batching triggers, you must specify `BatchSize`. For non-batching +triggers, `BatchSize` defaults to 1. For both triggers, `BatchWindow` +defaults to 900 seconds, but you can override the window to align with +your workload's requirements. ```ts -declare const myDatabase: glue.Database; -new glue.S3Table(this, 'MyTable', { - columns: [{ - name: 'primitive_column', - type: glue.Schema.STRING, - }, { - name: 'array_column', - type: glue.Schema.array(glue.Schema.INTEGER), - comment: 'array', - }, { - name: 'map_column', - type: glue.Schema.map( - glue.Schema.STRING, - glue.Schema.TIMESTAMP), - comment: 'map', - }, { - name: 'struct_column', - type: glue.Schema.struct([{ - name: 'nested_column', - type: glue.Schema.DATE, - comment: 'nested comment', - }]), - comment: "struct", - }], - // ... 
- database: myDatabase, - dataFormat: glue.DataFormat.JSON, -}); -``` - -### Primitives - -#### Numeric - -| Name | Type | Comments | -|----------- |---------- |------------------------------------------------------------------------------------------------------------------ | -| FLOAT | Constant | A 32-bit single-precision floating point number | -| INTEGER | Constant | A 32-bit signed value in two's complement format, with a minimum value of -2^31 and a maximum value of 2^31-1 | -| DOUBLE | Constant | A 64-bit double-precision floating point number | -| BIG_INT | Constant | A 64-bit signed INTEGER in two’s complement format, with a minimum value of -2^63 and a maximum value of 2^63 -1 | -| SMALL_INT | Constant | A 16-bit signed INTEGER in two’s complement format, with a minimum value of -2^15 and a maximum value of 2^15-1 | -| TINY_INT | Constant | A 8-bit signed INTEGER in two’s complement format, with a minimum value of -2^7 and a maximum value of 2^7-1 | - -#### Date and time - -| Name | Type | Comments | -|----------- |---------- |------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| DATE | Constant | A date in UNIX format, such as YYYY-MM-DD. | -| TIMESTAMP | Constant | Date and time instant in the UNiX format, such as yyyy-mm-dd hh:mm:ss[.f...]. For example, TIMESTAMP '2008-09-15 03:04:05.324'. This format uses the session time zone. | - -#### String - -| Name | Type | Comments | -|-------------------------------------------- |---------- |--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| STRING | Constant | A string literal enclosed in single or double quotes | -| decimal(precision: number, scale?: number) | Function | `precision` is the total number of digits. 
`scale` (optional) is the number of digits in fractional part with a default of 0. For example, use these type definitions: decimal(11,5), decimal(15) | -| char(length: number) | Function | Fixed length character data, with a specified length between 1 and 255, such as char(10) | -| varchar(length: number) | Function | Variable length character data, with a specified length between 1 and 65535, such as varchar(10) | - -#### Miscellaneous - -| Name | Type | Comments | -|--------- |---------- |------------------------------- | -| BOOLEAN | Constant | Values are `true` and `false` | -| BINARY | Constant | Value is in binary | - -### Complex +myWorkflow.notifyEventTrigger(this, 'MyNotifyTriggerBatching', { + batchSize: int, + jobActions: [jobOrCrawler: glue.JobExecutable | cdk.CfnCrawler?, ...], + actions: [jobOrCrawler: glue.JobExecutable | cdk.CfnCrawler?, ... ] +}); -| Name | Type | Comments | -|------------------------------------- |---------- |------------------------------------------------------------------- | -| array(itemType: Type) | Function | An array of some other type | -| map(keyType: Type, valueType: Type) | Function | A map of some primitive key type to any value type | -| struct(collumns: Column[]) | Function | Nested structure containing individually named and typed collumns | +myWorkflow.notifyEventTrigger(this, 'MyNotifyTriggerNonBatching', { + actions: [jobOrCrawler: glue.JobExecutable | cdk.CfnCrawler?, ...] +}); +``` -## Data Quality Ruleset +#### **4. Conditional Triggers** -A `DataQualityRuleset` specifies a data quality ruleset with DQDL rules applied to a specified AWS Glue table. For example, to create a data quality ruleset for a given table: +Conditional triggers have a predicate and actions associated with them. +The trigger actions are executed when the predicateCondition is true. 
```ts -new glue.DataQualityRuleset(this, 'MyDataQualityRuleset', { - clientToken: 'client_token', - description: 'description', - rulesetName: 'ruleset_name', - rulesetDqdl: 'ruleset_dqdl', - tags: { - key1: 'value1', - key2: 'value2', - }, - targetTable: new glue.DataQualityTargetTable('database_name', 'table_name'), +// Triggers on Job and Crawler status +myWorkflow.conditionalTrigger(this, 'conditionalTrigger', { + description: 'Conditional trigger for ' + myGlueJob.name, + actions: [jobOrCrawler: glue.JobExecutable | cdk.CfnCrawler?, ...] + predicateCondition: glue.TriggerPredicateCondition.AND, + jobPredicates: [{'job': JobExecutable, 'state': glue.JobState.FAILED}, + {'job': JobExecutable, 'state' : glue.JobState.SUCCEEDED}] }); ``` -For more information, see [AWS Glue Data Quality](https://docs.aws.amazon.com/glue/latest/dg/glue-data-quality.html). +### Connection Properties + +A `Connection` allows Glue jobs, crawlers and development endpoints to access +certain types of data stores. + +* **Secrets Management** + You must specify JDBC connection credentials in Secrets Manager and + provide the Secrets Manager Key name as a property to the job connection. + +* **Networking - the CDK determines the best fit subnet for Glue connection +configuration** + The prior version of the glue-alpha-module requires the developer to + specify the subnet of the Connection when it’s defined. Now, you can still + specify the specific subnet you want to use, but are no longer required + to. You are only required to provide a VPC and either a public or private + subnet selection. Without a specific subnet provided, the L2 leverages the + existing [EC2 Subnet Selection](https://docs.aws.amazon.com/cdk/api/v2/python/aws_cdk.aws_ec2/SubnetSelection.html) + library to make the best choice selection for the subnet. + +## Public FAQ + +### What are we launching today? 
+ +We’re launching new features to an AWS CDK Glue L2 Construct to provide +best-practice defaults and convenience methods to create Glue Jobs, Connections, +Triggers, Workflows, and the underlying permissions and configuration. + +### Why should I use this Construct? + +Developers should use this Construct to reduce the amount of boilerplate +code and complexity each individual has to navigate, and make it easier to +create best-practice Glue resources. + +### What’s not in scope? + +Glue Crawlers and other resources that are now managed by the AWS LakeFormation +team are not in scope for this effort. Developers should use existing methods +to create these resources, and the new Glue L2 construct assumes they already +exist as inputs. While best practice is for application and infrastructure code +to be as close as possible for teams using fully-implemented DevOps mechanisms, +in practice these ETL scripts are likely managed by a data science team who +know Python or Scala and don’t necessarily own or manage their own +infrastructure deployments. We want to meet developers where they are, and not +assume that all of the code resides in the same repository. Developers can +automate this themselves via the CDK, however, if they do own both. + +Validating Glue version and feature use per AWS region at synth time is also +not in scope. AWS’ intention is for all features to eventually be propagated to +all Global regions, so the complexity involved in creating and updating region- +specific configuration to match shifting feature sets does not out-weigh the +likelihood that a developer will use this construct to deploy resources to a +region without a particular new feature to a region that doesn’t yet support +it without researching or manually attempting to use that feature before +developing it via IaC. 
The developer will, of course, still get feedback from +the underlying Glue APIs as CloudFormation deploys the resources similar to the +current CDK L1 Glue experience. \ No newline at end of file From 316c670ea9fe6a6808fd3dbc3195b9412502cb63 Mon Sep 17 00:00:00 2001 From: Natalie White Date: Fri, 12 Jul 2024 21:55:06 +0000 Subject: [PATCH 39/51] Resolve README linter issues --- packages/@aws-cdk/aws-glue-alpha/README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/packages/@aws-cdk/aws-glue-alpha/README.md b/packages/@aws-cdk/aws-glue-alpha/README.md index 9cb76fb062f59..9595c8e15f939 100644 --- a/packages/@aws-cdk/aws-glue-alpha/README.md +++ b/packages/@aws-cdk/aws-glue-alpha/README.md @@ -358,7 +358,7 @@ pySparkEtlJobProps{ } ``` -2. **Streaming Jobs** +**Streaming Jobs** Streaming jobs are similar to ETL jobs, except that they perform ETL on data streams using the Apache Spark Structured Streaming framework. Some Spark @@ -629,7 +629,7 @@ pySparkStreamingJobProps{ } ``` -3. **Flex Jobs** +**Flex Jobs** The flexible execution class is appropriate for non-urgent jobs such as pre-production jobs, testing, and one-time data loads. Flexible jobs default @@ -1335,4 +1335,4 @@ region without a particular new feature to a region that doesn’t yet support it without researching or manually attempting to use that feature before developing it via IaC. The developer will, of course, still get feedback from the underlying Glue APIs as CloudFormation deploys the resources similar to the -current CDK L1 Glue experience. \ No newline at end of file +current CDK L1 Glue experience. 
From 6343ad2d0843e88b98fbaa0ff9b2453bcfa2e2a4 Mon Sep 17 00:00:00 2001 From: Natalie White Date: Mon, 15 Jul 2024 04:06:15 +0000 Subject: [PATCH 40/51] Increase unit test coverage, especially for pyspark etl jobs and ray jobs --- .../aws-glue-alpha/lib/jobs/ray-job.ts | 21 +- .../test/pyspark-etl-jobs.test.ts | 426 ++++++++++++++++++ .../test/pyspark-flex-etl-jobs.test.ts | 85 +++- .../test/pyspark-streaming-jobs.test.ts | 83 +++- .../test/python-shell-job.test.ts | 75 ++- .../aws-glue-alpha/test/ray-job.test.ts | 235 +++++++++- .../test/scalaspark-etl-jobs.test.ts | 85 +++- 7 files changed, 993 insertions(+), 17 deletions(-) diff --git a/packages/@aws-cdk/aws-glue-alpha/lib/jobs/ray-job.ts b/packages/@aws-cdk/aws-glue-alpha/lib/jobs/ray-job.ts index 44d979833f58f..0822ae797ed24 100644 --- a/packages/@aws-cdk/aws-glue-alpha/lib/jobs/ray-job.ts +++ b/packages/@aws-cdk/aws-glue-alpha/lib/jobs/ray-job.ts @@ -7,9 +7,10 @@ import { JobType, GlueVersion, WorkerType, Runtime } from '../constants'; /** * Ray Jobs class * - * Glue ray only supports worker type Z.2X and Glue version 4.0. - * Runtime will default to Ray2.4 and min workers will default to 3. - * + * Glue Ray jobs use worker type Z.2X and Glue version 4.0. + * These are not overrideable since these are the only configuration that + * Glue Ray jobs currently support. The runtime defaults to Ray2.4 and min + * workers defaults to 3. */ /** @@ -48,10 +49,7 @@ export class RayJob extends Job { physicalName: props.jobName, }); - // List of supported Glue versions by Ray - const supportedGlueVersions = [ - GlueVersion.V4_0, - ]; + this.jobName = props.jobName ?? 
''; // Set up role and permissions for principal this.role = props.role, { @@ -77,9 +75,9 @@ export class RayJob extends Job { throw new Error('Ray jobs only support Z.2X worker type'); }; - if (props.glueVersion && !(supportedGlueVersions.includes(props.glueVersion))) { - throw new Error('You must set GlueVersion to 4.0 or greater'); - }; + if ((!props.workerType && props.numberOfWorkers !== undefined) || (props.workerType && props.numberOfWorkers === undefined)) { + throw new Error('Both workerType and numberOFWorkers must be set'); + } const jobResource = new CfnJob(this, 'Resource', { name: props.jobName, @@ -90,7 +88,7 @@ export class RayJob extends Job { scriptLocation: this.codeS3ObjectUrl(props.script), runtime: props.runtime ? props.runtime : Runtime.RAY_TWO_FOUR, }, - glueVersion: props.glueVersion ? props.glueVersion : GlueVersion.V4_0, + glueVersion: GlueVersion.V4_0, workerType: props.workerType ? props.workerType : WorkerType.Z_2X, numberOfWorkers: props.numberOfWorkers ? props.numberOfWorkers: 3, maxRetries: props.maxRetries, @@ -105,7 +103,6 @@ export class RayJob extends Job { const resourceName = this.getResourceNameAttribute(jobResource.ref); this.jobArn = this.buildJobArn(this, resourceName); this.jobName = resourceName; - } } \ No newline at end of file diff --git a/packages/@aws-cdk/aws-glue-alpha/test/pyspark-etl-jobs.test.ts b/packages/@aws-cdk/aws-glue-alpha/test/pyspark-etl-jobs.test.ts index a598468a45c70..07b50a8fc9330 100644 --- a/packages/@aws-cdk/aws-glue-alpha/test/pyspark-etl-jobs.test.ts +++ b/packages/@aws-cdk/aws-glue-alpha/test/pyspark-etl-jobs.test.ts @@ -11,6 +11,7 @@ describe('Job', () => { let script: glue.Code; let codeBucket: s3.IBucket; let job: glue.IJob; + let sparkUIBucket: s3.Bucket; beforeEach(() => { stack = new cdk.Stack(); @@ -130,4 +131,429 @@ describe('Job', () => { }); }); + + describe('Create PySpark ETL Job with G2 worker type with 2 workers', () => { + + beforeEach(() => { + job = new 
glue.PySparkEtlJob(stack, 'PySparkETLJob', { + role, + script, + jobName: 'PySparkETLJob', + workerType: glue.WorkerType.G_2X, + numberOfWorkers: 2, + }); + }); + + test('Test default attributes', () => { + expect(job.jobArn).toEqual(stack.formatArn({ + service: 'glue', + resource: 'job', + resourceName: job.jobName, + })); + expect(job.grantPrincipal).toEqual(role); + }); + + test('Default Glue Version should be 4.0', () => { + Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { + GlueVersion: '4.0', + }); + }); + + test('Has Continuous Logging Enabled', () => { + Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { + DefaultArguments: Match.objectLike({ + '--enable-metrics': '', + '--enable-observability-metrics': 'true', + '--job-language': 'python', + '--enable-continuous-cloudwatch-log': 'true', + }), + }); + }); + + test('Overriden numberOfWorkers should be 2', () => { + Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { + NumberOfWorkers: 2, + }); + }); + + test('Overriden WorkerType should be G.1X', () => { + Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { + WorkerType: 'G.2X', + }); + }); + }); + + describe('Create PySpark ETL Job with G4 worker type with 4 workers', () => { + + beforeEach(() => { + job = new glue.PySparkEtlJob(stack, 'PySparkETLJob', { + role, + script, + jobName: 'PySparkETLJob', + workerType: glue.WorkerType.G_4X, + numberOfWorkers: 4, + }); + }); + + test('Test default attributes', () => { + expect(job.jobArn).toEqual(stack.formatArn({ + service: 'glue', + resource: 'job', + resourceName: job.jobName, + })); + expect(job.grantPrincipal).toEqual(role); + }); + + test('Default Glue Version should be 4.0', () => { + Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { + GlueVersion: '4.0', + }); + }); + + test('Has Continuous Logging Enabled', () => { + Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { + DefaultArguments: 
Match.objectLike({ + '--enable-metrics': '', + '--enable-observability-metrics': 'true', + '--job-language': 'python', + '--enable-continuous-cloudwatch-log': 'true', + }), + }); + }); + + test('Overriden numberOfWorkers should be 2', () => { + Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { + NumberOfWorkers: 4, + }); + }); + + test('Overriden WorkerType should be G.4X', () => { + Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { + WorkerType: 'G.4X', + }); + }); + }); + + describe('Create PySpark ETL Job with G8 worker type and 8 workers', () => { + + beforeEach(() => { + job = new glue.PySparkEtlJob(stack, 'PySparkETLJob', { + role, + script, + jobName: 'PySparkETLJob', + workerType: glue.WorkerType.G_8X, + numberOfWorkers: 8, + }); + }); + + test('Test default attributes', () => { + expect(job.jobArn).toEqual(stack.formatArn({ + service: 'glue', + resource: 'job', + resourceName: job.jobName, + })); + expect(job.grantPrincipal).toEqual(role); + }); + + test('Default Glue Version should be 4.0', () => { + Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { + GlueVersion: '4.0', + }); + }); + + test('Has Continuous Logging Enabled', () => { + Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { + DefaultArguments: Match.objectLike({ + '--enable-metrics': '', + '--enable-observability-metrics': 'true', + '--job-language': 'python', + '--enable-continuous-cloudwatch-log': 'true', + }), + }); + }); + + test('Overriden numberOfWorkers should be 8', () => { + Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { + NumberOfWorkers: 8, + }); + }); + + test('Overriden WorkerType should be G.8X', () => { + Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { + WorkerType: 'G.8X', + }); + }); + }); + + describe('Override SparkUI properties for PySpark ETL Job', () => { + + beforeEach(() => { + sparkUIBucket = new s3.Bucket(stack, 'sparkUIbucket', { bucketName: 'bucket-name' 
}); + job = new glue.PySparkEtlJob(stack, 'PySparkETLJob', { + role, + script, + jobName: 'PySparkETLJob', + sparkUI: { + bucket: sparkUIBucket, + prefix: '/prefix', + }, + }); + }); + + test('Test default attributes', () => { + expect(job.jobArn).toEqual(stack.formatArn({ + service: 'glue', + resource: 'job', + resourceName: job.jobName, + })); + expect(job.grantPrincipal).toEqual(role); + }); + + test('Default Glue Version should be 4.0', () => { + Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { + GlueVersion: glue.GlueVersion.V4_0, + }); + }); + + test('Has Continuous Logging and SparkUIEnabled', () => { + Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { + DefaultArguments: Match.objectLike({ + '--enable-metrics': '', + '--enable-observability-metrics': 'true', + '--job-language': 'python', + '--enable-continuous-cloudwatch-log': 'true', + '--enable-spark-ui': 'true', + '--spark-event-logs-path': Match.objectLike({ + 'Fn::Join': [ + '', + [ + 's3://', + { Ref: Match.anyValue() }, + '/prefix/', + ], + ], + }), + }), + }); + }); + }); + + describe('Invalid overrides should cause errors', () => { + + test('Invalid SparkUI prefix should throw an error', () => { + expect(() => { + sparkUIBucket = new s3.Bucket(stack, 'sparkUIbucket', { bucketName: 'bucket-name' }); + job = new glue.PySparkEtlJob(stack, 'PySparkETLJob', { + role, + script, + jobName: 'PySparkETLJob', + sparkUI: { + bucket: sparkUIBucket, + prefix: 'prefix', + }, + numberOfWorkers: 8, + workerType: glue.WorkerType.G_8X, + continuousLogging: { enabled: false }, + }); + }).toThrow('Invalid prefix format (value: prefix)'); + }); + + test('Create PySpark ETL Job overriding only workerType to cause an Error', () => { + expect(() => { + job = new glue.PySparkEtlJob(stack, 'PySparkETLJob', { + role, + script, + jobName: 'PySparkETLJob', + workerType: glue.WorkerType.G_2X, + }); + }).toThrow(new Error('Both workerType and numberOFWorkers must be set')); + }); + + 
test('Create PySpark ETL Job overriding only numberOfWorkers to cause an Error', () => { + expect(() => { + job = new glue.PySparkEtlJob(stack, 'PySparkETLJob', { + role, + script, + jobName: 'PySparkETLJob', + numberOfWorkers: 5, + }); + }).toThrow(new Error('Both workerType and numberOFWorkers must be set')); + }); + }); + + describe('Create PySpark ETL Job with extraPythonFiles and extraFiles', () => { + + beforeEach(() => { + job = new glue.PySparkEtlJob(stack, 'PySparkETLJob', { + role, + script, + jobName: 'PySparkETLJob', + extraPythonFiles: [ + glue.Code.fromBucket( + s3.Bucket.fromBucketName(stack, 'extraPythonFilesBucket', 'extra-python-files-bucket'), + 'prefix/file.py'), + ], + extraFiles: [ + glue.Code.fromBucket( + s3.Bucket.fromBucketName(stack, 'extraFilesBucket', 'extra-files-bucket'), + 'prefix/file.txt'), + ], + }); + }); + + test('Test default attributes', () => { + expect(job.jobArn).toEqual(stack.formatArn({ + service: 'glue', + resource: 'job', + resourceName: job.jobName, + })); + expect(job.grantPrincipal).toEqual(role); + }); + + test('Default Glue Version should be 4.0', () => { + Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { + GlueVersion: glue.GlueVersion.V4_0, + }); + }); + + test('Verify Default Arguemnts', () => { + Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { + DefaultArguments: Match.objectLike({ + '--enable-metrics': '', + '--enable-observability-metrics': 'true', + '--job-language': 'python', + '--enable-continuous-cloudwatch-log': 'true', + '--extra-py-files': 's3://extra-python-files-bucket/prefix/file.py', + '--extra-files': 's3://extra-files-bucket/prefix/file.txt', + }), + }); + }); + + test('Default numberOfWorkers should be 10', () => { + Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { + NumberOfWorkers: 10, + }); + }); + + test('Default WorkerType should be G.1X', () => { + Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { + WorkerType: 
'G.1X', + }); + }); + }); + + describe('Create PySpark ETL Job with optional properties', () => { + + beforeEach(() => { + job = new glue.PySparkEtlJob(stack, 'PySparkETLJob', { + jobName: 'PySparkETLJobCustomName', + description: 'This is a description', + role, + script, + glueVersion: glue.GlueVersion.V3_0, + continuousLogging: { enabled: false }, + workerType: glue.WorkerType.G_2X, + maxConcurrentRuns: 100, + timeout: cdk.Duration.hours(2), + connections: [glue.Connection.fromConnectionName(stack, 'Connection', 'connectionName')], + securityConfiguration: glue.SecurityConfiguration.fromSecurityConfigurationName(stack, 'SecurityConfig', 'securityConfigName'), + tags: { + FirstTagName: 'FirstTagValue', + SecondTagName: 'SecondTagValue', + XTagName: 'XTagValue', + }, + numberOfWorkers: 2, + maxRetries: 2, + }); + }); + + test('Test job attributes', () => { + expect(job.jobArn).toEqual(stack.formatArn({ + service: 'glue', + resource: 'job', + resourceName: job.jobName, + })); + expect(job.grantPrincipal).toEqual(role); + }); + + test('Custom Job Name and Description', () => { + Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { + Name: 'PySparkETLJobCustomName', + Description: 'This is a description', + }); + }); + + test('Overriden Glue Version should be 3.0', () => { + Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { + GlueVersion: '3.0', + }); + }); + + test('Verify Default Arguemnts', () => { + Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { + DefaultArguments: Match.objectLike({ + '--enable-metrics': '', + '--enable-observability-metrics': 'true', + '--job-language': 'python', + }), + }); + }); + + test('Overriden numberOfWorkers should be 2', () => { + Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { + NumberOfWorkers: 2, + }); + }); + + test('Overriden WorkerType should be G.2X', () => { + Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { + WorkerType: 
glue.WorkerType.G_2X, + }); + }); + + test('Overriden max retries should be 2', () => { + Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { + MaxRetries: 2, + }); + }); + + test('Overriden max concurrent runs should be 100', () => { + Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { + ExecutionProperty: { + MaxConcurrentRuns: 100, + }, + }); + }); + + test('Overriden timeout should be 2 hours', () => { + Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { + Timeout: 120, + }); + }); + + test('Overriden connections should be 100', () => { + Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { + Connections: { + Connections: ['connectionName'], + }, + }); + }); + + test('Overriden security configuration should be set', () => { + Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { + SecurityConfiguration: 'securityConfigName', + }); + }); + + test('Should have tags', () => { + Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { + Tags: { + FirstTagName: 'FirstTagValue', + SecondTagName: 'SecondTagValue', + XTagName: 'XTagValue', + }, + }); + }); + }); + }); \ No newline at end of file diff --git a/packages/@aws-cdk/aws-glue-alpha/test/pyspark-flex-etl-jobs.test.ts b/packages/@aws-cdk/aws-glue-alpha/test/pyspark-flex-etl-jobs.test.ts index 099a81bf4f2c5..5736101dc6055 100644 --- a/packages/@aws-cdk/aws-glue-alpha/test/pyspark-flex-etl-jobs.test.ts +++ b/packages/@aws-cdk/aws-glue-alpha/test/pyspark-flex-etl-jobs.test.ts @@ -2,7 +2,8 @@ import * as cdk from 'aws-cdk-lib'; import * as glue from '../lib'; import * as iam from 'aws-cdk-lib/aws-iam'; import * as s3 from 'aws-cdk-lib/aws-s3'; -import { Template } from 'aws-cdk-lib/assertions'; +import { Template, Match } from 'aws-cdk-lib/assertions'; +import { LogGroup } from 'aws-cdk-lib/aws-logs'; describe('Job', () => { let stack: cdk.Stack; @@ -50,5 +51,87 @@ describe('Job', () => { ExecutionClass: 'FLEX', }); }); + + 
test('Has Continuous Logging Enabled', () => { + Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { + DefaultArguments: Match.objectLike({ + '--enable-metrics': '', + '--enable-observability-metrics': 'true', + '--job-language': 'python', + '--enable-continuous-cloudwatch-log': 'true', + }), + }); + }); + + test('Default numberOfWorkers should be 10', () => { + Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { + NumberOfWorkers: 10, + }); + }); + + }); + + describe('Create new PySpark ETL Job with log override parameters', () => { + + beforeEach(() => { + job = new glue.PySparkEtlJob(stack, 'PySparkETLJob', { + jobName: 'PySparkETLJob', + role, + script, + continuousLogging: { + enabled: true, + quiet: true, + logGroup: new LogGroup(stack, 'logGroup', { + logGroupName: '/aws-glue/jobs/${job.jobName}', + }), + logStreamPrefix: 'logStreamPrefix', + conversionPattern: 'convert', + }, + }); + }); + + test('Has Continuous Logging enabled with optional args', () => { + Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { + DefaultArguments: Match.objectLike({ + '--enable-metrics': '', + '--enable-observability-metrics': 'true', + '--job-language': 'python', + '--continuous-log-logGroup': Match.objectLike({ + Ref: Match.anyValue(), + }), + '--enable-continuous-cloudwatch-log': 'true', + '--enable-continuous-log-filter': 'true', + '--continuous-log-logStreamPrefix': 'logStreamPrefix', + '--continuous-log-conversionPattern': 'convert', + }), + }); + }); + + }); + + describe('Create new PySpark ETL Flex Job with logging explicitly disabled', () => { + + beforeEach(() => { + job = new glue.PySparkFlexEtlJob(stack, 'PySparkFlexETLJob', { + jobName: 'PySparkFlexETLJob', + role, + script, + continuousLogging: { + enabled: false, + }, + }); + }); + + test('Has Continuous Logging Disabled', () => { + Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { + DefaultArguments: { + '--enable-metrics': '', + 
'--enable-observability-metrics': 'true', + '--job-language': 'python', + }, + }); + }); + }); + }); \ No newline at end of file diff --git a/packages/@aws-cdk/aws-glue-alpha/test/pyspark-streaming-jobs.test.ts b/packages/@aws-cdk/aws-glue-alpha/test/pyspark-streaming-jobs.test.ts index 3320ffd288298..4babf6ec557b2 100644 --- a/packages/@aws-cdk/aws-glue-alpha/test/pyspark-streaming-jobs.test.ts +++ b/packages/@aws-cdk/aws-glue-alpha/test/pyspark-streaming-jobs.test.ts @@ -2,7 +2,8 @@ import * as cdk from 'aws-cdk-lib'; import * as glue from '../lib'; import * as iam from 'aws-cdk-lib/aws-iam'; import * as s3 from 'aws-cdk-lib/aws-s3'; -import { Template } from 'aws-cdk-lib/assertions'; +import { Template, Match } from 'aws-cdk-lib/assertions'; +import { LogGroup } from 'aws-cdk-lib/aws-logs'; describe('Job', () => { let stack: cdk.Stack; @@ -50,5 +51,85 @@ describe('Job', () => { WorkerType: 'G.1X', }); }); + + test('Has Continuous Logging Enabled', () => { + Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { + DefaultArguments: Match.objectLike({ + '--enable-metrics': '', + '--enable-observability-metrics': 'true', + '--job-language': 'python', + '--enable-continuous-cloudwatch-log': 'true', + }), + }); + }); + + test('Default numberOfWorkers should be 10', () => { + Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { + NumberOfWorkers: 10, + }); + }); + }); + + describe('Create new PySpark ETL Job with log override parameters', () => { + + beforeEach(() => { + job = new glue.PySparkStreamingJob(stack, 'PySparkETLJob', { + jobName: 'PySparkETLJob', + role, + script, + continuousLogging: { + enabled: true, + quiet: true, + logGroup: new LogGroup(stack, 'logGroup', { + logGroupName: '/aws-glue/jobs/${job.jobName}', + }), + logStreamPrefix: 'logStreamPrefix', + conversionPattern: 'convert', + }, + }); + }); + + test('Has Continuous Logging enabled with optional args', () => { + 
Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { + DefaultArguments: Match.objectLike({ + '--enable-metrics': '', + '--enable-observability-metrics': 'true', + '--job-language': 'python', + '--continuous-log-logGroup': Match.objectLike({ + Ref: Match.anyValue(), + }), + '--enable-continuous-cloudwatch-log': 'true', + '--enable-continuous-log-filter': 'true', + '--continuous-log-logStreamPrefix': 'logStreamPrefix', + '--continuous-log-conversionPattern': 'convert', + }), + }); + }); + + }); + + describe('Create new PySpark Streaming Job with logging explicitly disabled', () => { + + beforeEach(() => { + job = new glue.PySparkStreamingJob(stack, 'PySparkStreamingJob', { + jobName: 'PySparkStreamingJob', + role, + script, + continuousLogging: { + enabled: false, + }, + }); + }); + + test('Has Continuous Logging Disabled', () => { + Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { + DefaultArguments: { + '--enable-metrics': '', + '--enable-observability-metrics': 'true', + '--job-language': 'python', + }, + }); + }); + }); }); diff --git a/packages/@aws-cdk/aws-glue-alpha/test/python-shell-job.test.ts b/packages/@aws-cdk/aws-glue-alpha/test/python-shell-job.test.ts index b76fdc7cc8055..f39eb7a8c84d8 100644 --- a/packages/@aws-cdk/aws-glue-alpha/test/python-shell-job.test.ts +++ b/packages/@aws-cdk/aws-glue-alpha/test/python-shell-job.test.ts @@ -2,7 +2,8 @@ import * as cdk from 'aws-cdk-lib'; import * as glue from '../lib'; import * as iam from 'aws-cdk-lib/aws-iam'; import * as s3 from 'aws-cdk-lib/aws-s3'; -import { Template } from 'aws-cdk-lib/assertions'; +import { Template, Match } from 'aws-cdk-lib/assertions'; +import { LogGroup } from 'aws-cdk-lib/aws-logs'; describe('Job', () => { let stack: cdk.Stack; @@ -50,5 +51,77 @@ describe('Job', () => { MaxCapacity: 0.0625, }); }); + + test('Has Continuous Logging Enabled', () => { + Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { + DefaultArguments: 
Match.objectLike({ + '--enable-metrics': '', + '--enable-observability-metrics': 'true', + '--enable-continuous-cloudwatch-log': 'true', + }), + }); + }); + + }); + + describe('Create new Python Shell Job with log override parameters', () => { + + beforeEach(() => { + job = new glue.PythonShellJob(stack, 'PythonShellJob', { + jobName: 'PythonShellJob', + role, + script, + continuousLogging: { + enabled: true, + quiet: true, + logGroup: new LogGroup(stack, 'logGroup', { + logGroupName: '/aws-glue/jobs/${job.jobName}', + }), + logStreamPrefix: 'logStreamPrefix', + conversionPattern: 'convert', + }, + }); + }); + + test('Has Continuous Logging enabled with optional args', () => { + Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { + DefaultArguments: Match.objectLike({ + '--enable-metrics': '', + '--enable-observability-metrics': 'true', + '--continuous-log-logGroup': Match.objectLike({ + Ref: Match.anyValue(), + }), + '--enable-continuous-cloudwatch-log': 'true', + '--enable-continuous-log-filter': 'true', + '--continuous-log-logStreamPrefix': 'logStreamPrefix', + '--continuous-log-conversionPattern': 'convert', + }), + }); + }); + + }); + + describe('Create new Python Shell Job with logging explicitly disabled', () => { + + beforeEach(() => { + job = new glue.PythonShellJob(stack, 'PythonShellJob', { + jobName: 'PythonShellJob', + role, + script, + continuousLogging: { + enabled: false, + }, + }); + }); + + test('Has Continuous Logging Disabled', () => { + Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { + DefaultArguments: { + '--enable-metrics': '', + '--enable-observability-metrics': 'true', + }, + }); + }); + }); }); \ No newline at end of file diff --git a/packages/@aws-cdk/aws-glue-alpha/test/ray-job.test.ts b/packages/@aws-cdk/aws-glue-alpha/test/ray-job.test.ts index 0581ea71b6329..1d0bf4e184207 100644 --- a/packages/@aws-cdk/aws-glue-alpha/test/ray-job.test.ts +++ b/packages/@aws-cdk/aws-glue-alpha/test/ray-job.test.ts 
@@ -3,7 +3,8 @@ import * as cdk from 'aws-cdk-lib'; import * as glue from '../lib'; import * as iam from 'aws-cdk-lib/aws-iam'; import * as s3 from 'aws-cdk-lib/aws-s3'; -import { Template } from 'aws-cdk-lib/assertions'; +import { Template, Match } from 'aws-cdk-lib/assertions'; +import { LogGroup } from 'aws-cdk-lib/aws-logs'; describe('Job', () => { let stack: cdk.Stack; @@ -51,5 +52,237 @@ describe('Job', () => { WorkerType: 'Z.2X', }); }); + + test('Has Continuous Logging Enabled', () => { + Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { + DefaultArguments: Match.objectLike({ + '--enable-metrics': '', + '--enable-observability-metrics': 'true', + '--enable-continuous-cloudwatch-log': 'true', + }), + }); + }); + + test('Default numberOfWorkers should be 3', () => { + Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { + NumberOfWorkers: 3, + }); + }); + }); + + describe('Create new Ray Job with log override parameters', () => { + + beforeEach(() => { + job = new glue.RayJob(stack, 'RayJob', { + jobName: 'RayJob', + role, + script, + continuousLogging: { + enabled: true, + quiet: true, + logGroup: new LogGroup(stack, 'logGroup', { + logGroupName: '/aws-glue/jobs/${job.jobName}', + }), + logStreamPrefix: 'logStreamPrefix', + conversionPattern: 'convert', + }, + }); + }); + + test('Has Continuous Logging enabled with optional args', () => { + Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { + DefaultArguments: Match.objectLike({ + '--enable-metrics': '', + '--enable-observability-metrics': 'true', + '--continuous-log-logGroup': Match.objectLike({ + Ref: Match.anyValue(), + }), + '--enable-continuous-cloudwatch-log': 'true', + '--enable-continuous-log-filter': 'true', + '--continuous-log-logStreamPrefix': 'logStreamPrefix', + '--continuous-log-conversionPattern': 'convert', + }), + }); + }); + + }); + + describe('Create new Ray Job with logging explicitly disabled', () => { + + beforeEach(() => { + job = new 
glue.RayJob(stack, 'RayJob', { + jobName: 'RayJob', + role, + script, + continuousLogging: { + enabled: false, + }, + }); + }); + + test('Has Continuous Logging Disabled', () => { + Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { + DefaultArguments: { + '--enable-metrics': '', + '--enable-observability-metrics': 'true', + }, + }); + }); + + }); + + describe('Create Ray Job with optional override parameters', () => { + + beforeEach(() => { + job = new glue.RayJob(stack, 'ImportedJob', { + role, + script, + jobName: 'RayCustomJobName', + description: 'This is a description', + workerType: glue.WorkerType.Z_2X, + numberOfWorkers: 5, + runtime: glue.Runtime.RAY_TWO_FOUR, + maxRetries: 3, + maxConcurrentRuns: 100, + timeout: cdk.Duration.hours(2), + connections: [glue.Connection.fromConnectionName(stack, 'Connection', 'connectionName')], + securityConfiguration: glue.SecurityConfiguration.fromSecurityConfigurationName(stack, 'SecurityConfig', 'securityConfigName'), + tags: { + FirstTagName: 'FirstTagValue', + SecondTagName: 'SecondTagValue', + XTagName: 'XTagValue', + }, + }); + }); + + test('Test default attributes', () => { + expect(job.jobArn).toEqual(stack.formatArn({ + service: 'glue', + resource: 'job', + resourceName: job.jobName, + })); + expect(job.grantPrincipal).toEqual(role); + }); + + test('Cannot override Glue Version should be 4.0', () => { + Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { + GlueVersion: '4.0', + }); + }); + + test('Overridden number of workers should be 5', () => { + Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { + NumberOfWorkers: 5, + }); + }); + + test('Cannot override worker type should be Z.2X', () => { + Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { + WorkerType: 'Z.2X', + }); + }); + + test('Has Continuous Logging Enabled', () => { + Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { + DefaultArguments: Match.objectLike({ + 
'--enable-metrics': '', + '--enable-observability-metrics': 'true', + '--enable-continuous-cloudwatch-log': 'true', + }), + }); + }); + + test('Custom Job Name and Description', () => { + Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { + Name: 'RayCustomJobName', + Description: 'This is a description', + }); + }); + + test('Verify Default Arguemnts', () => { + Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { + DefaultArguments: Match.objectLike({ + '--enable-metrics': '', + '--enable-observability-metrics': 'true', + }), + }); + }); + + test('Overriden max retries should be 3', () => { + Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { + MaxRetries: 3, + }); + }); + + test('Overriden max concurrent runs should be 100', () => { + Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { + ExecutionProperty: { + MaxConcurrentRuns: 100, + }, + }); + }); + + test('Overriden timeout should be 2 hours', () => { + Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { + Timeout: 120, + }); + }); + + test('Overriden connections should be 100', () => { + Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { + Connections: { + Connections: ['connectionName'], + }, + }); + }); + + test('Overriden security configuration should be set', () => { + Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { + SecurityConfiguration: 'securityConfigName', + }); + }); + + test('Should have tags', () => { + Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { + Tags: { + FirstTagName: 'FirstTagValue', + SecondTagName: 'SecondTagValue', + XTagName: 'XTagValue', + }, + }); + }); + }); + + describe('Invalid overrides should cause errors', () => { + + test('Create Ray Job overriding only workerType to cause an Error', () => { + expect(() => { + job = new glue.RayJob(stack, 'RayJob', { + role, + script, + workerType: glue.WorkerType.G_025X, + }); + }).toThrow(new 
Error('Ray jobs only support Z.2X worker type')); + }); + + test('Create Ray Job overriding only workerType to cause an Error', () => { + expect(() => { + job = new glue.RayJob(stack, 'RayJob', { + role, + script, + workerType: glue.WorkerType.Z_2X, + }); + }).toThrow(new Error('Both workerType and numberOFWorkers must be set')); + }); + + test('Create Ray Job overriding only numberOfWorkers to cause an Error', () => { + expect(() => { + job = new glue.RayJob(stack, 'RayJob', { + role, + script, + numberOfWorkers: 5, + }); + }).toThrow(new Error('Both workerType and numberOFWorkers must be set')); + }); }); }); \ No newline at end of file diff --git a/packages/@aws-cdk/aws-glue-alpha/test/scalaspark-etl-jobs.test.ts b/packages/@aws-cdk/aws-glue-alpha/test/scalaspark-etl-jobs.test.ts index d718dfee4302f..d033011e3d130 100644 --- a/packages/@aws-cdk/aws-glue-alpha/test/scalaspark-etl-jobs.test.ts +++ b/packages/@aws-cdk/aws-glue-alpha/test/scalaspark-etl-jobs.test.ts @@ -2,7 +2,8 @@ import * as cdk from 'aws-cdk-lib'; import * as glue from '../lib'; import * as iam from 'aws-cdk-lib/aws-iam'; import * as s3 from 'aws-cdk-lib/aws-s3'; -import { Template } from 'aws-cdk-lib/assertions'; +import { Template, Match } from 'aws-cdk-lib/assertions'; +import { LogGroup } from 'aws-cdk-lib/aws-logs'; describe('Job', () => { let stack: cdk.Stack; @@ -52,5 +53,87 @@ describe('Job', () => { WorkerType: 'G.1X', }); }); + + test('Has Continuous Logging Enabled', () => { + Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { + DefaultArguments: Match.objectLike({ + '--enable-metrics': '', + '--enable-observability-metrics': 'true', + '--job-language': 'scala', + '--enable-continuous-cloudwatch-log': 'true', + }), + }); + }); + + test('Default numberOfWorkers should be 10', () => { + Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { + NumberOfWorkers: 10, + }); + }); + }); + + describe('Create new Scala ETL Job with log override parameters', () => { 
+ + beforeEach(() => { + job = new glue.ScalaSparkEtlJob(stack, 'ScalaSparkEtlJob', { + jobName: 'ScalaSparkEtlJob', + role, + script, + className: 'com.example.HelloWorld', + continuousLogging: { + enabled: true, + quiet: true, + logGroup: new LogGroup(stack, 'logGroup', { + logGroupName: '/aws-glue/jobs/${job.jobName}', + }), + logStreamPrefix: 'logStreamPrefix', + conversionPattern: 'convert', + }, + }); + }); + + test('Has Continuous Logging enabled with optional args', () => { + Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { + DefaultArguments: Match.objectLike({ + '--enable-metrics': '', + '--enable-observability-metrics': 'true', + '--job-language': 'scala', + '--continuous-log-logGroup': Match.objectLike({ + Ref: Match.anyValue(), + }), + '--enable-continuous-cloudwatch-log': 'true', + '--enable-continuous-log-filter': 'true', + '--continuous-log-logStreamPrefix': 'logStreamPrefix', + '--continuous-log-conversionPattern': 'convert', + }), + }); + }); + + }); + + describe('Create new Scala ETL Job with logging explicitly disabled', () => { + + beforeEach(() => { + job = new glue.ScalaSparkEtlJob(stack, 'ScalaSparkEtlJob', { + jobName: 'ScalaSparkEtlJob', + role, + script, + className: 'com.example.HelloWorld', + continuousLogging: { + enabled: false, + }, + }); + }); + + test('Has Continuous Logging Disabled', () => { + Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { + DefaultArguments: { + '--enable-metrics': '', + '--enable-observability-metrics': 'true', + '--job-language': 'scala', + }, + }); + }); + }); }); \ No newline at end of file From f4b23158a1d022d5042e9c290703d4b4205a13f1 Mon Sep 17 00:00:00 2001 From: Natalie White Date: Mon, 15 Jul 2024 17:24:37 +0000 Subject: [PATCH 41/51] Increase unit test coverage for python shell and pyspark streaming jobs --- .../lib/jobs/pyspark-etl-job.ts | 4 - .../lib/jobs/python-shell-job.ts | 3 +- .../test/pyspark-etl-jobs.test.ts | 31 +- 
.../test/pyspark-streaming-jobs.test.ts | 438 +++++++++++++++++- .../test/python-shell-job.test.ts | 174 +++++++ 5 files changed, 622 insertions(+), 28 deletions(-) diff --git a/packages/@aws-cdk/aws-glue-alpha/lib/jobs/pyspark-etl-job.ts b/packages/@aws-cdk/aws-glue-alpha/lib/jobs/pyspark-etl-job.ts index f20ac3d1dbf1a..f8f4a4aa760e7 100644 --- a/packages/@aws-cdk/aws-glue-alpha/lib/jobs/pyspark-etl-job.ts +++ b/packages/@aws-cdk/aws-glue-alpha/lib/jobs/pyspark-etl-job.ts @@ -114,10 +114,6 @@ export class PySparkEtlJob extends Job { ...this.checkNoReservedArgs(props.defaultArguments), }; - if ((!props.workerType && props.numberOfWorkers !== undefined) || (props.workerType && props.numberOfWorkers === undefined)) { - throw new Error('Both workerType and numberOFWorkers must be set'); - } - const jobResource = new CfnJob(this, 'Resource', { name: props.jobName, description: props.description, diff --git a/packages/@aws-cdk/aws-glue-alpha/lib/jobs/python-shell-job.ts b/packages/@aws-cdk/aws-glue-alpha/lib/jobs/python-shell-job.ts index b1d0da0ca33da..5416294b8ea5d 100644 --- a/packages/@aws-cdk/aws-glue-alpha/lib/jobs/python-shell-job.ts +++ b/packages/@aws-cdk/aws-glue-alpha/lib/jobs/python-shell-job.ts @@ -2,7 +2,7 @@ import { CfnJob } from 'aws-cdk-lib/aws-glue'; import * as iam from 'aws-cdk-lib/aws-iam'; import { Job, JobProperties } from './job'; import { Construct } from 'constructs'; -import { JobType, GlueVersion, PythonVersion, MaxCapacity } from '../constants'; +import { JobType, GlueVersion, PythonVersion, MaxCapacity, JobLanguage } from '../constants'; /** * Python Shell Jobs class @@ -109,6 +109,7 @@ export class PythonShellJob extends Job { */ private executableArguments(props: PythonShellJobProps) { const args: { [key: string]: string } = {}; + args['--job-language'] = JobLanguage.PYTHON; //If no Python version set (default 3.9) or the version is set to 3.9 then set library-set argument if (!props.pythonVersion || props.pythonVersion == 
PythonVersion.THREE_NINE) { diff --git a/packages/@aws-cdk/aws-glue-alpha/test/pyspark-etl-jobs.test.ts b/packages/@aws-cdk/aws-glue-alpha/test/pyspark-etl-jobs.test.ts index 07b50a8fc9330..759ba9347be54 100644 --- a/packages/@aws-cdk/aws-glue-alpha/test/pyspark-etl-jobs.test.ts +++ b/packages/@aws-cdk/aws-glue-alpha/test/pyspark-etl-jobs.test.ts @@ -67,6 +67,16 @@ describe('Job', () => { WorkerType: 'G.1X', }); }); + + test('Default Python version should be 3', () => { + Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { + Command: { + Name: glue.JobType.ETL, + ScriptLocation: 's3://bucketname/script', + PythonVersion: glue.PythonVersion.THREE, + }, + }); + }); }); describe('Create new PySpark ETL Job with log override parameters', () => { @@ -358,27 +368,6 @@ describe('Job', () => { }).toThrow('Invalid prefix format (value: prefix)'); }); - test('Create PySpark ETL Job overriding only workerType to cause an Error', () => { - expect(() => { - job = new glue.PySparkEtlJob(stack, 'PySparkETLJob', { - role, - script, - jobName: 'PySparkETLJob', - workerType: glue.WorkerType.G_2X, - }); - }).toThrow(new Error('Both workerType and numberOFWorkers must be set')); - }); - - test('Create PySpark ETL Job overriding only numberOfWorkers to cause an Error', () => { - expect(() => { - job = new glue.PySparkEtlJob(stack, 'PySparkETLJob', { - role, - script, - jobName: 'PySparkETLJob', - numberOfWorkers: 5, - }); - }).toThrow(new Error('Both workerType and numberOFWorkers must be set')); - }); }); describe('Create PySpark ETL Job with extraPythonFiles and extraFiles', () => { diff --git a/packages/@aws-cdk/aws-glue-alpha/test/pyspark-streaming-jobs.test.ts b/packages/@aws-cdk/aws-glue-alpha/test/pyspark-streaming-jobs.test.ts index 4babf6ec557b2..5c232a61e6566 100644 --- a/packages/@aws-cdk/aws-glue-alpha/test/pyspark-streaming-jobs.test.ts +++ b/packages/@aws-cdk/aws-glue-alpha/test/pyspark-streaming-jobs.test.ts @@ -11,6 +11,7 @@ describe('Job', () => { let 
script: glue.Code; let codeBucket: s3.IBucket; let job: glue.IJob; + let sparkUIBucket: s3.Bucket; beforeEach(() => { stack = new cdk.Stack(); @@ -68,13 +69,23 @@ describe('Job', () => { NumberOfWorkers: 10, }); }); + + test('Default Python version should be 3', () => { + Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { + Command: { + Name: glue.JobType.ETL, + ScriptLocation: 's3://bucketname/script', + PythonVersion: glue.PythonVersion.THREE, + }, + }); + }); }); describe('Create new PySpark ETL Job with log override parameters', () => { beforeEach(() => { - job = new glue.PySparkStreamingJob(stack, 'PySparkETLJob', { - jobName: 'PySparkETLJob', + job = new glue.PySparkStreamingJob(stack, 'PySparkStreamingJob', { + jobName: 'PySparkStreamingJob', role, script, continuousLogging: { @@ -132,4 +143,427 @@ describe('Job', () => { }); }); + + describe('Create PySpark ETL Job with G2 worker type with 2 workers', () => { + + beforeEach(() => { + job = new glue.PySparkStreamingJob(stack, 'PySparkStreamingJob', { + role, + script, + jobName: 'PySparkStreamingJob', + workerType: glue.WorkerType.G_2X, + numberOfWorkers: 2, + }); + }); + + test('Test default attributes', () => { + expect(job.jobArn).toEqual(stack.formatArn({ + service: 'glue', + resource: 'job', + resourceName: job.jobName, + })); + expect(job.grantPrincipal).toEqual(role); + }); + + test('Default Glue Version should be 4.0', () => { + Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { + GlueVersion: '4.0', + }); + }); + + test('Has Continuous Logging Enabled', () => { + Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { + DefaultArguments: Match.objectLike({ + '--enable-metrics': '', + '--enable-observability-metrics': 'true', + '--job-language': 'python', + '--enable-continuous-cloudwatch-log': 'true', + }), + }); + }); + + test('Overriden numberOfWorkers should be 2', () => { + Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { + 
NumberOfWorkers: 2, + }); + }); + + test('Overriden WorkerType should be G.1X', () => { + Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { + WorkerType: 'G.2X', + }); + }); + + test('Default Python version should be 3', () => { + Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { + Command: { + Name: glue.JobType.ETL, + ScriptLocation: 's3://bucketname/script', + PythonVersion: glue.PythonVersion.THREE, + }, + }); + }); + }); + + describe('Create PySpark ETL Job with G4 worker type with 4 workers', () => { + + beforeEach(() => { + job = new glue.PySparkStreamingJob(stack, 'PySparkStreamingJob', { + role, + script, + jobName: 'PySparkStreamingJob', + workerType: glue.WorkerType.G_4X, + numberOfWorkers: 4, + }); + }); + + test('Test default attributes', () => { + expect(job.jobArn).toEqual(stack.formatArn({ + service: 'glue', + resource: 'job', + resourceName: job.jobName, + })); + expect(job.grantPrincipal).toEqual(role); + }); + + test('Default Glue Version should be 4.0', () => { + Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { + GlueVersion: '4.0', + }); + }); + + test('Has Continuous Logging Enabled', () => { + Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { + DefaultArguments: Match.objectLike({ + '--enable-metrics': '', + '--enable-observability-metrics': 'true', + '--job-language': 'python', + '--enable-continuous-cloudwatch-log': 'true', + }), + }); + }); + + test('Overriden numberOfWorkers should be 2', () => { + Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { + NumberOfWorkers: 4, + }); + }); + + test('Overriden WorkerType should be G.4X', () => { + Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { + WorkerType: 'G.4X', + }); + }); + }); + + describe('Create PySpark ETL Job with G8 worker type and 8 workers', () => { + + beforeEach(() => { + job = new glue.PySparkStreamingJob(stack, 'PySparkStreamingJob', { + role, + script, + jobName: 
'PySparkStreamingJob', + workerType: glue.WorkerType.G_8X, + numberOfWorkers: 8, + }); + }); + + test('Test default attributes', () => { + expect(job.jobArn).toEqual(stack.formatArn({ + service: 'glue', + resource: 'job', + resourceName: job.jobName, + })); + expect(job.grantPrincipal).toEqual(role); + }); + + test('Default Glue Version should be 4.0', () => { + Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { + GlueVersion: '4.0', + }); + }); + + test('Has Continuous Logging Enabled', () => { + Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { + DefaultArguments: Match.objectLike({ + '--enable-metrics': '', + '--enable-observability-metrics': 'true', + '--job-language': 'python', + '--enable-continuous-cloudwatch-log': 'true', + }), + }); + }); + + test('Overriden numberOfWorkers should be 8', () => { + Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { + NumberOfWorkers: 8, + }); + }); + + test('Overriden WorkerType should be G.8X', () => { + Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { + WorkerType: 'G.8X', + }); + }); + }); + + describe('Override SparkUI properties for PySpark ETL Job', () => { + + beforeEach(() => { + sparkUIBucket = new s3.Bucket(stack, 'sparkUIbucket', { bucketName: 'bucket-name' }); + job = new glue.PySparkStreamingJob(stack, 'PySparkStreamingJob', { + role, + script, + jobName: 'PySparkStreamingJob', + sparkUI: { + bucket: sparkUIBucket, + prefix: '/prefix', + }, + }); + }); + + test('Test default attributes', () => { + expect(job.jobArn).toEqual(stack.formatArn({ + service: 'glue', + resource: 'job', + resourceName: job.jobName, + })); + expect(job.grantPrincipal).toEqual(role); + }); + + test('Default Glue Version should be 4.0', () => { + Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { + GlueVersion: glue.GlueVersion.V4_0, + }); + }); + + test('Has Continuous Logging and SparkUIEnabled', () => { + 
Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { + DefaultArguments: Match.objectLike({ + '--enable-metrics': '', + '--enable-observability-metrics': 'true', + '--job-language': 'python', + '--enable-continuous-cloudwatch-log': 'true', + '--enable-spark-ui': 'true', + '--spark-event-logs-path': Match.objectLike({ + 'Fn::Join': [ + '', + [ + 's3://', + { Ref: Match.anyValue() }, + '/prefix/', + ], + ], + }), + }), + }); + }); + }); + + describe('Invalid overrides should cause errors', () => { + + test('Invalid SparkUI prefix should throw an error', () => { + expect(() => { + sparkUIBucket = new s3.Bucket(stack, 'sparkUIbucket', { bucketName: 'bucket-name' }); + job = new glue.PySparkStreamingJob(stack, 'PySparkStreamingJob', { + role, + script, + jobName: 'PySparkStreamingJob', + sparkUI: { + bucket: sparkUIBucket, + prefix: 'prefix', + }, + numberOfWorkers: 8, + workerType: glue.WorkerType.G_8X, + continuousLogging: { enabled: false }, + }); + }).toThrow('Invalid prefix format (value: prefix)'); + }); + + }); + + describe('Create PySpark ETL Job with extraPythonFiles and extraFiles', () => { + + beforeEach(() => { + job = new glue.PySparkStreamingJob(stack, 'PySparkStreamingJob', { + role, + script, + jobName: 'PySparkStreamingJob', + extraPythonFiles: [ + glue.Code.fromBucket( + s3.Bucket.fromBucketName(stack, 'extraPythonFilesBucket', 'extra-python-files-bucket'), + 'prefix/file.py'), + ], + extraFiles: [ + glue.Code.fromBucket( + s3.Bucket.fromBucketName(stack, 'extraFilesBucket', 'extra-files-bucket'), + 'prefix/file.txt'), + ], + }); + }); + + test('Test default attributes', () => { + expect(job.jobArn).toEqual(stack.formatArn({ + service: 'glue', + resource: 'job', + resourceName: job.jobName, + })); + expect(job.grantPrincipal).toEqual(role); + }); + + test('Default Glue Version should be 4.0', () => { + Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { + GlueVersion: glue.GlueVersion.V4_0, + }); + }); + + test('Verify 
Default Arguemnts', () => { + Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { + DefaultArguments: Match.objectLike({ + '--enable-metrics': '', + '--enable-observability-metrics': 'true', + '--job-language': 'python', + '--enable-continuous-cloudwatch-log': 'true', + '--extra-py-files': 's3://extra-python-files-bucket/prefix/file.py', + '--extra-files': 's3://extra-files-bucket/prefix/file.txt', + }), + }); + }); + + test('Default numberOfWorkers should be 10', () => { + Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { + NumberOfWorkers: 10, + }); + }); + + test('Default WorkerType should be G.1X', () => { + Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { + WorkerType: 'G.1X', + }); + }); + }); + + describe('Create PySpark ETL Job with optional properties', () => { + + beforeEach(() => { + job = new glue.PySparkStreamingJob(stack, 'PySparkStreamingJob', { + jobName: 'PySparkStreamingJobCustomName', + description: 'This is a description', + role, + script, + glueVersion: glue.GlueVersion.V3_0, + continuousLogging: { enabled: false }, + workerType: glue.WorkerType.G_2X, + maxConcurrentRuns: 100, + timeout: cdk.Duration.hours(2), + connections: [glue.Connection.fromConnectionName(stack, 'Connection', 'connectionName')], + securityConfiguration: glue.SecurityConfiguration.fromSecurityConfigurationName(stack, 'SecurityConfig', 'securityConfigName'), + tags: { + FirstTagName: 'FirstTagValue', + SecondTagName: 'SecondTagValue', + XTagName: 'XTagValue', + }, + numberOfWorkers: 2, + maxRetries: 2, + }); + }); + + test('Test job attributes', () => { + expect(job.jobArn).toEqual(stack.formatArn({ + service: 'glue', + resource: 'job', + resourceName: job.jobName, + })); + expect(job.grantPrincipal).toEqual(role); + }); + + test('Custom Job Name and Description', () => { + Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { + Name: 'PySparkStreamingJobCustomName', + Description: 'This is a description', + 
}); + }); + + test('Overriden Glue Version should be 3.0', () => { + Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { + GlueVersion: '3.0', + }); + }); + + test('Verify Default Arguemnts', () => { + Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { + DefaultArguments: Match.objectLike({ + '--enable-metrics': '', + '--enable-observability-metrics': 'true', + '--job-language': 'python', + }), + }); + }); + + test('Overriden numberOfWorkers should be 2', () => { + Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { + NumberOfWorkers: 2, + }); + }); + + test('Overriden WorkerType should be G.2X', () => { + Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { + WorkerType: glue.WorkerType.G_2X, + }); + }); + + test('Overriden max retries should be 2', () => { + Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { + MaxRetries: 2, + }); + }); + + test('Overriden max concurrent runs should be 100', () => { + Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { + ExecutionProperty: { + MaxConcurrentRuns: 100, + }, + }); + }); + + test('Overriden timeout should be 2 hours', () => { + Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { + Timeout: 120, + }); + }); + + test('Overriden connections should be 100', () => { + Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { + Connections: { + Connections: ['connectionName'], + }, + }); + }); + + test('Overriden security configuration should be set', () => { + Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { + SecurityConfiguration: 'securityConfigName', + }); + }); + + test('Should have tags', () => { + Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { + Tags: { + FirstTagName: 'FirstTagValue', + SecondTagName: 'SecondTagValue', + XTagName: 'XTagValue', + }, + }); + }); + + test('Default Python version should be 3', () => { + 
Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { + Command: { + Name: glue.JobType.ETL, + ScriptLocation: 's3://bucketname/script', + PythonVersion: glue.PythonVersion.THREE, + }, + }); + }); + }); }); diff --git a/packages/@aws-cdk/aws-glue-alpha/test/python-shell-job.test.ts b/packages/@aws-cdk/aws-glue-alpha/test/python-shell-job.test.ts index f39eb7a8c84d8..c32ffab8296d7 100644 --- a/packages/@aws-cdk/aws-glue-alpha/test/python-shell-job.test.ts +++ b/packages/@aws-cdk/aws-glue-alpha/test/python-shell-job.test.ts @@ -52,12 +52,24 @@ describe('Job', () => { }); }); + test('Default Python version should be 3.9', () => { + Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { + Command: { + Name: glue.JobType.PYTHON_SHELL, + ScriptLocation: 's3://bucketname/script', + PythonVersion: glue.PythonVersion.THREE_NINE, + }, + }); + }); + test('Has Continuous Logging Enabled', () => { Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { DefaultArguments: Match.objectLike({ '--enable-metrics': '', '--enable-observability-metrics': 'true', '--enable-continuous-cloudwatch-log': 'true', + '--job-language': 'python', + 'library-set': 'analytics', }), }); }); @@ -95,6 +107,7 @@ describe('Job', () => { '--enable-continuous-log-filter': 'true', '--continuous-log-logStreamPrefix': 'logStreamPrefix', '--continuous-log-conversionPattern': 'convert', + '--job-language': 'python', }), }); }); @@ -119,9 +132,170 @@ describe('Job', () => { DefaultArguments: { '--enable-metrics': '', '--enable-observability-metrics': 'true', + '--job-language': 'python', }, }); }); }); + + describe('Create Python Shell Job with overridden Python verion and max capacity', () => { + + beforeEach(() => { + job = new glue.PythonShellJob(stack, 'PythonShellJob', { + role, + script, + jobName: 'PythonShellJob', + pythonVersion: glue.PythonVersion.TWO, + maxCapacity: glue.MaxCapacity.DPU_1, + }); + }); + + test('Test default attributes', () => { + 
expect(job.jobArn).toEqual(stack.formatArn({ + service: 'glue', + resource: 'job', + resourceName: job.jobName, + })); + expect(job.grantPrincipal).toEqual(role); + }); + + test('Overridden Python version should be 2', () => { + Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { + Command: { + Name: glue.JobType.PYTHON_SHELL, + ScriptLocation: 's3://bucketname/script', + PythonVersion: glue.PythonVersion.TWO, + }, + }); + }); + + test('Overridden Max Capacity should be 1', () => { + Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { + MaxCapacity: 1, + }); + }); + + }); + + describe('Create Python Shell Job with optional properties', () => { + + beforeEach(() => { + job = new glue.PythonShellJob(stack, 'PythonShellJob', { + jobName: 'PythonShellJobCustomName', + description: 'This is a description', + pythonVersion: glue.PythonVersion.TWO, + maxCapacity: glue.MaxCapacity.DPU_1, + role, + script, + glueVersion: glue.GlueVersion.V2_0, + continuousLogging: { enabled: false }, + workerType: glue.WorkerType.G_2X, + maxConcurrentRuns: 100, + timeout: cdk.Duration.hours(2), + connections: [glue.Connection.fromConnectionName(stack, 'Connection', 'connectionName')], + securityConfiguration: glue.SecurityConfiguration.fromSecurityConfigurationName(stack, 'SecurityConfig', 'securityConfigName'), + tags: { + FirstTagName: 'FirstTagValue', + SecondTagName: 'SecondTagValue', + XTagName: 'XTagValue', + }, + numberOfWorkers: 2, + maxRetries: 2, + }); + }); + + test('Test job attributes', () => { + expect(job.jobArn).toEqual(stack.formatArn({ + service: 'glue', + resource: 'job', + resourceName: job.jobName, + })); + expect(job.grantPrincipal).toEqual(role); + }); + + test('Custom Job Name and Description', () => { + Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { + Name: 'PythonShellJobCustomName', + Description: 'This is a description', + }); + }); + + test('Overriden Glue Version should be 2.0', () => { + 
Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { + GlueVersion: '2.0', + }); + }); + + test('Verify Default Arguemnts', () => { + Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { + DefaultArguments: Match.objectLike({ + '--enable-metrics': '', + '--enable-observability-metrics': 'true', + '--job-language': 'python', + }), + }); + }); + + test('Overriden max retries should be 2', () => { + Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { + MaxRetries: 2, + }); + }); + + test('Overriden max concurrent runs should be 100', () => { + Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { + ExecutionProperty: { + MaxConcurrentRuns: 100, + }, + }); + }); + + test('Overriden timeout should be 2 hours', () => { + Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { + Timeout: 120, + }); + }); + + test('Overriden connections should be 100', () => { + Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { + Connections: { + Connections: ['connectionName'], + }, + }); + }); + + test('Overriden security configuration should be set', () => { + Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { + SecurityConfiguration: 'securityConfigName', + }); + }); + + test('Should have tags', () => { + Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { + Tags: { + FirstTagName: 'FirstTagValue', + SecondTagName: 'SecondTagValue', + XTagName: 'XTagValue', + }, + }); + }); + + test('Overridden Python version should be 2', () => { + Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { + Command: { + Name: glue.JobType.PYTHON_SHELL, + ScriptLocation: 's3://bucketname/script', + PythonVersion: glue.PythonVersion.TWO, + }, + }); + }); + + test('Overridden Max Capacity should be 1', () => { + Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { + MaxCapacity: 1, + }); + }); + }); + }); \ No newline at end of file From 
7a7477729450ed347d4f1c2b553f7ee0bde4988d Mon Sep 17 00:00:00 2001 From: Prashanna B Date: Thu, 18 Jul 2024 07:42:29 +0000 Subject: [PATCH 42/51] Added unit test cases to the scala jobs --- .../test/scalaspark-etl-jobs.test.ts | 227 +++++++++++++ .../test/scalaspark-flex-etl-jobs.test.ts | 315 +++++++++++++++++- .../test/scalaspark-streaming-jobs.test.ts | 255 +++++++++++++- 3 files changed, 795 insertions(+), 2 deletions(-) diff --git a/packages/@aws-cdk/aws-glue-alpha/test/scalaspark-etl-jobs.test.ts b/packages/@aws-cdk/aws-glue-alpha/test/scalaspark-etl-jobs.test.ts index d033011e3d130..6326019a4bcf6 100644 --- a/packages/@aws-cdk/aws-glue-alpha/test/scalaspark-etl-jobs.test.ts +++ b/packages/@aws-cdk/aws-glue-alpha/test/scalaspark-etl-jobs.test.ts @@ -12,6 +12,7 @@ describe('Job', () => { let codeBucket: s3.IBucket; let job: glue.IJob; let className: string; + let sparkUIBucket: s3.Bucket; beforeEach(() => { stack = new cdk.Stack(); @@ -136,4 +137,230 @@ describe('Job', () => { }); }); + + describe('Create ScalaSpark ETL Job with optional properties', () => { + + beforeEach(() => { + job = new glue.ScalaSparkEtlJob(stack, 'ScalaSparkEtlJob', { + jobName: 'ScalaSparkEtlJob', + description: 'This is a description', + role, + script, + className, + glueVersion: glue.GlueVersion.V3_0, + continuousLogging: { enabled: false }, + workerType: glue.WorkerType.G_2X, + maxConcurrentRuns: 100, + timeout: cdk.Duration.hours(2), + connections: [glue.Connection.fromConnectionName(stack, 'Connection', 'connectionName')], + securityConfiguration: glue.SecurityConfiguration.fromSecurityConfigurationName(stack, 'SecurityConfig', 'securityConfigName'), + tags: { + FirstTagName: 'FirstTagValue', + SecondTagName: 'SecondTagValue', + XTagName: 'XTagValue', + }, + numberOfWorkers: 2, + maxRetries: 2, + }); + }); + + test('Test job attributes', () => { + expect(job.jobArn).toEqual(stack.formatArn({ + service: 'glue', + resource: 'job', + resourceName: job.jobName, + })); + 
expect(job.grantPrincipal).toEqual(role); + }); + + test('Custom Job Name and Description', () => { + Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { + Name: 'ScalaSparkEtlJob', + Description: 'This is a description', + }); + }); + + test('Overriden Glue Version should be 3.0', () => { + Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { + GlueVersion: '3.0', + }); + }); + + test('Verify Default Arguemnts', () => { + Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { + DefaultArguments: Match.objectLike({ + '--enable-metrics': '', + '--enable-observability-metrics': 'true', + '--job-language': 'scala', + }), + }); + }); + + test('Overriden numberOfWorkers should be 2', () => { + Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { + NumberOfWorkers: 2, + }); + }); + + test('Overriden WorkerType should be G.2X', () => { + Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { + WorkerType: glue.WorkerType.G_2X, + }); + }); + + test('Overriden max retries should be 2', () => { + Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { + MaxRetries: 2, + }); + }); + + test('Overriden max concurrent runs should be 100', () => { + Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { + ExecutionProperty: { + MaxConcurrentRuns: 100, + }, + }); + }); + + test('Overriden timeout should be 2 hours', () => { + Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { + Timeout: 120, + }); + }); + + test('Overriden connections should be 100', () => { + Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { + Connections: { + Connections: ['connectionName'], + }, + }); + }); + + test('Overriden security configuration should be set', () => { + Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { + SecurityConfiguration: 'securityConfigName', + }); + }); + + test('Should have tags', () => { + 
Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { + Tags: { + FirstTagName: 'FirstTagValue', + SecondTagName: 'SecondTagValue', + XTagName: 'XTagValue', + }, + }); + }); + }); + + describe('Create ScalaSpark ETL Job with extraJars and extraFiles', () => { + + beforeEach(() => { + job = new glue.ScalaSparkEtlJob(stack, 'ScalaSparkEtlJob', { + role, + script, + jobName: 'ScalaSparkEtlJob', + className, + extraJars: [ + glue.Code.fromBucket( + s3.Bucket.fromBucketName(stack, 'extraJarsBucket', 'extra-jars-bucket'), + 'prefix/file.jar'), + ], + }); + }); + + test('Test default attributes', () => { + expect(job.jobArn).toEqual(stack.formatArn({ + service: 'glue', + resource: 'job', + resourceName: job.jobName, + })); + expect(job.grantPrincipal).toEqual(role); + }); + + test('Default Glue Version should be 4.0', () => { + Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { + GlueVersion: glue.GlueVersion.V4_0, + }); + }); + + test('Verify Default Arguemnts', () => { + Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { + DefaultArguments: Match.objectLike({ + '--enable-metrics': '', + '--enable-observability-metrics': 'true', + '--job-language': 'scala', + '--enable-continuous-cloudwatch-log': 'true', + '--extra-jars': 's3://extra-jars-bucket/prefix/file.jar', + '--extra-files': 's3://extra-files-bucket/prefix/file.txt', + }), + }); + }); + + test('Default numberOfWorkers should be 10', () => { + Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { + NumberOfWorkers: 10, + }); + }); + + test('Default WorkerType should be G.1X', () => { + Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { + WorkerType: 'G.1X', + }); + }); + }); + + describe('Override SparkUI properties for ScalaSpark ETL Job', () => { + + beforeEach(() => { + sparkUIBucket = new s3.Bucket(stack, 'sparkUIbucket', { bucketName: 'bucket-name' }); + job = new glue.ScalaSparkEtlJob(stack, 'ScalaSparkEtlJob', { + role, + script, 
+ jobName: 'ScalaSparkEtlJob', + className, + sparkUI: { + bucket: sparkUIBucket, + prefix: '/prefix', + }, + }); + }); + + test('Test default attributes', () => { + expect(job.jobArn).toEqual(stack.formatArn({ + service: 'glue', + resource: 'job', + resourceName: job.jobName, + })); + expect(job.grantPrincipal).toEqual(role); + }); + + test('Default Glue Version should be 4.0', () => { + Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { + GlueVersion: glue.GlueVersion.V4_0, + }); + }); + + test('Has Continuous Logging and SparkUIEnabled', () => { + Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { + DefaultArguments: Match.objectLike({ + '--enable-metrics': '', + '--enable-observability-metrics': 'true', + '--job-language': 'scala', + '--enable-continuous-cloudwatch-log': 'true', + '--enable-spark-ui': 'true', + '--spark-event-logs-path': Match.objectLike({ + 'Fn::Join': [ + '', + [ + 's3://', + { Ref: Match.anyValue() }, + '/prefix/', + ], + ], + }), + }), + }); + }); + }); }); \ No newline at end of file diff --git a/packages/@aws-cdk/aws-glue-alpha/test/scalaspark-flex-etl-jobs.test.ts b/packages/@aws-cdk/aws-glue-alpha/test/scalaspark-flex-etl-jobs.test.ts index e34afefea0b10..3eed0c7bebd74 100644 --- a/packages/@aws-cdk/aws-glue-alpha/test/scalaspark-flex-etl-jobs.test.ts +++ b/packages/@aws-cdk/aws-glue-alpha/test/scalaspark-flex-etl-jobs.test.ts @@ -2,7 +2,8 @@ import * as cdk from 'aws-cdk-lib'; import * as glue from '../lib'; import * as iam from 'aws-cdk-lib/aws-iam'; import * as s3 from 'aws-cdk-lib/aws-s3'; -import { Template } from 'aws-cdk-lib/assertions'; +import { Template, Match } from 'aws-cdk-lib/assertions'; +import { LogGroup } from 'aws-cdk-lib/aws-logs'; describe('Job', () => { let stack: cdk.Stack; @@ -11,6 +12,7 @@ describe('Job', () => { let codeBucket: s3.IBucket; let job: glue.IJob; let className: string; + let sparkUIBucket: s3.Bucket; beforeEach(() => { stack = new cdk.Stack(); @@ -53,5 +55,316 @@ 
describe('Job', () => { }); }); + test('Has Continuous Logging Enabled', () => { + Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { + DefaultArguments: Match.objectLike({ + '--enable-metrics': '', + '--enable-observability-metrics': 'true', + '--job-language': 'scala', + '--enable-continuous-cloudwatch-log': 'true', + }), + }); + }); + + test('Default numberOfWorkers should be 10', () => { + Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { + NumberOfWorkers: 10, + }); + }); + }); + + describe('Create new ScalaSpark ETL Job with log override parameters', () => { + + beforeEach(() => { + job = new glue.ScalaSparkFlexEtlJob(stack, 'ScalaSparkFlexETLJob', { + jobName: 'ScalaSparkFlexETLJob', + role, + script, + continuousLogging: { + enabled: true, + quiet: true, + logGroup: new LogGroup(stack, 'logGroup', { + logGroupName: '/aws-glue/jobs/${job.jobName}', + }), + logStreamPrefix: 'logStreamPrefix', + conversionPattern: 'convert', + }, + className, + }); + }); + + test('Has Continuous Logging enabled with optional args', () => { + Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { + DefaultArguments: Match.objectLike({ + '--enable-metrics': '', + '--enable-observability-metrics': 'true', + '--job-language': 'scala', + '--continuous-log-logGroup': Match.objectLike({ + Ref: Match.anyValue(), + }), + '--enable-continuous-cloudwatch-log': 'true', + '--enable-continuous-log-filter': 'true', + '--continuous-log-logStreamPrefix': 'logStreamPrefix', + '--continuous-log-conversionPattern': 'convert', + }), + }); + }); + + }); + + describe('Create new ScalaSpark ETL Flex Job with logging explicitly disabled', () => { + + beforeEach(() => { + job = new glue.ScalaSparkFlexEtlJob(stack, 'ScalaSparkFlexETLJob', { + jobName: 'ScalaSparkFlexETLJob', + role, + script, + continuousLogging: { + enabled: false, + }, + className, + }); + }); + + test('Has Continuous Logging Disabled', () => { + 
Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { + DefaultArguments: { + '--enable-metrics': '', + '--enable-observability-metrics': 'true', + '--job-language': 'scala', + }, + }); + }); + }); + + describe('Create ScalaSpark Flex ETL Job with optional properties', () => { + + beforeEach(() => { + job = new glue.ScalaSparkFlexEtlJob(stack, 'ScalaSparkFlexEtlJob', { + jobName: 'ScalaSparkFlexEtlJob', + description: 'This is a description', + role, + script, + className, + glueVersion: glue.GlueVersion.V3_0, + continuousLogging: { enabled: false }, + workerType: glue.WorkerType.G_2X, + maxConcurrentRuns: 100, + timeout: cdk.Duration.hours(2), + connections: [glue.Connection.fromConnectionName(stack, 'Connection', 'connectionName')], + securityConfiguration: glue.SecurityConfiguration.fromSecurityConfigurationName(stack, 'SecurityConfig', 'securityConfigName'), + tags: { + FirstTagName: 'FirstTagValue', + SecondTagName: 'SecondTagValue', + XTagName: 'XTagValue', + }, + numberOfWorkers: 2, + maxRetries: 2, + }); + }); + + test('Test job attributes', () => { + expect(job.jobArn).toEqual(stack.formatArn({ + service: 'glue', + resource: 'job', + resourceName: job.jobName, + })); + expect(job.grantPrincipal).toEqual(role); + }); + + test('Custom Job Name and Description', () => { + Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { + Name: 'ScalaSparkFlexEtlJob', + Description: 'This is a description', + }); + }); + + test('Overriden Glue Version should be 3.0', () => { + Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { + GlueVersion: '3.0', + }); + }); + + test('Verify Default Arguemnts', () => { + Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { + DefaultArguments: Match.objectLike({ + '--enable-metrics': '', + '--enable-observability-metrics': 'true', + '--job-language': 'scala', + }), + }); + }); + + test('Overriden numberOfWorkers should be 2', () => { + 
Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { + NumberOfWorkers: 2, + }); + }); + + test('Overriden WorkerType should be G.2X', () => { + Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { + WorkerType: glue.WorkerType.G_2X, + }); + }); + + test('Overriden max retries should be 2', () => { + Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { + MaxRetries: 2, + }); + }); + + test('Overriden max concurrent runs should be 100', () => { + Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { + ExecutionProperty: { + MaxConcurrentRuns: 100, + }, + }); + }); + + test('Overriden timeout should be 2 hours', () => { + Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { + Timeout: 120, + }); + }); + + test('Overriden connections should be 100', () => { + Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { + Connections: { + Connections: ['connectionName'], + }, + }); + }); + + test('Overriden security configuration should be set', () => { + Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { + SecurityConfiguration: 'securityConfigName', + }); + }); + + test('Should have tags', () => { + Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { + Tags: { + FirstTagName: 'FirstTagValue', + SecondTagName: 'SecondTagValue', + XTagName: 'XTagValue', + }, + }); + }); + }); + + describe('Create ScalaSpark Flex ETL Job with extraJars and extraFiles', () => { + + beforeEach(() => { + job = new glue.ScalaSparkFlexEtlJob(stack, 'ScalaSparkFlexEtlJob', { + role, + script, + jobName: 'ScalaSparkFlexEtlJob', + className, + extraJars: [ + glue.Code.fromBucket( + s3.Bucket.fromBucketName(stack, 'extraJarsBucket', 'extra-jars-bucket'), + 'prefix/file.jar'), + ], + extraFiles: [ + glue.Code.fromBucket( + s3.Bucket.fromBucketName(stack, 'extraFilesBucket', 'extra-files-bucket'), + 'prefix/file.txt'), + ], + }); + }); + + test('Test default attributes', () => { + 
expect(job.jobArn).toEqual(stack.formatArn({ + service: 'glue', + resource: 'job', + resourceName: job.jobName, + })); + expect(job.grantPrincipal).toEqual(role); + }); + + test('Default Glue Version should be 4.0', () => { + Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { + GlueVersion: glue.GlueVersion.V4_0, + }); + }); + + test('Verify Default Arguemnts', () => { + Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { + DefaultArguments: Match.objectLike({ + '--enable-metrics': '', + '--enable-observability-metrics': 'true', + '--job-language': 'scala', + '--enable-continuous-cloudwatch-log': 'true', + '--extra-jars': 's3://extra-jars-bucket/prefix/file.jar', + '--extra-files': 's3://extra-files-bucket/prefix/file.txt', + }), + }); + }); + + test('Default numberOfWorkers should be 10', () => { + Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { + NumberOfWorkers: 10, + }); + }); + + test('Default WorkerType should be G.1X', () => { + Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { + WorkerType: 'G.1X', + }); + }); + }); + + describe('Override SparkUI properties for ScalaSpark Flex ETL Job', () => { + + beforeEach(() => { + sparkUIBucket = new s3.Bucket(stack, 'sparkUIbucket', { bucketName: 'bucket-name' }); + job = new glue.ScalaSparkFlexEtlJob(stack, 'ScalaSparkFlexEtlJob', { + role, + script, + jobName: 'ScalaSparkFlexEtlJob', + className, + sparkUI: { + bucket: sparkUIBucket, + prefix: '/prefix', + }, + }); + }); + + test('Test default attributes', () => { + expect(job.jobArn).toEqual(stack.formatArn({ + service: 'glue', + resource: 'job', + resourceName: job.jobName, + })); + expect(job.grantPrincipal).toEqual(role); + }); + + test('Default Glue Version should be 4.0', () => { + Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { + GlueVersion: glue.GlueVersion.V4_0, + }); + }); + + test('Has Continuous Logging and SparkUIEnabled', () => { + 
Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { + DefaultArguments: Match.objectLike({ + '--enable-metrics': '', + '--enable-observability-metrics': 'true', + '--job-language': 'scala', + '--enable-continuous-cloudwatch-log': 'true', + '--enable-spark-ui': 'true', + '--spark-event-logs-path': Match.objectLike({ + 'Fn::Join': [ + '', + [ + 's3://', + { Ref: Match.anyValue() }, + '/prefix/', + ], + ], + }), + }), + }); + }); }); }); \ No newline at end of file diff --git a/packages/@aws-cdk/aws-glue-alpha/test/scalaspark-streaming-jobs.test.ts b/packages/@aws-cdk/aws-glue-alpha/test/scalaspark-streaming-jobs.test.ts index b6c44f3f0a154..ceb77d9aede7c 100644 --- a/packages/@aws-cdk/aws-glue-alpha/test/scalaspark-streaming-jobs.test.ts +++ b/packages/@aws-cdk/aws-glue-alpha/test/scalaspark-streaming-jobs.test.ts @@ -2,7 +2,8 @@ import * as cdk from 'aws-cdk-lib'; import * as glue from '../lib'; import * as iam from 'aws-cdk-lib/aws-iam'; import * as s3 from 'aws-cdk-lib/aws-s3'; -import { Template } from 'aws-cdk-lib/assertions'; +import { Template, Match } from 'aws-cdk-lib/assertions'; +import { LogGroup } from 'aws-cdk-lib/aws-logs'; describe('Job', () => { let stack: cdk.Stack; @@ -11,6 +12,7 @@ describe('Job', () => { let codeBucket: s3.IBucket; let job: glue.IJob; let className: string; + let sparkUIBucket: s3.Bucket; beforeEach(() => { stack = new cdk.Stack(); @@ -52,5 +54,256 @@ describe('Job', () => { WorkerType: 'G.1X', }); }); + + test('Has Continuous Logging Enabled', () => { + Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { + DefaultArguments: Match.objectLike({ + '--enable-metrics': '', + '--enable-observability-metrics': 'true', + '--job-language': 'scala', + '--enable-continuous-cloudwatch-log': 'true', + }), + }); + }); + + test('Default numberOfWorkers should be 10', () => { + Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { + NumberOfWorkers: 10, + }); + }); + }); + + describe('Create new 
ScalaSpark Streaming Job with log override parameters', () => { + + beforeEach(() => { + job = new glue.ScalaSparkStreamingJob(stack, 'ScalaSparkStreamingJob', { + jobName: 'ScalaSparkStreamingJob', + role, + script, + continuousLogging: { + enabled: true, + quiet: true, + logGroup: new LogGroup(stack, 'logGroup', { + logGroupName: '/aws-glue/jobs/${job.jobName}', + }), + logStreamPrefix: 'logStreamPrefix', + conversionPattern: 'convert', + }, + className, + }); + }); + + test('Has Continuous Logging enabled with optional args', () => { + Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { + DefaultArguments: Match.objectLike({ + '--enable-metrics': '', + '--enable-observability-metrics': 'true', + '--job-language': 'scala', + '--continuous-log-logGroup': Match.objectLike({ + Ref: Match.anyValue(), + }), + '--enable-continuous-cloudwatch-log': 'true', + '--enable-continuous-log-filter': 'true', + '--continuous-log-logStreamPrefix': 'logStreamPrefix', + '--continuous-log-conversionPattern': 'convert', + }), + }); + }); + + }); + + describe('Create new ScalaSpark Streaming Job with logging explicitly disabled', () => { + + beforeEach(() => { + job = new glue.ScalaSparkStreamingJob(stack, 'ScalaSparkStreamingJob', { + jobName: 'ScalaSparkStreamingJob', + role, + script, + continuousLogging: { + enabled: false, + }, + className, + }); + }); + + test('Has Continuous Logging Disabled', () => { + Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { + DefaultArguments: { + '--enable-metrics': '', + '--enable-observability-metrics': 'true', + '--job-language': 'scala', + }, + }); + }); + + }); + + describe('Create ScalaSpark Streaming ETL Job with optional properties', () => { + + beforeEach(() => { + job = new glue.ScalaSparkStreamingJob(stack, 'ScalaSparkStreamingJob', { + jobName: 'ScalaSparkStreamingJob', + description: 'This is a description', + role, + script, + className, + glueVersion: glue.GlueVersion.V3_0, + continuousLogging: { 
enabled: false }, + workerType: glue.WorkerType.G_2X, + maxConcurrentRuns: 100, + timeout: cdk.Duration.hours(2), + connections: [glue.Connection.fromConnectionName(stack, 'Connection', 'connectionName')], + securityConfiguration: glue.SecurityConfiguration.fromSecurityConfigurationName(stack, 'SecurityConfig', 'securityConfigName'), + tags: { + FirstTagName: 'FirstTagValue', + SecondTagName: 'SecondTagValue', + XTagName: 'XTagValue', + }, + numberOfWorkers: 2, + maxRetries: 2, + }); + }); + + test('Test job attributes', () => { + expect(job.jobArn).toEqual(stack.formatArn({ + service: 'glue', + resource: 'job', + resourceName: job.jobName, + })); + expect(job.grantPrincipal).toEqual(role); + }); + + test('Custom Job Name and Description', () => { + Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { + Name: 'ScalaSparkStreamingJob', + Description: 'This is a description', + }); + }); + + test('Overriden Glue Version should be 3.0', () => { + Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { + GlueVersion: '3.0', + }); + }); + + test('Verify Default Arguemnts', () => { + Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { + DefaultArguments: Match.objectLike({ + '--enable-metrics': '', + '--enable-observability-metrics': 'true', + '--job-language': 'scala', + }), + }); + }); + + test('Overriden numberOfWorkers should be 2', () => { + Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { + NumberOfWorkers: 2, + }); + }); + + test('Overriden WorkerType should be G.2X', () => { + Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { + WorkerType: glue.WorkerType.G_2X, + }); + }); + + test('Overriden max retries should be 2', () => { + Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { + MaxRetries: 2, + }); + }); + + test('Overriden max concurrent runs should be 100', () => { + Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { + ExecutionProperty: { + 
MaxConcurrentRuns: 100, + }, + }); + }); + + test('Overriden timeout should be 2 hours', () => { + Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { + Timeout: 120, + }); + }); + + test('Overriden connections should be 100', () => { + Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { + Connections: { + Connections: ['connectionName'], + }, + }); + }); + + test('Overriden security configuration should be set', () => { + Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { + SecurityConfiguration: 'securityConfigName', + }); + }); + + test('Should have tags', () => { + Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { + Tags: { + FirstTagName: 'FirstTagValue', + SecondTagName: 'SecondTagValue', + XTagName: 'XTagValue', + }, + }); + }); + }); + + describe('Override SparkUI properties for ScalaSpark Streaming ETL Job', () => { + + beforeEach(() => { + sparkUIBucket = new s3.Bucket(stack, 'sparkUIbucket', { bucketName: 'bucket-name' }); + job = new glue.ScalaSparkStreamingJob(stack, 'ScalaSparkStreamingJob', { + role, + script, + jobName: 'ScalaSparkStreamingJob', + className, + sparkUI: { + bucket: sparkUIBucket, + prefix: '/prefix', + }, + }); + }); + + test('Test default attributes', () => { + expect(job.jobArn).toEqual(stack.formatArn({ + service: 'glue', + resource: 'job', + resourceName: job.jobName, + })); + expect(job.grantPrincipal).toEqual(role); + }); + + test('Default Glue Version should be 4.0', () => { + Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { + GlueVersion: glue.GlueVersion.V4_0, + }); + }); + + test('Has Continuous Logging and SparkUIEnabled', () => { + Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { + DefaultArguments: Match.objectLike({ + '--enable-metrics': '', + '--enable-observability-metrics': 'true', + '--job-language': 'scala', + '--enable-continuous-cloudwatch-log': 'true', + '--enable-spark-ui': 'true', + 
'--spark-event-logs-path': Match.objectLike({ + 'Fn::Join': [ + '', + [ + 's3://', + { Ref: Match.anyValue() }, + '/prefix/', + ], + ], + }), + }), + }); + }); }); }); From b601916be440716190c4504e8b0bb00626664f4b Mon Sep 17 00:00:00 2001 From: Prashanna B Date: Thu, 18 Jul 2024 17:09:31 +0000 Subject: [PATCH 43/51] fixed the unit test for scala etl and flex etl --- .../aws-glue-alpha/test/scalaspark-etl-jobs.test.ts | 3 +-- .../aws-glue-alpha/test/scalaspark-flex-etl-jobs.test.ts | 8 ++++---- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/packages/@aws-cdk/aws-glue-alpha/test/scalaspark-etl-jobs.test.ts b/packages/@aws-cdk/aws-glue-alpha/test/scalaspark-etl-jobs.test.ts index 6326019a4bcf6..fa11b188aed67 100644 --- a/packages/@aws-cdk/aws-glue-alpha/test/scalaspark-etl-jobs.test.ts +++ b/packages/@aws-cdk/aws-glue-alpha/test/scalaspark-etl-jobs.test.ts @@ -39,7 +39,7 @@ describe('Job', () => { test('Default Glue Version should be 4.0', () => { Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { - GlueVersion: '4.0', + GlueVersion: glue.GlueVersion.V4_0, }); }); @@ -292,7 +292,6 @@ describe('Job', () => { '--job-language': 'scala', '--enable-continuous-cloudwatch-log': 'true', '--extra-jars': 's3://extra-jars-bucket/prefix/file.jar', - '--extra-files': 's3://extra-files-bucket/prefix/file.txt', }), }); }); diff --git a/packages/@aws-cdk/aws-glue-alpha/test/scalaspark-flex-etl-jobs.test.ts b/packages/@aws-cdk/aws-glue-alpha/test/scalaspark-flex-etl-jobs.test.ts index 3eed0c7bebd74..6f51145261f87 100644 --- a/packages/@aws-cdk/aws-glue-alpha/test/scalaspark-flex-etl-jobs.test.ts +++ b/packages/@aws-cdk/aws-glue-alpha/test/scalaspark-flex-etl-jobs.test.ts @@ -282,9 +282,9 @@ describe('Job', () => { expect(job.grantPrincipal).toEqual(role); }); - test('Default Glue Version should be 4.0', () => { + test('Default Glue Version should be 3.0', () => { Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { - GlueVersion: 
glue.GlueVersion.V4_0, + GlueVersion: glue.GlueVersion.V3_0, }); }); @@ -339,9 +339,9 @@ describe('Job', () => { expect(job.grantPrincipal).toEqual(role); }); - test('Default Glue Version should be 4.0', () => { + test('Default Glue Version should be 3.0', () => { Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { - GlueVersion: glue.GlueVersion.V4_0, + GlueVersion: glue.GlueVersion.V3_0, }); }); From cdba008c3006759d55c097bd204aa34ef93f5607 Mon Sep 17 00:00:00 2001 From: Natalie White Date: Thu, 18 Jul 2024 18:03:10 +0000 Subject: [PATCH 44/51] Fixing scala etl and pyspark streaming unit tests --- .../test/pyspark-streaming-jobs.test.ts | 20 +++++++++---------- .../test/scalaspark-etl-jobs.test.ts | 3 +-- .../test/scalaspark-flex-etl-jobs.test.ts | 8 ++++---- 3 files changed, 15 insertions(+), 16 deletions(-) diff --git a/packages/@aws-cdk/aws-glue-alpha/test/pyspark-streaming-jobs.test.ts b/packages/@aws-cdk/aws-glue-alpha/test/pyspark-streaming-jobs.test.ts index 5c232a61e6566..959484f42aa69 100644 --- a/packages/@aws-cdk/aws-glue-alpha/test/pyspark-streaming-jobs.test.ts +++ b/packages/@aws-cdk/aws-glue-alpha/test/pyspark-streaming-jobs.test.ts @@ -73,7 +73,7 @@ describe('Job', () => { test('Default Python version should be 3', () => { Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { Command: { - Name: glue.JobType.ETL, + Name: glue.JobType.STREAMING, ScriptLocation: 's3://bucketname/script', PythonVersion: glue.PythonVersion.THREE, }, @@ -81,7 +81,7 @@ describe('Job', () => { }); }); - describe('Create new PySpark ETL Job with log override parameters', () => { + describe('Create new PySpark Streaming Job with log override parameters', () => { beforeEach(() => { job = new glue.PySparkStreamingJob(stack, 'PySparkStreamingJob', { @@ -144,7 +144,7 @@ describe('Job', () => { }); - describe('Create PySpark ETL Job with G2 worker type with 2 workers', () => { + describe('Create PySpark Streaming Job with G2 worker type with 2 
workers', () => { beforeEach(() => { job = new glue.PySparkStreamingJob(stack, 'PySparkStreamingJob', { @@ -197,7 +197,7 @@ describe('Job', () => { test('Default Python version should be 3', () => { Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { Command: { - Name: glue.JobType.ETL, + Name: glue.JobType.STREAMING, ScriptLocation: 's3://bucketname/script', PythonVersion: glue.PythonVersion.THREE, }, @@ -205,7 +205,7 @@ describe('Job', () => { }); }); - describe('Create PySpark ETL Job with G4 worker type with 4 workers', () => { + describe('Create PySpark Streaming Job with G4 worker type with 4 workers', () => { beforeEach(() => { job = new glue.PySparkStreamingJob(stack, 'PySparkStreamingJob', { @@ -256,7 +256,7 @@ describe('Job', () => { }); }); - describe('Create PySpark ETL Job with G8 worker type and 8 workers', () => { + describe('Create PySpark Streaming Job with G8 worker type and 8 workers', () => { beforeEach(() => { job = new glue.PySparkStreamingJob(stack, 'PySparkStreamingJob', { @@ -307,7 +307,7 @@ describe('Job', () => { }); }); - describe('Override SparkUI properties for PySpark ETL Job', () => { + describe('Override SparkUI properties for PySpark Streaming Job', () => { beforeEach(() => { sparkUIBucket = new s3.Bucket(stack, 'sparkUIbucket', { bucketName: 'bucket-name' }); @@ -382,7 +382,7 @@ describe('Job', () => { }); - describe('Create PySpark ETL Job with extraPythonFiles and extraFiles', () => { + describe('Create PySpark Streaming Job with extraPythonFiles and extraFiles', () => { beforeEach(() => { job = new glue.PySparkStreamingJob(stack, 'PySparkStreamingJob', { @@ -443,7 +443,7 @@ describe('Job', () => { }); }); - describe('Create PySpark ETL Job with optional properties', () => { + describe('Create PySpark Streaming Job with optional properties', () => { beforeEach(() => { job = new glue.PySparkStreamingJob(stack, 'PySparkStreamingJob', { @@ -559,7 +559,7 @@ describe('Job', () => { test('Default Python version should 
be 3', () => { Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { Command: { - Name: glue.JobType.ETL, + Name: glue.JobType.STREAMING, ScriptLocation: 's3://bucketname/script', PythonVersion: glue.PythonVersion.THREE, }, diff --git a/packages/@aws-cdk/aws-glue-alpha/test/scalaspark-etl-jobs.test.ts b/packages/@aws-cdk/aws-glue-alpha/test/scalaspark-etl-jobs.test.ts index 6326019a4bcf6..900e1d21ceec2 100644 --- a/packages/@aws-cdk/aws-glue-alpha/test/scalaspark-etl-jobs.test.ts +++ b/packages/@aws-cdk/aws-glue-alpha/test/scalaspark-etl-jobs.test.ts @@ -253,7 +253,7 @@ describe('Job', () => { }); }); - describe('Create ScalaSpark ETL Job with extraJars and extraFiles', () => { + describe('Create ScalaSpark ETL Job with extraJars', () => { beforeEach(() => { job = new glue.ScalaSparkEtlJob(stack, 'ScalaSparkEtlJob', { @@ -292,7 +292,6 @@ describe('Job', () => { '--job-language': 'scala', '--enable-continuous-cloudwatch-log': 'true', '--extra-jars': 's3://extra-jars-bucket/prefix/file.jar', - '--extra-files': 's3://extra-files-bucket/prefix/file.txt', }), }); }); diff --git a/packages/@aws-cdk/aws-glue-alpha/test/scalaspark-flex-etl-jobs.test.ts b/packages/@aws-cdk/aws-glue-alpha/test/scalaspark-flex-etl-jobs.test.ts index 3eed0c7bebd74..6f51145261f87 100644 --- a/packages/@aws-cdk/aws-glue-alpha/test/scalaspark-flex-etl-jobs.test.ts +++ b/packages/@aws-cdk/aws-glue-alpha/test/scalaspark-flex-etl-jobs.test.ts @@ -282,9 +282,9 @@ describe('Job', () => { expect(job.grantPrincipal).toEqual(role); }); - test('Default Glue Version should be 4.0', () => { + test('Default Glue Version should be 3.0', () => { Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { - GlueVersion: glue.GlueVersion.V4_0, + GlueVersion: glue.GlueVersion.V3_0, }); }); @@ -339,9 +339,9 @@ describe('Job', () => { expect(job.grantPrincipal).toEqual(role); }); - test('Default Glue Version should be 4.0', () => { + test('Default Glue Version should be 3.0', () => { 
Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { - GlueVersion: glue.GlueVersion.V4_0, + GlueVersion: glue.GlueVersion.V3_0, }); }); From 3943206565f8b63d99f5cc0cb5c659979235b6de Mon Sep 17 00:00:00 2001 From: "Janardhan (Janny) Molumuri" Date: Tue, 23 Jul 2024 14:57:17 +0000 Subject: [PATCH 45/51] WorkerType and numberofWorkers defaults are enforced when not set --- packages/@aws-cdk/aws-glue-alpha/lib/jobs/ray-job.ts | 4 ---- 1 file changed, 4 deletions(-) diff --git a/packages/@aws-cdk/aws-glue-alpha/lib/jobs/ray-job.ts b/packages/@aws-cdk/aws-glue-alpha/lib/jobs/ray-job.ts index 0822ae797ed24..99271c942d30c 100644 --- a/packages/@aws-cdk/aws-glue-alpha/lib/jobs/ray-job.ts +++ b/packages/@aws-cdk/aws-glue-alpha/lib/jobs/ray-job.ts @@ -75,10 +75,6 @@ export class RayJob extends Job { throw new Error('Ray jobs only support Z.2X worker type'); }; - if ((!props.workerType && props.numberOfWorkers !== undefined) || (props.workerType && props.numberOfWorkers === undefined)) { - throw new Error('Both workerType and numberOFWorkers must be set'); - } - const jobResource = new CfnJob(this, 'Resource', { name: props.jobName, description: props.description, From c077b11e1460df3e48684d94dc7ea775012833b4 Mon Sep 17 00:00:00 2001 From: "Janardhan (Janny) Molumuri" Date: Tue, 23 Jul 2024 15:16:20 +0000 Subject: [PATCH 46/51] Fix tests --- .../aws-glue-alpha/test/ray-job.test.ts | 26 ------------------- 1 file changed, 26 deletions(-) diff --git a/packages/@aws-cdk/aws-glue-alpha/test/ray-job.test.ts b/packages/@aws-cdk/aws-glue-alpha/test/ray-job.test.ts index 1d0bf4e184207..1d261058af566 100644 --- a/packages/@aws-cdk/aws-glue-alpha/test/ray-job.test.ts +++ b/packages/@aws-cdk/aws-glue-alpha/test/ray-job.test.ts @@ -62,12 +62,6 @@ describe('Job', () => { }), }); }); - - test('Default numberOfWorkers should be 3', () => { - Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { - NumberOfWorkers: 3, - }); - }); }); describe('Create new Ray Job 
with log override parameters', () => { @@ -264,25 +258,5 @@ describe('Job', () => { }); }).toThrow(new Error('Ray jobs only support Z.2X worker type')); }); - - test('Create Ray Job overriding only workerType to cause an Error', () => { - expect(() => { - job = new glue.RayJob(stack, 'RayJob', { - role, - script, - workerType: glue.WorkerType.Z_2X, - }); - }).toThrow(new Error('Both workerType and numberOFWorkers must be set')); - }); - - test('Create Ray Job overriding only numberOfWorkers to cause an Error', () => { - expect(() => { - job = new glue.RayJob(stack, 'RayJob', { - role, - script, - numberOfWorkers: 5, - }); - }).toThrow(new Error('Both workerType and numberOFWorkers must be set')); - }); }); }); \ No newline at end of file From dbbaabc8f355751781167e718b9808003790c249 Mon Sep 17 00:00:00 2001 From: "Janardhan (Janny) Molumuri" Date: Wed, 24 Jul 2024 00:34:47 +0000 Subject: [PATCH 47/51] Updated snapshots --- .../aws-glue-job-python-shell.assets.json | 4 +- .../aws-glue-job-python-shell.template.json | 17 +- .../manifest.json | 2 +- .../tree.json | 17 +- .../GlueWorkflowTriggerStack.assets.json | 32 ++ .../GlueWorkflowTriggerStack.template.json | 244 ++++++++++ ...9be7858a12b228a2ae6e5c10faccd9097b1e855.py | 1 + ...efaultTestDeployAssert43E79173.assets.json | 19 + ...aultTestDeployAssert43E79173.template.json | 36 ++ .../test/integ.workflow.js.snapshot/cdk.out | 1 + .../integ.workflow.js.snapshot/integ.json | 12 + .../integ.workflow.js.snapshot/manifest.json | 155 ++++++ .../test/integ.workflow.js.snapshot/tree.json | 448 ++++++++++++++++++ 13 files changed, 981 insertions(+), 7 deletions(-) create mode 100644 packages/@aws-cdk/aws-glue-alpha/test/integ.workflow.js.snapshot/GlueWorkflowTriggerStack.assets.json create mode 100644 packages/@aws-cdk/aws-glue-alpha/test/integ.workflow.js.snapshot/GlueWorkflowTriggerStack.template.json create mode 100644 
packages/@aws-cdk/aws-glue-alpha/test/integ.workflow.js.snapshot/asset.432033e3218068a915d2532fa9be7858a12b228a2ae6e5c10faccd9097b1e855.py create mode 100644 packages/@aws-cdk/aws-glue-alpha/test/integ.workflow.js.snapshot/awscdkglueworkflowtriggerintegDefaultTestDeployAssert43E79173.assets.json create mode 100644 packages/@aws-cdk/aws-glue-alpha/test/integ.workflow.js.snapshot/awscdkglueworkflowtriggerintegDefaultTestDeployAssert43E79173.template.json create mode 100644 packages/@aws-cdk/aws-glue-alpha/test/integ.workflow.js.snapshot/cdk.out create mode 100644 packages/@aws-cdk/aws-glue-alpha/test/integ.workflow.js.snapshot/integ.json create mode 100644 packages/@aws-cdk/aws-glue-alpha/test/integ.workflow.js.snapshot/manifest.json create mode 100644 packages/@aws-cdk/aws-glue-alpha/test/integ.workflow.js.snapshot/tree.json diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-python-shell.js.snapshot/aws-glue-job-python-shell.assets.json b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-python-shell.js.snapshot/aws-glue-job-python-shell.assets.json index 0a415cd107153..522babd056beb 100644 --- a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-python-shell.js.snapshot/aws-glue-job-python-shell.assets.json +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-python-shell.js.snapshot/aws-glue-job-python-shell.assets.json @@ -14,7 +14,7 @@ } } }, - "aeda11f7bb7dfbd52c66176f2e7ae14f20571f1f22ab7988a59bc714daf278a0": { + "c75d6d44cca641f11b82111a563ba198269fa0483d583cbffd578d0301e9edaf": { "source": { "path": "aws-glue-job-python-shell.template.json", "packaging": "file" @@ -22,7 +22,7 @@ "destinations": { "current_account-current_region": { "bucketName": "cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}", - "objectKey": "aeda11f7bb7dfbd52c66176f2e7ae14f20571f1f22ab7988a59bc714daf278a0.json", + "objectKey": "c75d6d44cca641f11b82111a563ba198269fa0483d583cbffd578d0301e9edaf.json", "assumeRoleArn": 
"arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-file-publishing-role-${AWS::AccountId}-${AWS::Region}" } } diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-python-shell.js.snapshot/aws-glue-job-python-shell.template.json b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-python-shell.js.snapshot/aws-glue-job-python-shell.template.json index 9c752c984fef2..d98d7d4485e3b 100644 --- a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-python-shell.js.snapshot/aws-glue-job-python-shell.template.json +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-python-shell.js.snapshot/aws-glue-job-python-shell.template.json @@ -108,7 +108,11 @@ } }, "DefaultArguments": { - "library-set": "analytics" + "--job-language": "python", + "library-set": "analytics", + "--enable-continuous-cloudwatch-log": "true", + "--enable-metrics": "", + "--enable-observability-metrics": "true" }, "GlueVersion": "3.0", "MaxCapacity": 0.0625, @@ -140,7 +144,12 @@ ] } }, - "DefaultArguments": {}, + "DefaultArguments": { + "--job-language": "python", + "--enable-continuous-cloudwatch-log": "true", + "--enable-metrics": "", + "--enable-observability-metrics": "true" + }, "GlueVersion": "1.0", "MaxCapacity": 0.0625, "MaxRetries": 0, @@ -172,7 +181,11 @@ } }, "DefaultArguments": { + "--job-language": "python", "library-set": "analytics", + "--enable-continuous-cloudwatch-log": "true", + "--enable-metrics": "", + "--enable-observability-metrics": "true", "arg1": "value1", "arg2": "value2" }, diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-python-shell.js.snapshot/manifest.json b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-python-shell.js.snapshot/manifest.json index 24a56a14662e5..026dce44eec16 100644 --- a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-python-shell.js.snapshot/manifest.json +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-python-shell.js.snapshot/manifest.json @@ -18,7 +18,7 @@ "validateOnSynth": false, "assumeRoleArn": 
"arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-deploy-role-${AWS::AccountId}-${AWS::Region}", "cloudFormationExecutionRoleArn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-cfn-exec-role-${AWS::AccountId}-${AWS::Region}", - "stackTemplateAssetObjectUrl": "s3://cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}/aeda11f7bb7dfbd52c66176f2e7ae14f20571f1f22ab7988a59bc714daf278a0.json", + "stackTemplateAssetObjectUrl": "s3://cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}/c75d6d44cca641f11b82111a563ba198269fa0483d583cbffd578d0301e9edaf.json", "requiresBootstrapStackVersion": 6, "bootstrapStackVersionSsmParameter": "/cdk-bootstrap/hnb659fds/version", "additionalDependencies": [ diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-python-shell.js.snapshot/tree.json b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-python-shell.js.snapshot/tree.json index 8f39bfe79fe3c..4c23b18fce55d 100644 --- a/packages/@aws-cdk/aws-glue-alpha/test/integ.job-python-shell.js.snapshot/tree.json +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.job-python-shell.js.snapshot/tree.json @@ -193,7 +193,11 @@ "pythonVersion": "3.9" }, "defaultArguments": { - "library-set": "analytics" + "--job-language": "python", + "library-set": "analytics", + "--enable-continuous-cloudwatch-log": "true", + "--enable-metrics": "", + "--enable-observability-metrics": "true" }, "glueVersion": "3.0", "maxCapacity": 0.0625, @@ -243,7 +247,12 @@ }, "pythonVersion": "3" }, - "defaultArguments": {}, + "defaultArguments": { + "--job-language": "python", + "--enable-continuous-cloudwatch-log": "true", + "--enable-metrics": "", + "--enable-observability-metrics": "true" + }, "glueVersion": "1.0", "maxCapacity": 0.0625, "maxRetries": 0, @@ -293,7 +302,11 @@ "pythonVersion": "3.9" }, "defaultArguments": { + "--job-language": "python", "library-set": "analytics", + "--enable-continuous-cloudwatch-log": "true", + "--enable-metrics": "", + "--enable-observability-metrics": 
"true", "arg1": "value1", "arg2": "value2" }, diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.workflow.js.snapshot/GlueWorkflowTriggerStack.assets.json b/packages/@aws-cdk/aws-glue-alpha/test/integ.workflow.js.snapshot/GlueWorkflowTriggerStack.assets.json new file mode 100644 index 0000000000000..020d92b9ce3ed --- /dev/null +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.workflow.js.snapshot/GlueWorkflowTriggerStack.assets.json @@ -0,0 +1,32 @@ +{ + "version": "36.0.0", + "files": { + "432033e3218068a915d2532fa9be7858a12b228a2ae6e5c10faccd9097b1e855": { + "source": { + "path": "asset.432033e3218068a915d2532fa9be7858a12b228a2ae6e5c10faccd9097b1e855.py", + "packaging": "file" + }, + "destinations": { + "current_account-current_region": { + "bucketName": "cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}", + "objectKey": "432033e3218068a915d2532fa9be7858a12b228a2ae6e5c10faccd9097b1e855.py", + "assumeRoleArn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-file-publishing-role-${AWS::AccountId}-${AWS::Region}" + } + } + }, + "db15b89b0de33d7503c531cae5fa3f18506eb8982953470211e04f53dfe9a2da": { + "source": { + "path": "GlueWorkflowTriggerStack.template.json", + "packaging": "file" + }, + "destinations": { + "current_account-current_region": { + "bucketName": "cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}", + "objectKey": "db15b89b0de33d7503c531cae5fa3f18506eb8982953470211e04f53dfe9a2da.json", + "assumeRoleArn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-file-publishing-role-${AWS::AccountId}-${AWS::Region}" + } + } + } + }, + "dockerImages": {} +} \ No newline at end of file diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.workflow.js.snapshot/GlueWorkflowTriggerStack.template.json b/packages/@aws-cdk/aws-glue-alpha/test/integ.workflow.js.snapshot/GlueWorkflowTriggerStack.template.json new file mode 100644 index 0000000000000..371975c8eeeb2 --- /dev/null +++ 
b/packages/@aws-cdk/aws-glue-alpha/test/integ.workflow.js.snapshot/GlueWorkflowTriggerStack.template.json @@ -0,0 +1,244 @@ +{ + "Resources": { + "Workflow193EF7C1": { + "Type": "AWS::Glue::Workflow", + "Properties": { + "Description": "MyWorkflow" + } + }, + "WorkflowOnDemandTriggerEE8E75A1": { + "Type": "AWS::Glue::Trigger", + "Properties": { + "Actions": [ + { + "JobName": { + "Ref": "InboundJobEDA3CBF4" + } + } + ], + "Type": "ON_DEMAND", + "WorkflowName": { + "Ref": "Workflow193EF7C1" + } + } + }, + "WorkflowConditionalTrigger133C0CA8": { + "Type": "AWS::Glue::Trigger", + "Properties": { + "Actions": [ + { + "JobName": { + "Ref": "OutboundJobB5826414" + } + } + ], + "EventBatchingCondition": { + "BatchSize": 1, + "BatchWindow": 900 + }, + "Predicate": { + "Conditions": [ + { + "JobName": { + "Ref": "InboundJobEDA3CBF4" + }, + "LogicalOperator": "EQUALS", + "State": "SUCCEEDED" + } + ] + }, + "Type": "CONDITIONAL", + "WorkflowName": { + "Ref": "Workflow193EF7C1" + } + } + }, + "JobRole014917C6": { + "Type": "AWS::IAM::Role", + "Properties": { + "AssumeRolePolicyDocument": { + "Statement": [ + { + "Action": "sts:AssumeRole", + "Effect": "Allow", + "Principal": { + "Service": "glue.amazonaws.com" + } + } + ], + "Version": "2012-10-17" + } + } + }, + "JobRoleDefaultPolicy5DE0D8F9": { + "Type": "AWS::IAM::Policy", + "Properties": { + "PolicyDocument": { + "Statement": [ + { + "Action": [ + "s3:GetBucket*", + "s3:GetObject*", + "s3:List*" + ], + "Effect": "Allow", + "Resource": [ + { + "Fn::Join": [ + "", + [ + "arn:", + { + "Ref": "AWS::Partition" + }, + ":s3:::", + { + "Fn::Sub": "cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}" + }, + "/*" + ] + ] + }, + { + "Fn::Join": [ + "", + [ + "arn:", + { + "Ref": "AWS::Partition" + }, + ":s3:::", + { + "Fn::Sub": "cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}" + } + ] + ] + } + ] + } + ], + "Version": "2012-10-17" + }, + "PolicyName": "JobRoleDefaultPolicy5DE0D8F9", + "Roles": [ + { + "Ref": 
"JobRole014917C6" + } + ] + } + }, + "OutboundJobB5826414": { + "Type": "AWS::Glue::Job", + "Properties": { + "Command": { + "Name": "glueetl", + "PythonVersion": "3", + "ScriptLocation": { + "Fn::Join": [ + "", + [ + "s3://", + { + "Fn::Sub": "cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}" + }, + "/432033e3218068a915d2532fa9be7858a12b228a2ae6e5c10faccd9097b1e855.py" + ] + ] + } + }, + "DefaultArguments": { + "--job-language": "python", + "--enable-continuous-cloudwatch-log": "true", + "--enable-metrics": "", + "--enable-observability-metrics": "true" + }, + "GlueVersion": "4.0", + "NumberOfWorkers": 2, + "Role": { + "Fn::GetAtt": [ + "JobRole014917C6", + "Arn" + ] + }, + "WorkerType": "G.2X" + } + }, + "InboundJobEDA3CBF4": { + "Type": "AWS::Glue::Job", + "Properties": { + "Command": { + "Name": "glueetl", + "PythonVersion": "3", + "ScriptLocation": { + "Fn::Join": [ + "", + [ + "s3://", + { + "Fn::Sub": "cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}" + }, + "/432033e3218068a915d2532fa9be7858a12b228a2ae6e5c10faccd9097b1e855.py" + ] + ] + } + }, + "DefaultArguments": { + "--job-language": "python", + "--enable-continuous-cloudwatch-log": "true", + "--enable-metrics": "", + "--enable-observability-metrics": "true" + }, + "GlueVersion": "4.0", + "NumberOfWorkers": 2, + "Role": { + "Fn::GetAtt": [ + "JobRole014917C6", + "Arn" + ] + }, + "WorkerType": "G.2X" + } + } + }, + "Outputs": { + "WorkflowName": { + "Value": { + "Ref": "Workflow193EF7C1" + } + } + }, + "Parameters": { + "BootstrapVersion": { + "Type": "AWS::SSM::Parameter::Value", + "Default": "/cdk-bootstrap/hnb659fds/version", + "Description": "Version of the CDK Bootstrap resources in this environment, automatically retrieved from SSM Parameter Store. 
[cdk:skip]" + } + }, + "Rules": { + "CheckBootstrapVersion": { + "Assertions": [ + { + "Assert": { + "Fn::Not": [ + { + "Fn::Contains": [ + [ + "1", + "2", + "3", + "4", + "5" + ], + { + "Ref": "BootstrapVersion" + } + ] + } + ] + }, + "AssertDescription": "CDK bootstrap stack version 6 required. Please run 'cdk bootstrap' with a recent version of the CDK CLI." + } + ] + } + } +} \ No newline at end of file diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.workflow.js.snapshot/asset.432033e3218068a915d2532fa9be7858a12b228a2ae6e5c10faccd9097b1e855.py b/packages/@aws-cdk/aws-glue-alpha/test/integ.workflow.js.snapshot/asset.432033e3218068a915d2532fa9be7858a12b228a2ae6e5c10faccd9097b1e855.py new file mode 100644 index 0000000000000..e75154b7c390f --- /dev/null +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.workflow.js.snapshot/asset.432033e3218068a915d2532fa9be7858a12b228a2ae6e5c10faccd9097b1e855.py @@ -0,0 +1 @@ +print("hello world") \ No newline at end of file diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.workflow.js.snapshot/awscdkglueworkflowtriggerintegDefaultTestDeployAssert43E79173.assets.json b/packages/@aws-cdk/aws-glue-alpha/test/integ.workflow.js.snapshot/awscdkglueworkflowtriggerintegDefaultTestDeployAssert43E79173.assets.json new file mode 100644 index 0000000000000..d2484f5013f09 --- /dev/null +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.workflow.js.snapshot/awscdkglueworkflowtriggerintegDefaultTestDeployAssert43E79173.assets.json @@ -0,0 +1,19 @@ +{ + "version": "36.0.0", + "files": { + "21fbb51d7b23f6a6c262b46a9caee79d744a3ac019fd45422d988b96d44b2a22": { + "source": { + "path": "awscdkglueworkflowtriggerintegDefaultTestDeployAssert43E79173.template.json", + "packaging": "file" + }, + "destinations": { + "current_account-current_region": { + "bucketName": "cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}", + "objectKey": "21fbb51d7b23f6a6c262b46a9caee79d744a3ac019fd45422d988b96d44b2a22.json", + "assumeRoleArn": 
"arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-file-publishing-role-${AWS::AccountId}-${AWS::Region}" + } + } + } + }, + "dockerImages": {} +} \ No newline at end of file diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.workflow.js.snapshot/awscdkglueworkflowtriggerintegDefaultTestDeployAssert43E79173.template.json b/packages/@aws-cdk/aws-glue-alpha/test/integ.workflow.js.snapshot/awscdkglueworkflowtriggerintegDefaultTestDeployAssert43E79173.template.json new file mode 100644 index 0000000000000..ad9d0fb73d1dd --- /dev/null +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.workflow.js.snapshot/awscdkglueworkflowtriggerintegDefaultTestDeployAssert43E79173.template.json @@ -0,0 +1,36 @@ +{ + "Parameters": { + "BootstrapVersion": { + "Type": "AWS::SSM::Parameter::Value", + "Default": "/cdk-bootstrap/hnb659fds/version", + "Description": "Version of the CDK Bootstrap resources in this environment, automatically retrieved from SSM Parameter Store. [cdk:skip]" + } + }, + "Rules": { + "CheckBootstrapVersion": { + "Assertions": [ + { + "Assert": { + "Fn::Not": [ + { + "Fn::Contains": [ + [ + "1", + "2", + "3", + "4", + "5" + ], + { + "Ref": "BootstrapVersion" + } + ] + } + ] + }, + "AssertDescription": "CDK bootstrap stack version 6 required. Please run 'cdk bootstrap' with a recent version of the CDK CLI." 
+ } + ] + } + } +} \ No newline at end of file diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.workflow.js.snapshot/cdk.out b/packages/@aws-cdk/aws-glue-alpha/test/integ.workflow.js.snapshot/cdk.out new file mode 100644 index 0000000000000..1f0068d32659a --- /dev/null +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.workflow.js.snapshot/cdk.out @@ -0,0 +1 @@ +{"version":"36.0.0"} \ No newline at end of file diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.workflow.js.snapshot/integ.json b/packages/@aws-cdk/aws-glue-alpha/test/integ.workflow.js.snapshot/integ.json new file mode 100644 index 0000000000000..dc0019765cecf --- /dev/null +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.workflow.js.snapshot/integ.json @@ -0,0 +1,12 @@ +{ + "version": "36.0.0", + "testCases": { + "aws-cdk-glue-workflow-trigger-integ/DefaultTest": { + "stacks": [ + "GlueWorkflowTriggerStack" + ], + "assertionStack": "aws-cdk-glue-workflow-trigger-integ/DefaultTest/DeployAssert", + "assertionStackName": "awscdkglueworkflowtriggerintegDefaultTestDeployAssert43E79173" + } + } +} \ No newline at end of file diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.workflow.js.snapshot/manifest.json b/packages/@aws-cdk/aws-glue-alpha/test/integ.workflow.js.snapshot/manifest.json new file mode 100644 index 0000000000000..9b01d7d71edae --- /dev/null +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.workflow.js.snapshot/manifest.json @@ -0,0 +1,155 @@ +{ + "version": "36.0.0", + "artifacts": { + "GlueWorkflowTriggerStack.assets": { + "type": "cdk:asset-manifest", + "properties": { + "file": "GlueWorkflowTriggerStack.assets.json", + "requiresBootstrapStackVersion": 6, + "bootstrapStackVersionSsmParameter": "/cdk-bootstrap/hnb659fds/version" + } + }, + "GlueWorkflowTriggerStack": { + "type": "aws:cloudformation:stack", + "environment": "aws://unknown-account/unknown-region", + "properties": { + "templateFile": "GlueWorkflowTriggerStack.template.json", + "terminationProtection": 
false, + "validateOnSynth": false, + "assumeRoleArn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-deploy-role-${AWS::AccountId}-${AWS::Region}", + "cloudFormationExecutionRoleArn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-cfn-exec-role-${AWS::AccountId}-${AWS::Region}", + "stackTemplateAssetObjectUrl": "s3://cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}/db15b89b0de33d7503c531cae5fa3f18506eb8982953470211e04f53dfe9a2da.json", + "requiresBootstrapStackVersion": 6, + "bootstrapStackVersionSsmParameter": "/cdk-bootstrap/hnb659fds/version", + "additionalDependencies": [ + "GlueWorkflowTriggerStack.assets" + ], + "lookupRole": { + "arn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-lookup-role-${AWS::AccountId}-${AWS::Region}", + "requiresBootstrapStackVersion": 8, + "bootstrapStackVersionSsmParameter": "/cdk-bootstrap/hnb659fds/version" + } + }, + "dependencies": [ + "GlueWorkflowTriggerStack.assets" + ], + "metadata": { + "/GlueWorkflowTriggerStack/Workflow/Resource": [ + { + "type": "aws:cdk:logicalId", + "data": "Workflow193EF7C1" + } + ], + "/GlueWorkflowTriggerStack/Workflow/OnDemandTrigger": [ + { + "type": "aws:cdk:logicalId", + "data": "WorkflowOnDemandTriggerEE8E75A1" + } + ], + "/GlueWorkflowTriggerStack/Workflow/ConditionalTrigger": [ + { + "type": "aws:cdk:logicalId", + "data": "WorkflowConditionalTrigger133C0CA8" + } + ], + "/GlueWorkflowTriggerStack/JobRole/Resource": [ + { + "type": "aws:cdk:logicalId", + "data": "JobRole014917C6" + } + ], + "/GlueWorkflowTriggerStack/JobRole/DefaultPolicy/Resource": [ + { + "type": "aws:cdk:logicalId", + "data": "JobRoleDefaultPolicy5DE0D8F9" + } + ], + "/GlueWorkflowTriggerStack/OutboundJob/Resource": [ + { + "type": "aws:cdk:logicalId", + "data": "OutboundJobB5826414" + } + ], + "/GlueWorkflowTriggerStack/InboundJob/Resource": [ + { + "type": "aws:cdk:logicalId", + "data": "InboundJobEDA3CBF4" + } + ], + "/GlueWorkflowTriggerStack/WorkflowName": [ 
+ { + "type": "aws:cdk:logicalId", + "data": "WorkflowName" + } + ], + "/GlueWorkflowTriggerStack/BootstrapVersion": [ + { + "type": "aws:cdk:logicalId", + "data": "BootstrapVersion" + } + ], + "/GlueWorkflowTriggerStack/CheckBootstrapVersion": [ + { + "type": "aws:cdk:logicalId", + "data": "CheckBootstrapVersion" + } + ] + }, + "displayName": "GlueWorkflowTriggerStack" + }, + "awscdkglueworkflowtriggerintegDefaultTestDeployAssert43E79173.assets": { + "type": "cdk:asset-manifest", + "properties": { + "file": "awscdkglueworkflowtriggerintegDefaultTestDeployAssert43E79173.assets.json", + "requiresBootstrapStackVersion": 6, + "bootstrapStackVersionSsmParameter": "/cdk-bootstrap/hnb659fds/version" + } + }, + "awscdkglueworkflowtriggerintegDefaultTestDeployAssert43E79173": { + "type": "aws:cloudformation:stack", + "environment": "aws://unknown-account/unknown-region", + "properties": { + "templateFile": "awscdkglueworkflowtriggerintegDefaultTestDeployAssert43E79173.template.json", + "terminationProtection": false, + "validateOnSynth": false, + "assumeRoleArn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-deploy-role-${AWS::AccountId}-${AWS::Region}", + "cloudFormationExecutionRoleArn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-cfn-exec-role-${AWS::AccountId}-${AWS::Region}", + "stackTemplateAssetObjectUrl": "s3://cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}/21fbb51d7b23f6a6c262b46a9caee79d744a3ac019fd45422d988b96d44b2a22.json", + "requiresBootstrapStackVersion": 6, + "bootstrapStackVersionSsmParameter": "/cdk-bootstrap/hnb659fds/version", + "additionalDependencies": [ + "awscdkglueworkflowtriggerintegDefaultTestDeployAssert43E79173.assets" + ], + "lookupRole": { + "arn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-lookup-role-${AWS::AccountId}-${AWS::Region}", + "requiresBootstrapStackVersion": 8, + "bootstrapStackVersionSsmParameter": "/cdk-bootstrap/hnb659fds/version" + } + }, + "dependencies": [ 
+ "awscdkglueworkflowtriggerintegDefaultTestDeployAssert43E79173.assets" + ], + "metadata": { + "/aws-cdk-glue-workflow-trigger-integ/DefaultTest/DeployAssert/BootstrapVersion": [ + { + "type": "aws:cdk:logicalId", + "data": "BootstrapVersion" + } + ], + "/aws-cdk-glue-workflow-trigger-integ/DefaultTest/DeployAssert/CheckBootstrapVersion": [ + { + "type": "aws:cdk:logicalId", + "data": "CheckBootstrapVersion" + } + ] + }, + "displayName": "aws-cdk-glue-workflow-trigger-integ/DefaultTest/DeployAssert" + }, + "Tree": { + "type": "cdk:tree", + "properties": { + "file": "tree.json" + } + } + } +} \ No newline at end of file diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.workflow.js.snapshot/tree.json b/packages/@aws-cdk/aws-glue-alpha/test/integ.workflow.js.snapshot/tree.json new file mode 100644 index 0000000000000..988a73eab37e0 --- /dev/null +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.workflow.js.snapshot/tree.json @@ -0,0 +1,448 @@ +{ + "version": "tree-0.1", + "tree": { + "id": "App", + "path": "", + "children": { + "GlueWorkflowTriggerStack": { + "id": "GlueWorkflowTriggerStack", + "path": "GlueWorkflowTriggerStack", + "children": { + "Workflow": { + "id": "Workflow", + "path": "GlueWorkflowTriggerStack/Workflow", + "children": { + "Resource": { + "id": "Resource", + "path": "GlueWorkflowTriggerStack/Workflow/Resource", + "attributes": { + "aws:cdk:cloudformation:type": "AWS::Glue::Workflow", + "aws:cdk:cloudformation:props": { + "description": "MyWorkflow" + } + }, + "constructInfo": { + "fqn": "aws-cdk-lib.aws_glue.CfnWorkflow", + "version": "0.0.0" + } + }, + "OnDemandTrigger": { + "id": "OnDemandTrigger", + "path": "GlueWorkflowTriggerStack/Workflow/OnDemandTrigger", + "attributes": { + "aws:cdk:cloudformation:type": "AWS::Glue::Trigger", + "aws:cdk:cloudformation:props": { + "actions": [ + { + "jobName": { + "Ref": "InboundJobEDA3CBF4" + } + } + ], + "type": "ON_DEMAND", + "workflowName": { + "Ref": "Workflow193EF7C1" + } + } + }, + 
"constructInfo": { + "fqn": "aws-cdk-lib.aws_glue.CfnTrigger", + "version": "0.0.0" + } + }, + "ConditionalTrigger": { + "id": "ConditionalTrigger", + "path": "GlueWorkflowTriggerStack/Workflow/ConditionalTrigger", + "attributes": { + "aws:cdk:cloudformation:type": "AWS::Glue::Trigger", + "aws:cdk:cloudformation:props": { + "actions": [ + { + "jobName": { + "Ref": "OutboundJobB5826414" + } + } + ], + "eventBatchingCondition": { + "batchSize": 1, + "batchWindow": 900 + }, + "predicate": { + "conditions": [ + { + "logicalOperator": "EQUALS", + "jobName": { + "Ref": "InboundJobEDA3CBF4" + }, + "state": "SUCCEEDED" + } + ] + }, + "type": "CONDITIONAL", + "workflowName": { + "Ref": "Workflow193EF7C1" + } + } + }, + "constructInfo": { + "fqn": "aws-cdk-lib.aws_glue.CfnTrigger", + "version": "0.0.0" + } + } + }, + "constructInfo": { + "fqn": "@aws-cdk/aws-glue-alpha.Workflow", + "version": "0.0.0" + } + }, + "JobRole": { + "id": "JobRole", + "path": "GlueWorkflowTriggerStack/JobRole", + "children": { + "ImportJobRole": { + "id": "ImportJobRole", + "path": "GlueWorkflowTriggerStack/JobRole/ImportJobRole", + "constructInfo": { + "fqn": "aws-cdk-lib.Resource", + "version": "0.0.0" + } + }, + "Resource": { + "id": "Resource", + "path": "GlueWorkflowTriggerStack/JobRole/Resource", + "attributes": { + "aws:cdk:cloudformation:type": "AWS::IAM::Role", + "aws:cdk:cloudformation:props": { + "assumeRolePolicyDocument": { + "Statement": [ + { + "Action": "sts:AssumeRole", + "Effect": "Allow", + "Principal": { + "Service": "glue.amazonaws.com" + } + } + ], + "Version": "2012-10-17" + } + } + }, + "constructInfo": { + "fqn": "aws-cdk-lib.aws_iam.CfnRole", + "version": "0.0.0" + } + }, + "DefaultPolicy": { + "id": "DefaultPolicy", + "path": "GlueWorkflowTriggerStack/JobRole/DefaultPolicy", + "children": { + "Resource": { + "id": "Resource", + "path": "GlueWorkflowTriggerStack/JobRole/DefaultPolicy/Resource", + "attributes": { + "aws:cdk:cloudformation:type": "AWS::IAM::Policy", + 
"aws:cdk:cloudformation:props": { + "policyDocument": { + "Statement": [ + { + "Action": [ + "s3:GetBucket*", + "s3:GetObject*", + "s3:List*" + ], + "Effect": "Allow", + "Resource": [ + { + "Fn::Join": [ + "", + [ + "arn:", + { + "Ref": "AWS::Partition" + }, + ":s3:::", + { + "Fn::Sub": "cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}" + }, + "/*" + ] + ] + }, + { + "Fn::Join": [ + "", + [ + "arn:", + { + "Ref": "AWS::Partition" + }, + ":s3:::", + { + "Fn::Sub": "cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}" + } + ] + ] + } + ] + } + ], + "Version": "2012-10-17" + }, + "policyName": "JobRoleDefaultPolicy5DE0D8F9", + "roles": [ + { + "Ref": "JobRole014917C6" + } + ] + } + }, + "constructInfo": { + "fqn": "aws-cdk-lib.aws_iam.CfnPolicy", + "version": "0.0.0" + } + } + }, + "constructInfo": { + "fqn": "aws-cdk-lib.aws_iam.Policy", + "version": "0.0.0" + } + } + }, + "constructInfo": { + "fqn": "aws-cdk-lib.aws_iam.Role", + "version": "0.0.0" + } + }, + "OutboundJob": { + "id": "OutboundJob", + "path": "GlueWorkflowTriggerStack/OutboundJob", + "children": { + "Code2907ea7be4a583708cfffc21b3df1dfa": { + "id": "Code2907ea7be4a583708cfffc21b3df1dfa", + "path": "GlueWorkflowTriggerStack/OutboundJob/Code2907ea7be4a583708cfffc21b3df1dfa", + "children": { + "Stage": { + "id": "Stage", + "path": "GlueWorkflowTriggerStack/OutboundJob/Code2907ea7be4a583708cfffc21b3df1dfa/Stage", + "constructInfo": { + "fqn": "aws-cdk-lib.AssetStaging", + "version": "0.0.0" + } + }, + "AssetBucket": { + "id": "AssetBucket", + "path": "GlueWorkflowTriggerStack/OutboundJob/Code2907ea7be4a583708cfffc21b3df1dfa/AssetBucket", + "constructInfo": { + "fqn": "aws-cdk-lib.aws_s3.BucketBase", + "version": "0.0.0" + } + } + }, + "constructInfo": { + "fqn": "aws-cdk-lib.aws_s3_assets.Asset", + "version": "0.0.0" + } + }, + "Resource": { + "id": "Resource", + "path": "GlueWorkflowTriggerStack/OutboundJob/Resource", + "attributes": { + "aws:cdk:cloudformation:type": "AWS::Glue::Job", + 
"aws:cdk:cloudformation:props": { + "command": { + "name": "glueetl", + "scriptLocation": { + "Fn::Join": [ + "", + [ + "s3://", + { + "Fn::Sub": "cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}" + }, + "/432033e3218068a915d2532fa9be7858a12b228a2ae6e5c10faccd9097b1e855.py" + ] + ] + }, + "pythonVersion": "3" + }, + "defaultArguments": { + "--job-language": "python", + "--enable-continuous-cloudwatch-log": "true", + "--enable-metrics": "", + "--enable-observability-metrics": "true" + }, + "glueVersion": "4.0", + "numberOfWorkers": 2, + "role": { + "Fn::GetAtt": [ + "JobRole014917C6", + "Arn" + ] + }, + "workerType": "G.2X" + } + }, + "constructInfo": { + "fqn": "aws-cdk-lib.aws_glue.CfnJob", + "version": "0.0.0" + } + } + }, + "constructInfo": { + "fqn": "@aws-cdk/aws-glue-alpha.PySparkEtlJob", + "version": "0.0.0" + } + }, + "InboundJob": { + "id": "InboundJob", + "path": "GlueWorkflowTriggerStack/InboundJob", + "children": { + "Resource": { + "id": "Resource", + "path": "GlueWorkflowTriggerStack/InboundJob/Resource", + "attributes": { + "aws:cdk:cloudformation:type": "AWS::Glue::Job", + "aws:cdk:cloudformation:props": { + "command": { + "name": "glueetl", + "scriptLocation": { + "Fn::Join": [ + "", + [ + "s3://", + { + "Fn::Sub": "cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}" + }, + "/432033e3218068a915d2532fa9be7858a12b228a2ae6e5c10faccd9097b1e855.py" + ] + ] + }, + "pythonVersion": "3" + }, + "defaultArguments": { + "--job-language": "python", + "--enable-continuous-cloudwatch-log": "true", + "--enable-metrics": "", + "--enable-observability-metrics": "true" + }, + "glueVersion": "4.0", + "numberOfWorkers": 2, + "role": { + "Fn::GetAtt": [ + "JobRole014917C6", + "Arn" + ] + }, + "workerType": "G.2X" + } + }, + "constructInfo": { + "fqn": "aws-cdk-lib.aws_glue.CfnJob", + "version": "0.0.0" + } + } + }, + "constructInfo": { + "fqn": "@aws-cdk/aws-glue-alpha.PySparkEtlJob", + "version": "0.0.0" + } + }, + "WorkflowName": { + "id": "WorkflowName", + 
"path": "GlueWorkflowTriggerStack/WorkflowName", + "constructInfo": { + "fqn": "aws-cdk-lib.CfnOutput", + "version": "0.0.0" + } + }, + "BootstrapVersion": { + "id": "BootstrapVersion", + "path": "GlueWorkflowTriggerStack/BootstrapVersion", + "constructInfo": { + "fqn": "aws-cdk-lib.CfnParameter", + "version": "0.0.0" + } + }, + "CheckBootstrapVersion": { + "id": "CheckBootstrapVersion", + "path": "GlueWorkflowTriggerStack/CheckBootstrapVersion", + "constructInfo": { + "fqn": "aws-cdk-lib.CfnRule", + "version": "0.0.0" + } + } + }, + "constructInfo": { + "fqn": "aws-cdk-lib.Stack", + "version": "0.0.0" + } + }, + "aws-cdk-glue-workflow-trigger-integ": { + "id": "aws-cdk-glue-workflow-trigger-integ", + "path": "aws-cdk-glue-workflow-trigger-integ", + "children": { + "DefaultTest": { + "id": "DefaultTest", + "path": "aws-cdk-glue-workflow-trigger-integ/DefaultTest", + "children": { + "Default": { + "id": "Default", + "path": "aws-cdk-glue-workflow-trigger-integ/DefaultTest/Default", + "constructInfo": { + "fqn": "constructs.Construct", + "version": "10.3.0" + } + }, + "DeployAssert": { + "id": "DeployAssert", + "path": "aws-cdk-glue-workflow-trigger-integ/DefaultTest/DeployAssert", + "children": { + "BootstrapVersion": { + "id": "BootstrapVersion", + "path": "aws-cdk-glue-workflow-trigger-integ/DefaultTest/DeployAssert/BootstrapVersion", + "constructInfo": { + "fqn": "aws-cdk-lib.CfnParameter", + "version": "0.0.0" + } + }, + "CheckBootstrapVersion": { + "id": "CheckBootstrapVersion", + "path": "aws-cdk-glue-workflow-trigger-integ/DefaultTest/DeployAssert/CheckBootstrapVersion", + "constructInfo": { + "fqn": "aws-cdk-lib.CfnRule", + "version": "0.0.0" + } + } + }, + "constructInfo": { + "fqn": "aws-cdk-lib.Stack", + "version": "0.0.0" + } + } + }, + "constructInfo": { + "fqn": "@aws-cdk/integ-tests-alpha.IntegTestCase", + "version": "0.0.0" + } + } + }, + "constructInfo": { + "fqn": "@aws-cdk/integ-tests-alpha.IntegTest", + "version": "0.0.0" + } + }, + "Tree": { + 
"id": "Tree", + "path": "Tree", + "constructInfo": { + "fqn": "constructs.Construct", + "version": "10.3.0" + } + } + }, + "constructInfo": { + "fqn": "aws-cdk-lib.App", + "version": "0.0.0" + } + } +} \ No newline at end of file From 7077f9857f5bf8a0429895ee133efd70ccffd91e Mon Sep 17 00:00:00 2001 From: Natalie White Date: Mon, 29 Jul 2024 23:22:34 +0000 Subject: [PATCH 48/51] Resolve glue alpha README compilation errors --- packages/@aws-cdk/aws-glue-alpha/README.md | 1124 ++++---------------- 1 file changed, 188 insertions(+), 936 deletions(-) diff --git a/packages/@aws-cdk/aws-glue-alpha/README.md b/packages/@aws-cdk/aws-glue-alpha/README.md index 9595c8e15f939..ed9229ce373c9 100644 --- a/packages/@aws-cdk/aws-glue-alpha/README.md +++ b/packages/@aws-cdk/aws-glue-alpha/README.md @@ -93,14 +93,18 @@ You can find more details about version, worker type and other features in [Glue's public documentation](https://docs.aws.amazon.com/glue/latest/dg/aws-glue-api-jobs-job.html). ```ts -glue.ScalaSparkEtlJob(this, 'ScalaSparkEtlJob', { - script: glue.Code.fromBucket('bucket-name', 's3prefix/path-to-scala-jar'), +import * as s3 from 'aws-cdk-lib/aws-s3'; + +const codeBucket = s3.Bucket.fromBucketName(stack, 'CodeBucket', 'bucketname'); + +new glue.ScalaSparkEtlJob(this, 'ScalaSparkEtlJob', { + script: glue.Code.fromBucket(codeBucket, 'script'); className: 'com.example.HelloWorld', role: iam.IRole, }); -glue.pySparkEtlJob(this, 'pySparkEtlJob', { - script: glue.Code.fromBucket('bucket-name', 's3prefix/path-to-python-script'), +new glue.PySparkEtlJob(this, 'PySparkEtlJob', { + script: glue.Code.fromBucket(codeBucket, 'script'); role: iam.IRole, }); ``` @@ -108,23 +112,32 @@ glue.pySparkEtlJob(this, 'pySparkEtlJob', { Optional override examples: ```ts -glue.ScalaSparkEtlJob(this, 'ScalaSparkEtlJob', { +import * as cdk from 'aws-cdk-lib'; +import * as iam from 'aws-cdk-lib/aws-iam'; +import * as s3 from 'aws-cdk-lib/aws-s3'; + +const codeBucket = 
s3.Bucket.fromBucketName(stack, 'CodeBucket', 'bucketname'); + +new glue.ScalaSparkEtlJob(this, 'ScalaSparkEtlJob', { glueVersion: glue.GlueVersion.V3_0, - script: glue.Code.fromBucket('bucket-name', 's3prefix/path-to-scala-script'), + script: glue.Code.fromBucket(codeBucket, 'script'); className: 'com.example.HelloWorld', - extraJars: [glue.Code.fromBucket('bucket-name', 'path-to-extra-jars'),], - description: 'an example Scala Spark ETL job', + extraJars: [ + glue.Code.fromBucket( + s3.Bucket.fromBucketName(stack, 'extraJarsBucket', 'extra-jars-bucket'), + 'prefix/file.jar'), + ] numberOfWorkers: 20, - workerType: glue.WorkerType.G8X, + workerType: glue.WorkerType.G_8X, timeout: cdk.Duration.minutes(15), role: iam.IRole, }); -glue.pySparkEtlJob(this, 'pySparkEtlJob', { +new glue.PySparkEtlJob(this, 'PySparkEtlJob', { jobType: glue.JobType.ETL, glueVersion: glue.GlueVersion.V3_0, pythonVersion: glue.PythonVersion.3_9, - script: glue.Code.fromBucket('bucket-name', 's3prefix/path-to-python-script'), + script: glue.Code.fromBucket(codeBucket, 'script'); description: 'an example pySpark ETL job', numberOfWorkers: 20, workerType: glue.WorkerType.G8X, @@ -133,231 +146,6 @@ glue.pySparkEtlJob(this, 'pySparkEtlJob', { }); ``` -Scala Spark ETL Job Property Interface: - -```ts -ScalaSparkEtlJobProps{ - /** - * Script Code Location (required) - * Script to run when the Glue Job executes. 
Can be uploaded - * from the local directory structure using fromAsset - * or referenced via S3 location using fromBucket - * */ - script: glue.Code; - - /** - * Class name (required for Scala) - * Package and class name for the entry point of Glue job execution for - * Java scripts - * */ - className: string; - - /** - * Extra Jars S3 URL (optional) - * S3 URL where additional jar dependencies are located - */ - extraJars?: string[]; - - /** - * IAM Role (required) - * IAM Role to use for Glue job execution - * Must be specified by the developer because the L2 doesn't have visibility - * into the actions the script(s) take during the job execution - * */ - role: iam.IRole; - - /** - * Name of the Glue job (optional) - * Developer-specified name of the Glue job - * */ - name?: string; - - /** - * Description (optional) - * Developer-specified description of the Glue job - * */ - description?: string; - - /** - * Number of Workers (optional) - * Number of workers for Glue to use during job execution - * @default 10 - * */ - numberOrWorkers?: int; - - /** - * Worker Type (optional) - * Type of Worker for Glue to use during job execution - * Enum options: Standard, G_1X, G_2X, G_025X. G_4X, G_8X, Z_2X - * @default G_2X - * */ - workerType?: glue.WorkerType; - - /** - * Max Concurrent Runs (optional) - * The maximum number of runs this Glue job can concurrently run - * @default 1 - * */ - maxConcurrentRuns?: int; - - /** - * Default Arguments (optional) - * The default arguments for every run of this Glue job, - * specified as name-value pairs. 
- * */ - defaultArguments?: {[key: string], string }[]; - - /** - * Connections (optional) - * List of connections to use for this Glue job - * */ - connections?: IConnection[]; - - /** - * Max Retries (optional) - * Maximum number of retry attempts Glue performs - * if the job fails - * @default 0 - * */ - maxRetries?: int; - - /** - * Timeout (optional) - * Timeout for the Glue job, specified in minutes - * @default 2880 (2 days for non-streaming) - * */ - timeout?: cdk.Duration; - - /** - * Security Configuration (optional) - * Defines the encryption options for the Glue job - * */ - securityConfiguration?: ISecurityConfiguration; - - /** - * Tags (optional) - * A list of key:value pairs of tags to apply to this Glue job resource - * */ - tags?: {[key: string], string }[]; - - /** - * Glue Version - * The version of Glue to use to execute this job - * @default 3.0 for ETL - * */ - glueVersion?: glue.GlueVersion; -} -``` - -pySpark ETL Job Property Interface: - -```ts -pySparkEtlJobProps{ - /** - * Script Code Location (required) - * Script to run when the Glue job executes. 
Can be uploaded - * from the local directory structure using fromAsset - * or referenced via S3 location using fromBucket - * */ - script: glue.Code; - - /** - * IAM Role (required) - * IAM Role to use for Glue job execution - * Must be specified by the developer because the L2 doesn't have visibility - * into the actions the script(s) takes during the job execution - * */ - role: iam.IRole; - - /** - * Name of the Glue job (optional) - * Developer-specified name of the Glue job - * */ - name?: string; - - /** - * Description (optional) - * Developer-specified description of the Glue job - * */ - description?: string; - - /** - * Extra Jars S3 URL (optional) - * S3 URL where additional jar dependencies are located - */ - extraJars?: string[]; - - /** - * Number of Workers (optional) - * Number of workers for Glue to use during job execution - * @default 10 - * */ - numberOrWorkers?: int; - - /** - * Worker Type (optional) - * Type of Worker for Glue to use during job execution - * Enum options: Standard, G_1X, G_2X, G_025X. G_4X, G_8X, Z_2X - * @default G_2X - * */ - workerType?: glue.WorkerType; - - /** - * Max Concurrent Runs (optional) - * The maximum number of runs this Glue job can concurrently run - * @default 1 - * */ - maxConcurrentRuns?: int; - - /** - * Default Arguments (optional) - * The default arguments for every run of this Glue job, - * specified as name-value pairs. 
- * */ - defaultArguments?: {[key: string], string }[]; - - /** - * Connections (optional) - * List of connections to use for this Glue job - * */ - connections?: IConnection[]; - - /** - * Max Retries (optional) - * Maximum number of retry attempts Glue performs - * if the job fails - * @default 0 - * */ - maxRetries?: int; - - /** - * Timeout (optional) - * Timeout for the Glue job, specified in minutes - * @default 2880 (2 days for non-streaming) - * */ - timeout?: cdk.Duration; - - /** - * Security Configuration (optional) - * Defines the encryption options for the Glue job - * */ - securityConfiguration?: ISecurityConfiguration; - - /** - * Tags (optional) - * A list of key:value pairs of tags to apply to this Glue job resource - * */ - tags?: {[key: string], string }[]; - - /** - * Glue Version - * The version of Glue to use to execute this job - * @default 3.0 for ETL - * */ - glueVersion?: glue.GlueVersion; -} -``` - **Streaming Jobs** Streaming jobs are similar to ETL jobs, except that they perform ETL on data @@ -370,14 +158,18 @@ The following best practice features are enabled by default: `—enable-metrics, —enable-spark-ui, —enable-continuous-cloudwatch-log`. 
```ts +import * as s3 from 'aws-cdk-lib/aws-s3'; + +const codeBucket = s3.Bucket.fromBucketName(stack, 'CodeBucket', 'bucketname'); + new glue.pySparkStreamingJob(this, 'pySparkStreamingJob', { - script: glue.Code.fromBucket('bucket-name', 's3prefix/path-to-python-script'), + script: glue.Code.fromBucket(codeBucket, 'script'); role: iam.IRole, }); new glue.ScalaSparkStreamingJob(this, 'ScalaSparkStreamingJob', { - script: glue.Code.fromBucket('bucket-name', 's3prefix/path-to-scala-jar'), + script: glue.Code.fromBucket(codeBucket, 'script'); className: 'com.example.HelloWorld', role: iam.IRole, }); @@ -387,10 +179,16 @@ new glue.ScalaSparkStreamingJob(this, 'ScalaSparkStreamingJob', { Optional override examples: ```ts +import * as cdk from 'aws-cdk-lib'; +import * as iam from 'aws-cdk-lib/aws-iam'; +import * as s3 from 'aws-cdk-lib/aws-s3'; + +const codeBucket = s3.Bucket.fromBucketName(stack, 'CodeBucket', 'bucketname'); + new glue.pySparkStreamingJob(this, 'pySparkStreamingJob', { glueVersion: glue.GlueVersion.V3_0, - pythonVersion: glue.PythonVersion.3_9, - script: glue.Code.fromBucket('bucket-name', 's3prefix/path-to-python-script'), + pythonVersion: glue.PythonVersion.THREE_NINE, + script: glue.Code.fromBucket(codeBucket, 'script'); description: 'an example Python Streaming job', numberOfWorkers: 20, workerType: glue.WorkerType.G8X, @@ -400,9 +198,13 @@ new glue.pySparkStreamingJob(this, 'pySparkStreamingJob', { new glue.ScalaSparkStreamingJob(this, 'ScalaSparkStreamingJob', { glueVersion: glue.GlueVersion.V3_0, - pythonVersion: glue.PythonVersion.3_9, - script: glue.Code.fromBucket('bucket-name', 's3prefix/path-to-scala-script'), - extraJars: [glue.Code.fromBucket('bucket-name', 'path-to-extra-jars'),], + pythonVersion: glue.PythonVersion.THREE_NINE, + script: glue.Code.fromBucket(codeBucket, 'script'); + extraJars: [ + glue.Code.fromBucket( + s3.Bucket.fromBucketName(stack, 'extraJarsBucket', 'extra-jars-bucket'), + 'prefix/file.jar'), + ] className: 
'com.example.HelloWorld', description: 'an example Python Streaming job', numberOfWorkers: 20, @@ -412,223 +214,6 @@ new glue.ScalaSparkStreamingJob(this, 'ScalaSparkStreamingJob', { }); ``` -Scala Spark Streaming Job Property Interface: - -```ts -ScalaSparkStreamingJobProps{ - /** - * Script Code Location (required) - * Script to run when the Glue job executes. Can be uploaded - * from the local directory structure using fromAsset - * or referenced via S3 location using fromBucket - * */ - script: glue.Code; - - /** - * Class name (required for Scala scripts) - * Package and class name for the entry point of Glue job execution for - * Java scripts - * */ - className: string; - - /** - * IAM Role (required) - * IAM Role to use for Glue job execution - * Must be specified by the developer because the L2 doesn't have visibility - * into the actions the script(s) take during the job execution - * */ - role: iam.IRole; - - /** - * Name of the Glue job (optional) - * Developer-specified name of the Glue job - * */ - name?: string; - - /** - * Extra Jars S3 URL (optional) - * S3 URL where additional jar dependencies are located - */ - extraJars?: string[]; - - /** - * Description (optional) - * Developer-specified description of the Glue job - * */ - description?: string; - - /** - * Number of Workers (optional) - * Number of workers for Glue to use during job execution - * @default 10 - * */ - numberOrWorkers?: int; - - /** - * Worker Type (optional) - * Type of Worker for Glue to use during job execution - * Enum options: Standard, G_1X, G_2X, G_025X. G_4X, G_8X, Z_2X - * @default G_2X - * */ - workerType?: glue.WorkerType; - - /** - * Max Concurrent Runs (optional) - * The maximum number of runs this Glue job can concurrently run - * @default 1 - * */ - maxConcurrentRuns?: int; - - /** - * Default Arguments (optional) - * The default arguments for every run of this Glue job, - * specified as name-value pairs. 
- * */ - defaultArguments?: {[key: string], string }[]; - - /** - * Connections (optional) - * List of connections to use for this Glue job - * */ - connections?: IConnection[]; - - /** - * Max Retries (optional) - * Maximum number of retry attempts Glue performs - * if the job fails - * @default 0 - * */ - maxRetries?: int; - - /** - * Timeout (optional) - * Timeout for the Glue job, specified in minutes - * */ - timeout?: cdk.Duration; - - /** - * Security Configuration (optional) - * Defines the encryption options for the Glue job - * */ - securityConfiguration?: ISecurityConfiguration; - - /** - * Tags (optional) - * A list of key:value pairs of tags to apply to this Glue job resource - * */ - tags?: {[key: string], string }[]; - - /** - * Glue Version - * The version of Glue to use to execute this job - * @default 3.0 - * */ - glueVersion?: glue.GlueVersion; -} -``` - -pySpark Streaming Job Property Interface: - -```ts -pySparkStreamingJobProps{ - /** - * Script Code Location (required) - * Script to run when the Glue job executes. Can be uploaded - * from the local directory structure using fromAsset - * or referenced via S3 location using fromBucket - * */ - script: glue.Code; - - /** - * IAM Role (required) - * IAM Role to use for Glue job execution - * Must be specified by the developer because the L2 doesn't have visibility - * into the actions the script(s) take during the job execution - * */ - role: iam.IRole; - - /** - * Name of the Glue job (optional) - * Developer-specified name of the Glue job - * */ - name?: string; - - /** - * Description (optional) - * Developer-specified description of the Glue job - * */ - description?: string; - - /** - * Number of Workers (optional) - * Number of workers for Glue to use during job execution - * @default 10 - * */ - numberOrWorkers?: int; - - /** - * Worker Type (optional) - * Type of Worker for Glue to use during job execution - * Enum options: Standard, G_1X, G_2X, G_025X. 
G_4X, G_8X, Z_2X - * @default G_2X - * */ - workerType?: glue.WorkerType; - - /** - * Max Concurrent Runs (optional) - * The maximum number of runs this Glue job can concurrently run - * @default 1 - * */ - maxConcurrentRuns?: int; - - /** - * Default Arguments (optional) - * The default arguments for every run of this Glue job, - * specified as name-value pairs. - * */ - defaultArguments?: {[key: string], string }[]; - - /** - * Connections (optional) - * List of connections to use for this Glue job - * */ - connections?: IConnection[]; - - /** - * Max Retries (optional) - * Maximum number of retry attempts Glue perform - * if the job fails - * @default 0 - * */ - maxRetries?: int; - - /** - * Timeout (optional) - * Timeout for the Glue job, specified in minutes - * */ - timeout?: cdk.Duration; - - /** - * Security Configuration (optional) - * Defines the encryption options for the Glue job - * */ - securityConfiguration?: ISecurityConfiguration; - - /** - * Tags (optional) - * A list of key:value pairs of tags to apply to this Glue job resource - * */ - tags?: {[key: string], string }[]; - - /** - * Glue Version - * The version of Glue to use to execute this job - * @default 3.0 - * */ - glueVersion?: glue.GlueVersion; -} -``` - **Flex Jobs** The flexible execution class is appropriate for non-urgent jobs such as @@ -638,14 +223,19 @@ features are enabled by default: `—enable-metrics, —enable-spark-ui, —enable-continuous-cloudwatch-log` ```ts -glue.ScalaSparkFlexEtlJob(this, 'ScalaSparkFlexEtlJob', { - script: glue.Code.fromBucket('bucket-name', 's3prefix/path-to-scala-jar'), +import * as iam from 'aws-cdk-lib/aws-iam'; +import * as s3 from 'aws-cdk-lib/aws-s3'; + +const codeBucket = s3.Bucket.fromBucketName(stack, 'CodeBucket', 'bucketname'); + +new glue.ScalaSparkFlexEtlJob(this, 'ScalaSparkFlexEtlJob', { + script: glue.Code.fromBucket(codeBucket, 'script'); className: 'com.example.HelloWorld', role: iam.IRole, }); -glue.pySparkFlexEtlJob(this, 
'pySparkFlexEtlJob', { - script: glue.Code.fromBucket('bucket-name', 's3prefix/path-to-python-script'), +new glue.pySparkFlexEtlJob(this, 'pySparkFlexEtlJob', { + script: glue.Code.fromBucket(codeBucket, 'script'); role: iam.IRole, }); ``` @@ -653,11 +243,20 @@ glue.pySparkFlexEtlJob(this, 'pySparkFlexEtlJob', { Optional override examples: ```ts -glue.ScalaSparkFlexEtlJob(this, 'ScalaSparkFlexEtlJob', { +import * as iam from 'aws-cdk-lib/aws-iam'; +import * as s3 from 'aws-cdk-lib/aws-s3'; + +const codeBucket = s3.Bucket.fromBucketName(stack, 'CodeBucket', 'bucketname'); + +new glue.ScalaSparkFlexEtlJob(this, 'ScalaSparkFlexEtlJob', { glueVersion: glue.GlueVersion.V3_0, - script: glue.Code.fromBucket('bucket-name', 's3prefix/path-to-scala-script'), + script: glue.Code.fromBucket(codeBucket, 'script'); className: 'com.example.HelloWorld', - extraJars: [glue.Code.fromBucket('bucket-name', 'path-to-extra-jars')], + extraJars: [ + glue.Code.fromBucket( + s3.Bucket.fromBucketName(stack, 'extraJarsBucket', 'extra-jars-bucket'), + 'prefix/file.jar'), + ] description: 'an example pySpark ETL job', numberOfWorkers: 20, workerType: glue.WorkerType.G8X, @@ -667,8 +266,8 @@ glue.ScalaSparkFlexEtlJob(this, 'ScalaSparkFlexEtlJob', { new glue.pySparkFlexEtlJob(this, 'pySparkFlexEtlJob', { glueVersion: glue.GlueVersion.V3_0, - pythonVersion: glue.PythonVersion.3_9, - script: glue.Code.fromBucket('bucket-name', 's3prefix/path-to-python-script'), + pythonVersion: glue.PythonVersion.THREE_NINE, + script: glue.Code.fromBucket(codeBucket, 'script'); description: 'an example Flex job', numberOfWorkers: 20, workerType: glue.WorkerType.G8X, @@ -677,225 +276,6 @@ new glue.pySparkFlexEtlJob(this, 'pySparkFlexEtlJob', { }); ``` -Scala Spark Flex Job Property Interface: - -```ts -ScalaSparkFlexJobProps{ - /** - * Script Code Location (required) - * Script to run when the Glue job executes. 
Can be uploaded - * from the local directory structure using fromAsset - * or referenced via S3 location using fromBucket - * */ - script: glue.Code; - - /** - * Class name (required for Scala scripts) - * Package and class name for the entry point of Glue job execution for - * Java scripts - * */ - className: string; - - /** - * Extra Jars S3 URL (optional) - * S3 URL where additional jar dependencies are located - */ - extraJars?: string[]; - - /** - * IAM Role (required) - * IAM Role to use for Glue job execution - * Must be specified by the developer because the L2 doesn't have visibility - * into the actions the script(s) take during the job execution - * */ - role: iam.IRole; - - /** - * Name of the Glue job (optional) - * Developer-specified name of the Glue job - * */ - name?: string; - - /** - * Description (optional) - * Developer-specified description of the Glue job - * */ - description?: string; - - /** - * Number of Workers (optional) - * Number of workers for Glue to use during job execution - * @default 10 - * */ - numberOrWorkers?: int; - - /** - * Worker Type (optional) - * Type of Worker for Glue to use during job execution - * Enum options: Standard, G_1X, G_2X, G_025X. G_4X, G_8X, Z_2X - * @default G_2X - * */ - workerType?: glue.WorkerType; - - /** - * Max Concurrent Runs (optional) - * The maximum number of runs this Glue job can concurrently run - * @default 1 - * */ - maxConcurrentRuns?: int; - - /** - * Default Arguments (optional) - * The default arguments for every run of this Glue job, - * specified as name-value pairs. 
- * */ - defaultArguments?: {[key: string], string }[]; - - /** - * Connections (optional) - * List of connections to use for this Glue job - * */ - connections?: IConnection[]; - - /** - * Max Retries (optional) - * Maximum number of retry attempts Glue perform - * if the job fails - * @default 0 - * */ - maxRetries?: int; - - /** - * Timeout (optional) - * Timeout for the Glue job, specified in minutes - * @default 2880 (2 days for non-streaming) - * */ - timeout?: cdk.Duration; - - /** - * Security Configuration (optional) - * Defines the encryption options for the Glue job - * */ - securityConfiguration?: ISecurityConfiguration; - - /** - * Tags (optional) - * A list of key:value pairs of tags to apply to this Glue job resource - * */ - tags?: {[key: string], string }[]; - - /** - * Glue Version - * The version of Glue to use to execute this job - * @default 3.0 - * */ - glueVersion?: glue.GlueVersion; -} -``` - -pySpark Flex Job Property Interface: - -```ts -PySparkFlexJobProps{ - /** - * Script Code Location (required) - * Script to run when the Glue job executes. Can be uploaded - * from the local directory structure using fromAsset - * or referenced via S3 location using fromBucket - * */ - script: glue.Code; - - /** - * IAM Role (required) - * IAM Role to use for Glue job execution - * Must be specified by the developer because the L2 doesn't have visibility - * into the actions the script(s) take during the job execution - * */ - role: iam.IRole; - - /** - * Name of the Glue job (optional) - * Developer-specified name of the Glue job - * */ - name?: string; - - /** - * Description (optional) - * Developer-specified description of the Glue job - * */ - description?: string; - - /** - * Number of Workers (optional) - * Number of workers for Glue to use during job execution - * @default 10 - * */ - numberOrWorkers?: int; - - /** - * Worker Type (optional) - * Type of Worker for Glue to use during job execution - * Enum options: Standard, G_1X, G_2X, G_025X. 
G_4X, G_8X, Z_2X - * @default G_2X - * */ - workerType?: glue.WorkerType; - - /** - * Max Concurrent Runs (optional) - * The maximum number of runs this Glue job can concurrently run - * @default 1 - * */ - maxConcurrentRuns?: int; - - /** - * Default Arguments (optional) - * The default arguments for every run of this Glue job, - * specified as name-value pairs. - * */ - defaultArguments?: {[key: string], string }[]; - - /** - * Connections (optional) - * List of connections to use for this Glue job - * */ - connections?: IConnection[]; - - /** - * Max Retries (optional) - * Maximum number of retry attempts Glue perform - * if the job fails - * @default 0 - * */ - maxRetries?: int; - - /** - * Timeout (optional) - * Timeout for the Glue job, specified in minutes - * @default 2880 (2 days for non-streaming) - * */ - timeout?: cdk.Duration; - - /** - * Security Configuration (optional) - * Defines the encryption options for the Glue job - * */ - securityConfiguration?: ISecurityConfiguration; - - /** - * Tags (optional) - * A list of key:value pairs of tags to apply to this Glue job resource - * */ - tags?: {[key: string], string }[]; - - /** - * Glue Version - * The version of Glue to use to execute this job - * @default 3.0 - * */ - glueVersion?: glue.GlueVersion; -} -``` - ### Python Shell Jobs Python shell jobs support a Python version that depends on the AWS Glue @@ -906,8 +286,13 @@ analytics libraries using the `library-set=analytics` flag, which is enabled by default. 
```ts +import * as iam from 'aws-cdk-lib/aws-iam'; +import * as s3 from 'aws-cdk-lib/aws-s3'; + +const codeBucket = s3.Bucket.fromBucketName(stack, 'CodeBucket', 'bucketname'); + new glue.PythonShellJob(this, 'PythonShellJob', { - script: glue.Code.fromBucket('bucket-name', 's3prefix/path-to-python-script'), + script: glue.Code.fromBucket(codeBucket, 'script'); role: iam.IRole, }); ``` @@ -915,121 +300,24 @@ new glue.PythonShellJob(this, 'PythonShellJob', { Optional override examples: ```ts +import * as cdk from 'aws-cdk-lib'; +import * as iam from 'aws-cdk-lib/aws-iam'; +import * as s3 from 'aws-cdk-lib/aws-s3'; + +const codeBucket = s3.Bucket.fromBucketName(stack, 'CodeBucket', 'bucketname'); + new glue.PythonShellJob(this, 'PythonShellJob', { glueVersion: glue.GlueVersion.V1_0, - pythonVersion: glue.PythonVersion.3_9, - script: glue.Code.fromBucket('bucket-name', 's3prefix/path-to-python-script'), + pythonVersion: glue.PythonVersion.THREE_NINE, + script: glue.Code.fromBucket(codeBucket, 'script'); description: 'an example Python Shell job', numberOfWorkers: 20, - workerType: glue.WorkerType.G8X, + workerType: glue.WorkerType.G_8X, timeout: cdk.Duration.minutes(15), role: iam.IRole, }); ``` -Python Shell Job Property Interface: - -```ts -PythonShellJobProps{ - /** - * Script Code Location (required) - * Script to run when the Glue job executes. 
Can be uploaded - * from the local directory structure using fromAsset - * or referenced via S3 location using fromBucket - * */ - script: glue.Code; - - /** - * IAM Role (required) - * IAM Role to use for Glue job execution - * Must be specified by the developer because the L2 doesn't have visibility - * into the actions the script(s) take during the job execution - * */ - role: iam.IRole; - - /** - * Name of the Glue job (optional) - * Developer-specified name of the Glue job - * */ - name?: string; - - /** - * Description (optional) - * Developer-specified description of the Glue job - * */ - description?: string; - - /** - * Number of Workers (optional) - * Number of workers for Glue to use during job execution - * @default 10 - * */ - numberOrWorkers?: int; - - /** - * Worker Type (optional) - * Type of Worker for Glue to use during job execution - * Enum options: Standard, G_1X, G_2X, G_025X. G_4X, G_8X, Z_2X - * @default G_2X - * */ - workerType?: glue.WorkerType; - - /** - * Max Concurrent Runs (optional) - * The maximum number of runs this Glue job can concurrently run - * @default 1 - * */ - maxConcurrentRuns?: int; - - /** - * Default Arguments (optional) - * The default arguments for every run of this Glue job, - * specified as name-value pairs. 
- * */ - defaultArguments?: {[key: string], string }[]; - - /** - * Connections (optional) - * List of connections to use for this Glue job - * */ - connections?: IConnection[]; - - /** - * Max Retries (optional) - * Maximum number of retry attempts Glue perform - * if the job fails - * @default 0 - * */ - maxRetries?: int; - - /** - * Timeout (optional) - * Timeout for the Glue job, specified in minutes - * @default 2880 (2 days for non-streaming) - * */ - timeout?: cdk.Duration; - - /** - * Security Configuration (optional) - * Defines the encryption options for the Glue job - * */ - securityConfiguration?: ISecurityConfiguration; - - /** - * Tags (optional) - * A list of key:value pairs of tags to apply to this Glue job resource - * */ - tags?: {[key: string], string }[]; - - /** - * Glue Version - * The version of Glue to use to execute this job - * @default 3.0 for ETL - * */ - glueVersion?: glue.GlueVersion; -} -``` - ### Ray Jobs Glue Ray jobs use worker type Z.2X and Glue version 4.0. These are not @@ -1037,8 +325,13 @@ overrideable since these are the only configuration that Glue Ray jobs currently support. The runtime defaults to Ray2.4 and min workers defaults to 3. 
```ts +import * as iam from 'aws-cdk-lib/aws-iam'; +import * as s3 from 'aws-cdk-lib/aws-s3'; + +const codeBucket = s3.Bucket.fromBucketName(stack, 'CodeBucket', 'bucketname'); + new glue.GlueRayJob(this, 'GlueRayJob', { - script: glue.Code.fromBucket('bucket-name', 's3prefix/path-to-python-script'), + script: glue.Code.fromBucket(codeBucket, 'script'); role: iam.IRole, }); ``` @@ -1046,6 +339,11 @@ new glue.GlueRayJob(this, 'GlueRayJob', { Optional override example: ```ts +import * as iam from 'aws-cdk-lib/aws-iam'; +import * as s3 from 'aws-cdk-lib/aws-s3'; + +const codeBucket = s3.Bucket.fromBucketName(stack, 'CodeBucket', 'bucketname'); + new glue.GlueRayJob(this, 'GlueRayJob', { script: glue.Code.fromBucket('bucket-name', 's3prefix/path-to-python-script'), numberOfWorkers: 50, @@ -1054,117 +352,6 @@ new glue.GlueRayJob(this, 'GlueRayJob', { }); ``` -Ray Job Property Interface: - -```ts -RayJobProps{ - /** - * Script Code Location (required) - * Script to run when the Glue job executes. Can be uploaded - * from the local directory structure using fromAsset - * or referenced via S3 location using fromBucket - * */ - script: glue.Code; - - /** - * IAM Role (required) - * IAM Role to use for Glue job execution - * Must be specified by the developer because the L2 doesn't have visibility - * into the actions the script(s) take during the job execution - * */ - role: iam.IRole; - - /** - * Name of the Glue job (optional) - * Developer-specified name of the Glue job - * */ - name?: string; - - /** - * Description (optional) - * Developer-specified description of the Glue job - * */ - description?: string; - - /** - * Number of Workers (optional) - * Number of workers for Glue to use during job execution - * @default 10 - * */ - numberOrWorkers?: int; - - /** - * Worker Type (optional) - * Type of Worker for Glue to use during job execution - * Enum options: Standard, G_1X, G_2X, G_025X. 
G_4X, G_8X, Z_2X - * @default Z_2X - * */ - workerType?: glue.WorkerType; - - /** - * Runtime (optional) - * Type of Worker for Glue to use during job execution - * Enum options: Ray2_2, Ray 2_3, Ray2_4 - * @default Ray2_4 - * */ - runtime?: glue.RayRuntime; - - /** - * Max Concurrent Runs (optional) - * The maximum number of runs this Glue job can concurrently run - * @default 1 - * */ - maxConcurrentRuns?: int; - - /** - * Default Arguments (optional) - * The default arguments for every run of this Glue job, - * specified as name-value pairs. - * */ - defaultArguments?: {[key: string], string }[]; - - /** - * Connections (optional) - * List of connections to use for this Glue job - * */ - connections?: IConnection[]; - - /** - * Max Retries (optional) - * Maximum number of retry attempts Glue perform - * if the job fails - * @default 0 - * */ - maxRetries?: int; - - /** - * Timeout (optional) - * Timeout for the Glue job, specified in minutes - * @default 2880 (2 days for non-streaming) - * */ - timeout?: cdk.Duration; - - /** - * Security Configuration (optional) - * Defines the encryption options for the Glue job - * */ - securityConfiguration?: ISecurityConfiguration; - - /** - * Tags (optional) - * A list of key:value pairs of tags to apply to this Glue job resource - * */ - tags?: {[key: string], string }[]; - - /** - * Glue Version - * The version of Glue to use to execute this job - * @default 4.0 - * */ - glueVersion?: glue.GlueVersion; -} -``` - ### Uploading scripts from the CDK app repository to S3 Similar to other L2 constructs, the Glue L2 automates uploading / updating @@ -1173,8 +360,12 @@ in the local file structure. You provide the existing S3 bucket and path to which you'd like the script to be uploaded. 
```ts -glue.ScalaSparkEtlJob(this, 'ScalaSparkEtlJob', { - script: glue.Code.fromAsset('bucket-name', 'local/path/to/scala-jar'), +import * as s3 from 'aws-cdk-lib/aws-s3'; + +const codeBucket = s3.Bucket.fromBucketName(stack, 'CodeBucket', 'bucketname'); + +new glue.ScalaSparkEtlJob(this, 'ScalaSparkEtlJob', { + script: glue.Code.fromBucket(codeBucket, 'script'); className: 'com.example.HelloWorld', }); ``` @@ -1201,15 +392,23 @@ takes an optional description parameter, but abstracts the requirement of an actions list using the job or crawler objects using conditional types. ```ts -myWorkflow = new glue.Workflow(this, "GlueWorkflow", { +import * as s3 from 'aws-cdk-lib/aws-s3'; + +const codeBucket = s3.Bucket.fromBucketName(stack, 'CodeBucket', 'bucketname'); + +const myWorkflow = new glue.Workflow(this, "GlueWorkflow", { name: "MyWorkflow"; description: "New Workflow"; properties: {'key', 'value'}; }); -myWorkflow.onDemandTrigger(this, 'TriggerJobOnDemand', { - description: 'On demand run for ' + glue.JobExecutable.name, - actions: [jobOrCrawler: glue.JobExecutable | cdk.CfnCrawler?, ...] +const job = new glue.ScalaSparkEtlJob(this, 'ScalaSparkEtlJob', { + script: glue.Code.fromBucket(codeBucket, 'script'); + className: 'com.example.HelloWorld', +}); + +workflow.addOnDemandTrigger('OnDemandTrigger', { + actions: [{ job }], }); ``` @@ -1224,23 +423,44 @@ the expression that Glue requires from the Schedule object. The constructor takes an optional description and a list of jobs or crawlers as actions. ```ts +import * as s3 from 'aws-cdk-lib/aws-s3'; + +const codeBucket = s3.Bucket.fromBucketName(stack, 'CodeBucket', 'bucketname'); + // Create Daily Schedule at 00 UTC -myWorkflow.dailyScheduleTrigger(this, 'TriggerCrawlerOnDailySchedule', { - description: 'Scheduled run for ' + glue.JobExecutable.name, - actions: [ jobOrCrawler: glue.JobExecutable | cdk.CfnCrawler?, ...] 
+const myWorkflow = new glue.Workflow(this, "GlueWorkflow", { + name: "MyWorkflow"; + description: "New Workflow"; + properties: {'key', 'value'}; +}); + +const job = new glue.ScalaSparkEtlJob(this, 'ScalaSparkEtlJob', { + script: glue.Code.fromBucket(codeBucket, 'script'); + className: 'com.example.HelloWorld', +}); + +workflow.addDailyScheduledTrigger('DailyScheduledTrigger', { + actions: [{ job }], + startOnCreation: true, }); // Create Weekly schedule at 00 UTC on Sunday -myWorkflow.weeklyScheduleTrigger(this, 'TriggerJobOnWeeklySchedule', { - description: 'Scheduled run for ' + glue.JobExecutable.name, - actions: [jobOrCrawler: glue.JobExecutable | cdk.CfnCrawler?, ...] +workflow.addWeeklyScheduledTrigger('WeeklyScheduledTrigger', { + actions: [{ job }], + startOnCreation: false, }); -// Create Custom schedule, e.g. Monthly on the 7th day at 15:30 UTC -myWorkflow.customScheduleJobTrigger(this, 'TriggerCrawlerOnCustomSchedule', { - description: 'Scheduled run for ' + glue.JobExecutable.name, - actions: [jobOrCrawler: glue.JobExecutable | cdk.CfnCrawler?, ...] - schedule: events.Schedule.cron(day: '7', hour: '15', minute: '30') +// Create Custom schedule +const customSchedule = TriggerSchedule.cron({ + minute: '0', + hour: '20', + weekDay: 'THU', +}); + +workflow.addCustomScheduledTrigger('CustomScheduledTrigger', { + actions: [{ job }], + schedule: customSchedule, + startOnCreation: true, }); ``` @@ -1253,14 +473,29 @@ defaults to 900 seconds, but you can override the window to align with your workload's requirements. ```ts -myWorkflow.notifyEventTrigger(this, 'MyNotifyTriggerBatching', { - batchSize: int, - jobActions: [jobOrCrawler: glue.JobExecutable | cdk.CfnCrawler?, ...], - actions: [jobOrCrawler: glue.JobExecutable | cdk.CfnCrawler?, ... 
] +import * as cdk from 'aws-cdk-lib'; +import * as iam from 'aws-cdk-lib/aws-iam'; +import * as s3 from 'aws-cdk-lib/aws-s3'; + +const codeBucket = s3.Bucket.fromBucketName(stack, 'CodeBucket', 'bucketname'); + +const myWorkflow = new glue.Workflow(this, "GlueWorkflow", { + name: "MyWorkflow"; + description: "New Workflow"; + properties: {'key', 'value'}; }); -myWorkflow.notifyEventTrigger(this, 'MyNotifyTriggerNonBatching', { - actions: [jobOrCrawler: glue.JobExecutable | cdk.CfnCrawler?, ...] +const job = new glue.ScalaSparkEtlJob(this, 'ScalaSparkEtlJob', { + script: glue.Code.fromBucket(codeBucket, 'script'); + className: 'com.example.HelloWorld', +}); + +workflow.addNotifyEventTrigger('NotifyEventTrigger', { + actions: [{ job }], + eventBatchingCondition: { + batchSize: 10, + batchWindow: cdk.Duration.minutes(5), + }, }); ``` @@ -1270,13 +505,30 @@ Conditional triggers have a predicate and actions associated with them. The trigger actions are executed when the predicateCondition is true. ```ts +import * as s3 from 'aws-cdk-lib/aws-s3'; + +const codeBucket = s3.Bucket.fromBucketName(stack, 'CodeBucket', 'bucketname'); + // Triggers on Job and Crawler status -myWorkflow.conditionalTrigger(this, 'conditionalTrigger', { - description: 'Conditional trigger for ' + myGlueJob.name, - actions: [jobOrCrawler: glue.JobExecutable | cdk.CfnCrawler?, ...] 
- predicateCondition: glue.TriggerPredicateCondition.AND, - jobPredicates: [{'job': JobExecutable, 'state': glue.JobState.FAILED}, - {'job': JobExecutable, 'state' : glue.JobState.SUCCEEDED}] +const myWorkflow = new glue.Workflow(this, "GlueWorkflow", { + name: "MyWorkflow"; + description: "New Workflow"; + properties: {'key', 'value'}; +}); + +const job = new glue.ScalaSparkEtlJob(this, 'ScalaSparkEtlJob', { + script: glue.Code.fromBucket(codeBucket, 'script'); + className: 'com.example.HelloWorld', +}); + +workflow.addconditionalTrigger('ConditionalTrigger', { + actions: [{ job }], + predicate: { + conditions: [{ + job, + state: glue.JobState.SUCCEEDED, + }], + }, }); ``` From 9f1edd67febf23362191c858420f2d21b45b5a3f Mon Sep 17 00:00:00 2001 From: Natalie White Date: Tue, 30 Jul 2024 18:20:44 +0000 Subject: [PATCH 49/51] Resolve additional README compilation issues --- packages/@aws-cdk/aws-glue-alpha/README.md | 167 ++++++--------------- 1 file changed, 44 insertions(+), 123 deletions(-) diff --git a/packages/@aws-cdk/aws-glue-alpha/README.md b/packages/@aws-cdk/aws-glue-alpha/README.md index ed9229ce373c9..2859c3808200a 100644 --- a/packages/@aws-cdk/aws-glue-alpha/README.md +++ b/packages/@aws-cdk/aws-glue-alpha/README.md @@ -94,17 +94,22 @@ You can find more details about version, worker type and other features in ```ts import * as s3 from 'aws-cdk-lib/aws-s3'; +import * as cdk from 'aws-cdk-lib'; +import * as iam from 'aws-cdk-lib/aws-iam'; +const stack = new cdk.Stack(); +const role = iam.Role.fromRoleArn(stack, 'Role', 'arn:aws:iam::123456789012:role/TestRole'); const codeBucket = s3.Bucket.fromBucketName(stack, 'CodeBucket', 'bucketname'); +const script = glue.Code.fromBucket(codeBucket, 'script'); new glue.ScalaSparkEtlJob(this, 'ScalaSparkEtlJob', { - script: glue.Code.fromBucket(codeBucket, 'script'); + script: glue.Code.fromBucket(codeBucket, 'script'), className: 'com.example.HelloWorld', role: iam.IRole, }); new glue.PySparkEtlJob(this, 
'PySparkEtlJob', { - script: glue.Code.fromBucket(codeBucket, 'script'); + script: glue.Code.fromBucket(codeBucket, 'script'), role: iam.IRole, }); ``` @@ -112,21 +117,15 @@ new glue.PySparkEtlJob(this, 'PySparkEtlJob', { Optional override examples: ```ts -import * as cdk from 'aws-cdk-lib'; -import * as iam from 'aws-cdk-lib/aws-iam'; -import * as s3 from 'aws-cdk-lib/aws-s3'; - -const codeBucket = s3.Bucket.fromBucketName(stack, 'CodeBucket', 'bucketname'); - new glue.ScalaSparkEtlJob(this, 'ScalaSparkEtlJob', { glueVersion: glue.GlueVersion.V3_0, - script: glue.Code.fromBucket(codeBucket, 'script'); + script: glue.Code.fromBucket(codeBucket, 'script'), className: 'com.example.HelloWorld', extraJars: [ glue.Code.fromBucket( s3.Bucket.fromBucketName(stack, 'extraJarsBucket', 'extra-jars-bucket'), 'prefix/file.jar'), - ] + ], numberOfWorkers: 20, workerType: glue.WorkerType.G_8X, timeout: cdk.Duration.minutes(15), @@ -136,11 +135,11 @@ new glue.ScalaSparkEtlJob(this, 'ScalaSparkEtlJob', { new glue.PySparkEtlJob(this, 'PySparkEtlJob', { jobType: glue.JobType.ETL, glueVersion: glue.GlueVersion.V3_0, - pythonVersion: glue.PythonVersion.3_9, - script: glue.Code.fromBucket(codeBucket, 'script'); + pythonVersion: glue.PythonVersion.THREE_NINE, + script: glue.Code.fromBucket(codeBucket, 'script'), description: 'an example pySpark ETL job', numberOfWorkers: 20, - workerType: glue.WorkerType.G8X, + workerType: glue.WorkerType.G_8X, timeout: cdk.Duration.minutes(15), role: iam.IRole, }); @@ -158,18 +157,14 @@ The following best practice features are enabled by default: `—enable-metrics, —enable-spark-ui, —enable-continuous-cloudwatch-log`. 
```ts -import * as s3 from 'aws-cdk-lib/aws-s3'; - -const codeBucket = s3.Bucket.fromBucketName(stack, 'CodeBucket', 'bucketname'); - new glue.pySparkStreamingJob(this, 'pySparkStreamingJob', { - script: glue.Code.fromBucket(codeBucket, 'script'); + script: glue.Code.fromBucket(codeBucket, 'script'), role: iam.IRole, }); new glue.ScalaSparkStreamingJob(this, 'ScalaSparkStreamingJob', { - script: glue.Code.fromBucket(codeBucket, 'script'); + script: glue.Code.fromBucket(codeBucket, 'script'), className: 'com.example.HelloWorld', role: iam.IRole, }); @@ -179,19 +174,13 @@ new glue.ScalaSparkStreamingJob(this, 'ScalaSparkStreamingJob', { Optional override examples: ```ts -import * as cdk from 'aws-cdk-lib'; -import * as iam from 'aws-cdk-lib/aws-iam'; -import * as s3 from 'aws-cdk-lib/aws-s3'; - -const codeBucket = s3.Bucket.fromBucketName(stack, 'CodeBucket', 'bucketname'); - new glue.pySparkStreamingJob(this, 'pySparkStreamingJob', { glueVersion: glue.GlueVersion.V3_0, pythonVersion: glue.PythonVersion.THREE_NINE, - script: glue.Code.fromBucket(codeBucket, 'script'); + script: glue.Code.fromBucket(codeBucket, 'script'), description: 'an example Python Streaming job', numberOfWorkers: 20, - workerType: glue.WorkerType.G8X, + workerType: glue.WorkerType.G_8X, timeout: cdk.Duration.minutes(15), role: iam.IRole, }); @@ -199,16 +188,16 @@ new glue.pySparkStreamingJob(this, 'pySparkStreamingJob', { new glue.ScalaSparkStreamingJob(this, 'ScalaSparkStreamingJob', { glueVersion: glue.GlueVersion.V3_0, pythonVersion: glue.PythonVersion.THREE_NINE, - script: glue.Code.fromBucket(codeBucket, 'script'); + script: glue.Code.fromBucket(codeBucket, 'script'), extraJars: [ glue.Code.fromBucket( s3.Bucket.fromBucketName(stack, 'extraJarsBucket', 'extra-jars-bucket'), 'prefix/file.jar'), - ] + ], className: 'com.example.HelloWorld', description: 'an example Python Streaming job', numberOfWorkers: 20, - workerType: glue.WorkerType.G8X, + workerType: glue.WorkerType.G_8X, timeout: 
cdk.Duration.minutes(15), role: iam.IRole, }); @@ -223,19 +212,14 @@ features are enabled by default: `—enable-metrics, —enable-spark-ui, —enable-continuous-cloudwatch-log` ```ts -import * as iam from 'aws-cdk-lib/aws-iam'; -import * as s3 from 'aws-cdk-lib/aws-s3'; - -const codeBucket = s3.Bucket.fromBucketName(stack, 'CodeBucket', 'bucketname'); - new glue.ScalaSparkFlexEtlJob(this, 'ScalaSparkFlexEtlJob', { - script: glue.Code.fromBucket(codeBucket, 'script'); + script: glue.Code.fromBucket(codeBucket, 'script'), className: 'com.example.HelloWorld', role: iam.IRole, }); new glue.pySparkFlexEtlJob(this, 'pySparkFlexEtlJob', { - script: glue.Code.fromBucket(codeBucket, 'script'); + script: glue.Code.fromBucket(codeBucket, 'script'), role: iam.IRole, }); ``` @@ -243,23 +227,18 @@ new glue.pySparkFlexEtlJob(this, 'pySparkFlexEtlJob', { Optional override examples: ```ts -import * as iam from 'aws-cdk-lib/aws-iam'; -import * as s3 from 'aws-cdk-lib/aws-s3'; - -const codeBucket = s3.Bucket.fromBucketName(stack, 'CodeBucket', 'bucketname'); - new glue.ScalaSparkFlexEtlJob(this, 'ScalaSparkFlexEtlJob', { glueVersion: glue.GlueVersion.V3_0, - script: glue.Code.fromBucket(codeBucket, 'script'); + script: glue.Code.fromBucket(codeBucket, 'script'), className: 'com.example.HelloWorld', extraJars: [ glue.Code.fromBucket( s3.Bucket.fromBucketName(stack, 'extraJarsBucket', 'extra-jars-bucket'), 'prefix/file.jar'), - ] + ], description: 'an example pySpark ETL job', numberOfWorkers: 20, - workerType: glue.WorkerType.G8X, + workerType: glue.WorkerType.G_8X, timeout: cdk.Duration.minutes(15), role: iam.IRole, }); @@ -267,10 +246,10 @@ new glue.ScalaSparkFlexEtlJob(this, 'ScalaSparkFlexEtlJob', { new glue.pySparkFlexEtlJob(this, 'pySparkFlexEtlJob', { glueVersion: glue.GlueVersion.V3_0, pythonVersion: glue.PythonVersion.THREE_NINE, - script: glue.Code.fromBucket(codeBucket, 'script'); + script: glue.Code.fromBucket(codeBucket, 'script'), description: 'an example Flex job', 
numberOfWorkers: 20, - workerType: glue.WorkerType.G8X, + workerType: glue.WorkerType.G_8X, timeout: cdk.Duration.minutes(15), role: iam.IRole, }); @@ -286,13 +265,8 @@ analytics libraries using the `library-set=analytics` flag, which is enabled by default. ```ts -import * as iam from 'aws-cdk-lib/aws-iam'; -import * as s3 from 'aws-cdk-lib/aws-s3'; - -const codeBucket = s3.Bucket.fromBucketName(stack, 'CodeBucket', 'bucketname'); - new glue.PythonShellJob(this, 'PythonShellJob', { - script: glue.Code.fromBucket(codeBucket, 'script'); + script: glue.Code.fromBucket(codeBucket, 'script'), role: iam.IRole, }); ``` @@ -300,16 +274,10 @@ new glue.PythonShellJob(this, 'PythonShellJob', { Optional override examples: ```ts -import * as cdk from 'aws-cdk-lib'; -import * as iam from 'aws-cdk-lib/aws-iam'; -import * as s3 from 'aws-cdk-lib/aws-s3'; - -const codeBucket = s3.Bucket.fromBucketName(stack, 'CodeBucket', 'bucketname'); - new glue.PythonShellJob(this, 'PythonShellJob', { glueVersion: glue.GlueVersion.V1_0, pythonVersion: glue.PythonVersion.THREE_NINE, - script: glue.Code.fromBucket(codeBucket, 'script'); + script: glue.Code.fromBucket(codeBucket, 'script'), description: 'an example Python Shell job', numberOfWorkers: 20, workerType: glue.WorkerType.G_8X, @@ -325,13 +293,8 @@ overrideable since these are the only configuration that Glue Ray jobs currently support. The runtime defaults to Ray2.4 and min workers defaults to 3. 
```ts -import * as iam from 'aws-cdk-lib/aws-iam'; -import * as s3 from 'aws-cdk-lib/aws-s3'; - -const codeBucket = s3.Bucket.fromBucketName(stack, 'CodeBucket', 'bucketname'); - new glue.GlueRayJob(this, 'GlueRayJob', { - script: glue.Code.fromBucket(codeBucket, 'script'); + script: glue.Code.fromBucket(codeBucket, 'script'), role: iam.IRole, }); ``` @@ -339,11 +302,6 @@ new glue.GlueRayJob(this, 'GlueRayJob', { Optional override example: ```ts -import * as iam from 'aws-cdk-lib/aws-iam'; -import * as s3 from 'aws-cdk-lib/aws-s3'; - -const codeBucket = s3.Bucket.fromBucketName(stack, 'CodeBucket', 'bucketname'); - new glue.GlueRayJob(this, 'GlueRayJob', { script: glue.Code.fromBucket('bucket-name', 's3prefix/path-to-python-script'), numberOfWorkers: 50, @@ -360,12 +318,8 @@ in the local file structure. You provide the existing S3 bucket and path to which you'd like the script to be uploaded. ```ts -import * as s3 from 'aws-cdk-lib/aws-s3'; - -const codeBucket = s3.Bucket.fromBucketName(stack, 'CodeBucket', 'bucketname'); - new glue.ScalaSparkEtlJob(this, 'ScalaSparkEtlJob', { - script: glue.Code.fromBucket(codeBucket, 'script'); + script: glue.Code.fromBucket(codeBucket, 'script'), className: 'com.example.HelloWorld', }); ``` @@ -392,18 +346,14 @@ takes an optional description parameter, but abstracts the requirement of an actions list using the job or crawler objects using conditional types. 
```ts -import * as s3 from 'aws-cdk-lib/aws-s3'; - -const codeBucket = s3.Bucket.fromBucketName(stack, 'CodeBucket', 'bucketname'); - -const myWorkflow = new glue.Workflow(this, "GlueWorkflow", { +const workflow = new glue.Workflow(this, "GlueWorkflow", { name: "MyWorkflow"; description: "New Workflow"; properties: {'key', 'value'}; }); const job = new glue.ScalaSparkEtlJob(this, 'ScalaSparkEtlJob', { - script: glue.Code.fromBucket(codeBucket, 'script'); + script: glue.Code.fromBucket(codeBucket, 'script'), className: 'com.example.HelloWorld', }); @@ -423,20 +373,11 @@ the expression that Glue requires from the Schedule object. The constructor takes an optional description and a list of jobs or crawlers as actions. ```ts -import * as s3 from 'aws-cdk-lib/aws-s3'; - -const codeBucket = s3.Bucket.fromBucketName(stack, 'CodeBucket', 'bucketname'); - // Create Daily Schedule at 00 UTC -const myWorkflow = new glue.Workflow(this, "GlueWorkflow", { - name: "MyWorkflow"; - description: "New Workflow"; - properties: {'key', 'value'}; -}); - -const job = new glue.ScalaSparkEtlJob(this, 'ScalaSparkEtlJob', { - script: glue.Code.fromBucket(codeBucket, 'script'); - className: 'com.example.HelloWorld', +workflow = new glue.Workflow(this, "GlueWorkflow", { + name: "MyWorkflow", + description: "New Workflow", + properties: {'key': 'value'}, }); workflow.addDailyScheduledTrigger('DailyScheduledTrigger', { @@ -473,21 +414,10 @@ defaults to 900 seconds, but you can override the window to align with your workload's requirements. 
```ts -import * as cdk from 'aws-cdk-lib'; -import * as iam from 'aws-cdk-lib/aws-iam'; -import * as s3 from 'aws-cdk-lib/aws-s3'; - -const codeBucket = s3.Bucket.fromBucketName(stack, 'CodeBucket', 'bucketname'); - -const myWorkflow = new glue.Workflow(this, "GlueWorkflow", { - name: "MyWorkflow"; - description: "New Workflow"; - properties: {'key', 'value'}; -}); - -const job = new glue.ScalaSparkEtlJob(this, 'ScalaSparkEtlJob', { - script: glue.Code.fromBucket(codeBucket, 'script'); - className: 'com.example.HelloWorld', +workflow = new glue.Workflow(this, "GlueWorkflow", { + name: "MyWorkflow", + description: "New Workflow", + properties: {'key': 'value'}, }); workflow.addNotifyEventTrigger('NotifyEventTrigger', { @@ -505,20 +435,11 @@ Conditional triggers have a predicate and actions associated with them. The trigger actions are executed when the predicateCondition is true. ```ts -import * as s3 from 'aws-cdk-lib/aws-s3'; - -const codeBucket = s3.Bucket.fromBucketName(stack, 'CodeBucket', 'bucketname'); - // Triggers on Job and Crawler status -const myWorkflow = new glue.Workflow(this, "GlueWorkflow", { - name: "MyWorkflow"; - description: "New Workflow"; - properties: {'key', 'value'}; -}); - -const job = new glue.ScalaSparkEtlJob(this, 'ScalaSparkEtlJob', { - script: glue.Code.fromBucket(codeBucket, 'script'); - className: 'com.example.HelloWorld', +workflow = new glue.Workflow(this, "GlueWorkflow", { + name: "MyWorkflow", + description: "New Workflow", + properties: {'key': 'value'}, }); workflow.addconditionalTrigger('ConditionalTrigger', { From f4886a7e89fdd9057f625ad10c34c12d0b69e2b9 Mon Sep 17 00:00:00 2001 From: Natalie White Date: Tue, 6 Aug 2024 19:49:21 +0000 Subject: [PATCH 50/51] Remove code examples from README and reference unit tests for examples --- packages/@aws-cdk/aws-glue-alpha/README.md | 287 ++------------------- 1 file changed, 17 insertions(+), 270 deletions(-) diff --git a/packages/@aws-cdk/aws-glue-alpha/README.md 
b/packages/@aws-cdk/aws-glue-alpha/README.md index 2859c3808200a..d54fb7563c685 100644 --- a/packages/@aws-cdk/aws-glue-alpha/README.md +++ b/packages/@aws-cdk/aws-glue-alpha/README.md @@ -92,58 +92,9 @@ The following ETL features are enabled by default: You can find more details about version, worker type and other features in [Glue's public documentation](https://docs.aws.amazon.com/glue/latest/dg/aws-glue-api-jobs-job.html). -```ts -import * as s3 from 'aws-cdk-lib/aws-s3'; -import * as cdk from 'aws-cdk-lib'; -import * as iam from 'aws-cdk-lib/aws-iam'; - -const stack = new cdk.Stack(); -const role = iam.Role.fromRoleArn(stack, 'Role', 'arn:aws:iam::123456789012:role/TestRole'); -const codeBucket = s3.Bucket.fromBucketName(stack, 'CodeBucket', 'bucketname'); -const script = glue.Code.fromBucket(codeBucket, 'script'); - -new glue.ScalaSparkEtlJob(this, 'ScalaSparkEtlJob', { - script: glue.Code.fromBucket(codeBucket, 'script'), - className: 'com.example.HelloWorld', - role: iam.IRole, -}); - -new glue.PySparkEtlJob(this, 'PySparkEtlJob', { - script: glue.Code.fromBucket(codeBucket, 'script'), - role: iam.IRole, -}); -``` - -Optional override examples: - -```ts -new glue.ScalaSparkEtlJob(this, 'ScalaSparkEtlJob', { - glueVersion: glue.GlueVersion.V3_0, - script: glue.Code.fromBucket(codeBucket, 'script'), - className: 'com.example.HelloWorld', - extraJars: [ - glue.Code.fromBucket( - s3.Bucket.fromBucketName(stack, 'extraJarsBucket', 'extra-jars-bucket'), - 'prefix/file.jar'), - ], - numberOfWorkers: 20, - workerType: glue.WorkerType.G_8X, - timeout: cdk.Duration.minutes(15), - role: iam.IRole, -}); - -new glue.PySparkEtlJob(this, 'PySparkEtlJob', { - jobType: glue.JobType.ETL, - glueVersion: glue.GlueVersion.V3_0, - pythonVersion: glue.PythonVersion.THREE_NINE, - script: glue.Code.fromBucket(codeBucket, 'script'), - description: 'an example pySpark ETL job', - numberOfWorkers: 20, - workerType: glue.WorkerType.G_8X, - timeout: cdk.Duration.minutes(15), - role: 
iam.IRole, -}); -``` +Reference the pyspark-etl-jobs.test.ts and scalaspark-etl-jobs.test.ts unit tests +for examples of required-only and optional job parameters when creating these +types of jobs. **Streaming Jobs** @@ -156,52 +107,9 @@ defaults to the G2 worker type and Glue 4.0, both of which you can override. The following best practice features are enabled by default: `—enable-metrics, —enable-spark-ui, —enable-continuous-cloudwatch-log`. -```ts -new glue.pySparkStreamingJob(this, 'pySparkStreamingJob', { - script: glue.Code.fromBucket(codeBucket, 'script'), - role: iam.IRole, -}); - - -new glue.ScalaSparkStreamingJob(this, 'ScalaSparkStreamingJob', { - script: glue.Code.fromBucket(codeBucket, 'script'), - className: 'com.example.HelloWorld', - role: iam.IRole, -}); - -``` - -Optional override examples: - -```ts -new glue.pySparkStreamingJob(this, 'pySparkStreamingJob', { - glueVersion: glue.GlueVersion.V3_0, - pythonVersion: glue.PythonVersion.THREE_NINE, - script: glue.Code.fromBucket(codeBucket, 'script'), - description: 'an example Python Streaming job', - numberOfWorkers: 20, - workerType: glue.WorkerType.G_8X, - timeout: cdk.Duration.minutes(15), - role: iam.IRole, -}); - -new glue.ScalaSparkStreamingJob(this, 'ScalaSparkStreamingJob', { - glueVersion: glue.GlueVersion.V3_0, - pythonVersion: glue.PythonVersion.THREE_NINE, - script: glue.Code.fromBucket(codeBucket, 'script'), - extraJars: [ - glue.Code.fromBucket( - s3.Bucket.fromBucketName(stack, 'extraJarsBucket', 'extra-jars-bucket'), - 'prefix/file.jar'), - ], - className: 'com.example.HelloWorld', - description: 'an example Python Streaming job', - numberOfWorkers: 20, - workerType: glue.WorkerType.G_8X, - timeout: cdk.Duration.minutes(15), - role: iam.IRole, -}); -``` +Reference the pyspark-streaming-jobs.test.ts and scalaspark-streaming-jobs.test.ts +unit tests for examples of required-only and optional job parameters when creating +these types of jobs. 
**Flex Jobs** @@ -211,49 +119,9 @@ to Glue version 3.0 and worker type `G_2X`. The following best practice features are enabled by default: `—enable-metrics, —enable-spark-ui, —enable-continuous-cloudwatch-log` -```ts -new glue.ScalaSparkFlexEtlJob(this, 'ScalaSparkFlexEtlJob', { - script: glue.Code.fromBucket(codeBucket, 'script'), - className: 'com.example.HelloWorld', - role: iam.IRole, -}); - -new glue.pySparkFlexEtlJob(this, 'pySparkFlexEtlJob', { - script: glue.Code.fromBucket(codeBucket, 'script'), - role: iam.IRole, -}); -``` - -Optional override examples: - -```ts -new glue.ScalaSparkFlexEtlJob(this, 'ScalaSparkFlexEtlJob', { - glueVersion: glue.GlueVersion.V3_0, - script: glue.Code.fromBucket(codeBucket, 'script'), - className: 'com.example.HelloWorld', - extraJars: [ - glue.Code.fromBucket( - s3.Bucket.fromBucketName(stack, 'extraJarsBucket', 'extra-jars-bucket'), - 'prefix/file.jar'), - ], - description: 'an example pySpark ETL job', - numberOfWorkers: 20, - workerType: glue.WorkerType.G_8X, - timeout: cdk.Duration.minutes(15), - role: iam.IRole, -}); - -new glue.pySparkFlexEtlJob(this, 'pySparkFlexEtlJob', { - glueVersion: glue.GlueVersion.V3_0, - pythonVersion: glue.PythonVersion.THREE_NINE, - script: glue.Code.fromBucket(codeBucket, 'script'), - description: 'an example Flex job', - numberOfWorkers: 20, - workerType: glue.WorkerType.G_8X, - timeout: cdk.Duration.minutes(15), - role: iam.IRole, -}); -``` +Reference the pyspark-flex-etl-jobs.test.ts and scalaspark-flex-etl-jobs.test.ts +unit tests for examples of required-only and optional job parameters when creating +these types of jobs. ### Python Shell Jobs @@ -264,27 +132,8 @@ Python 3.9 and a MaxCapacity of `0.0625`. Python 3.9 supports pre-loaded analytics libraries using the `library-set=analytics` flag, which is enabled by default. 
-```ts -new glue.PythonShellJob(this, 'PythonShellJob', { - script: glue.Code.fromBucket(codeBucket, 'script'), - role: iam.IRole, -}); -``` - -Optional override examples: - -```ts -new glue.PythonShellJob(this, 'PythonShellJob', { - glueVersion: glue.GlueVersion.V1_0, - pythonVersion: glue.PythonVersion.THREE_NINE, - script: glue.Code.fromBucket(codeBucket, 'script'), - description: 'an example Python Shell job', - numberOfWorkers: 20, - workerType: glue.WorkerType.G_8X, - timeout: cdk.Duration.minutes(15), - role: iam.IRole, -}); -``` +Reference the pyspark-shell-job.test.ts unit tests for examples of +required-only and optional job parameters when creating these types of jobs. ### Ray Jobs @@ -292,23 +141,8 @@ Glue Ray jobs use worker type Z.2X and Glue version 4.0. These are not overrideable since these are the only configuration that Glue Ray jobs currently support. The runtime defaults to Ray2.4 and min workers defaults to 3. -```ts -new glue.GlueRayJob(this, 'GlueRayJob', { - script: glue.Code.fromBucket(codeBucket, 'script'), - role: iam.IRole, -}); -``` - -Optional override example: - -```ts -new glue.GlueRayJob(this, 'GlueRayJob', { - script: glue.Code.fromBucket('bucket-name', 's3prefix/path-to-python-script'), - numberOfWorkers: 50, - minWorkers: 25, - role: iam.IRole, -}); -``` +Reference the ray-job.test.ts unit tests for examples of required-only and +optional job parameters when creating these types of jobs. ### Uploading scripts from the CDK app repository to S3 @@ -317,12 +151,7 @@ scripts to S3 via an optional fromAsset parameter pointing to a script in the local file structure. You provide the existing S3 bucket and path to which you'd like the script to be uploaded. -```ts -new glue.ScalaSparkEtlJob(this, 'ScalaSparkEtlJob', { - script: glue.Code.fromBucket(codeBucket, 'script'), - className: 'com.example.HelloWorld', -}); -``` +Reference the unit tests for examples of repo and S3 code targets.
### Workflow Triggers @@ -338,6 +167,9 @@ to jobs. StartOnCreation defaults to true for all trigger types, but you can override it if you prefer for your trigger not to start on creation. +Reference the workflow-triggers.test.ts unit tests for examples of creating +workflows and triggers. + 1. **On-Demand Triggers** On-demand triggers can start glue jobs or crawlers. This construct provides @@ -345,23 +177,6 @@ convenience functions to create on-demand crawler or job triggers. The construct takes an optional description parameter, but abstracts the requirement of an actions list using the job or crawler objects using conditional types. -```ts -const workflow = new glue.Workflow(this, "GlueWorkflow", { - name: "MyWorkflow"; - description: "New Workflow"; - properties: {'key', 'value'}; -}); - -const job = new glue.ScalaSparkEtlJob(this, 'ScalaSparkEtlJob', { - script: glue.Code.fromBucket(codeBucket, 'script'), - className: 'com.example.HelloWorld', -}); - -workflow.addOnDemandTrigger('OnDemandTrigger', { - actions: [{ job }], -}); -``` - 1. **Scheduled Triggers** You can create scheduled triggers using cron expressions. This construct @@ -372,39 +187,6 @@ without having to build your own cron expressions. The L2 extracts the expression that Glue requires from the Schedule object. The constructor takes an optional description and a list of jobs or crawlers as actions. 
-```ts -// Create Daily Schedule at 00 UTC -workflow = new glue.Workflow(this, "GlueWorkflow", { - name: "MyWorkflow", - description: "New Workflow", - properties: {'key': 'value'}, -}); - -workflow.addDailyScheduledTrigger('DailyScheduledTrigger', { - actions: [{ job }], - startOnCreation: true, -}); - -// Create Weekly schedule at 00 UTC on Sunday -workflow.addWeeklyScheduledTrigger('WeeklyScheduledTrigger', { - actions: [{ job }], - startOnCreation: false, -}); - -// Create Custom schedule -const customSchedule = TriggerSchedule.cron({ - minute: '0', - hour: '20', - weekDay: 'THU', -}); - -workflow.addCustomScheduledTrigger('CustomScheduledTrigger', { - actions: [{ job }], - schedule: customSchedule, - startOnCreation: true, -}); -``` - #### **3. Notify Event Triggers** There are two types of notify event triggers: batching and non-batching. @@ -413,46 +195,11 @@ triggers, `BatchSize` defaults to 1. For both triggers, `BatchWindow` defaults to 900 seconds, but you can override the window to align with your workload's requirements. -```ts -workflow = new glue.Workflow(this, "GlueWorkflow", { - name: "MyWorkflow", - description: "New Workflow", - properties: {'key': 'value'}, -}); - -workflow.addNotifyEventTrigger('NotifyEventTrigger', { - actions: [{ job }], - eventBatchingCondition: { - batchSize: 10, - batchWindow: cdk.Duration.minutes(5), - }, -}); -``` - #### **4. Conditional Triggers** Conditional triggers have a predicate and actions associated with them. The trigger actions are executed when the predicateCondition is true. 
-```ts -// Triggers on Job and Crawler status -workflow = new glue.Workflow(this, "GlueWorkflow", { - name: "MyWorkflow", - description: "New Workflow", - properties: {'key': 'value'}, -}); - -workflow.addconditionalTrigger('ConditionalTrigger', { - actions: [{ job }], - predicate: { - conditions: [{ - job, - state: glue.JobState.SUCCEEDED, - }], - }, -}); -``` - ### Connection Properties A `Connection` allows Glue jobs, crawlers and development endpoints to access From 0d8c5eb6ace70e6572c8160f0c2df5e09a95ef68 Mon Sep 17 00:00:00 2001 From: Natalie White Date: Fri, 4 Oct 2024 00:02:28 +0000 Subject: [PATCH 51/51] Updates based on feedback from @GavinZZ and Paul Sun --- packages/@aws-cdk/aws-glue-alpha/README.md | 402 +++++++++++++++++- .../@aws-cdk/aws-glue-alpha/lib/jobs/job.ts | 2 +- .../lib/jobs/pyspark-etl-job.ts | 11 +- 3 files changed, 406 insertions(+), 9 deletions(-) diff --git a/packages/@aws-cdk/aws-glue-alpha/README.md b/packages/@aws-cdk/aws-glue-alpha/README.md index d54fb7563c685..fe8644a86dd3e 100644 --- a/packages/@aws-cdk/aws-glue-alpha/README.md +++ b/packages/@aws-cdk/aws-glue-alpha/README.md @@ -24,7 +24,7 @@ service that makes it easier to discover, prepare, move, and integrate data from multiple sources for analytics, machine learning (ML), and application development. -Wihout an L2 construct, developers define Glue data sources, connections, +Without an L2 construct, developers define Glue data sources, connections, jobs, and workflows for their data and ETL solutions via the AWS console, the AWS CLI, and Infrastructure as Code tools like CloudFormation and the CDK. However, there are several challenges to defining Glue resources at @@ -219,6 +219,406 @@ configuration existing [EC2 Subnet Selection](https://docs.aws.amazon.com/cdk/api/v2/python/aws_cdk.aws_ec2/SubnetSelection.html) library to make the best choice selection for the subnet. 
+```ts +declare const securityGroup: ec2.SecurityGroup; +declare const subnet: ec2.Subnet; +new glue.Connection(this, 'MyConnection', { + type: glue.ConnectionType.NETWORK, + // The security groups granting AWS Glue inbound access to the data source within the VPC + securityGroups: [securityGroup], + // The VPC subnet which contains the data source + subnet, +}); +``` + +For RDS `Connection` by JDBC, it is recommended to manage credentials using AWS Secrets Manager. To use Secret, specify `SECRET_ID` in `properties` like the following code. Note that in this case, the subnet must have a route to the AWS Secrets Manager VPC endpoint or to the AWS Secrets Manager endpoint through a NAT gateway. + +```ts +declare const securityGroup: ec2.SecurityGroup; +declare const subnet: ec2.Subnet; +declare const db: rds.DatabaseCluster; +new glue.Connection(this, "RdsConnection", { + type: glue.ConnectionType.JDBC, + securityGroups: [securityGroup], + subnet, + properties: { + JDBC_CONNECTION_URL: `jdbc:mysql://${db.clusterEndpoint.socketAddress}/databasename`, + JDBC_ENFORCE_SSL: "false", + SECRET_ID: db.secret!.secretName, + }, +}); +``` + +If you need to use a connection type that doesn't exist as a static member on `ConnectionType`, you can instantiate a `ConnectionType` object, e.g: `new glue.ConnectionType('NEW_TYPE')`. + +See [Adding a Connection to Your Data Store](https://docs.aws.amazon.com/glue/latest/dg/populate-add-connection.html) and [Connection Structure](https://docs.aws.amazon.com/glue/latest/dg/aws-glue-api-catalog-connections.html#aws-glue-api-catalog-connections-Connection) documentation for more information on the supported data stores and their configurations. + +## SecurityConfiguration + +A `SecurityConfiguration` is a set of security properties that can be used by AWS Glue to encrypt data at rest. 
+ +```ts +new glue.SecurityConfiguration(this, 'MySecurityConfiguration', { + cloudWatchEncryption: { + mode: glue.CloudWatchEncryptionMode.KMS, + }, + jobBookmarksEncryption: { + mode: glue.JobBookmarksEncryptionMode.CLIENT_SIDE_KMS, + }, + s3Encryption: { + mode: glue.S3EncryptionMode.KMS, + }, +}); +``` + +By default, a shared KMS key is created for use with the encryption configurations that require one. You can also supply your own key for each encryption config, for example, for CloudWatch encryption: + +```ts +declare const key: kms.Key; +new glue.SecurityConfiguration(this, 'MySecurityConfiguration', { + cloudWatchEncryption: { + mode: glue.CloudWatchEncryptionMode.KMS, + kmsKey: key, + }, +}); +``` + +See [documentation](https://docs.aws.amazon.com/glue/latest/dg/encryption-security-configuration.html) for more information on how Glue encrypts data written by Crawlers, Jobs, and Development Endpoints. + +## Database + +A `Database` is a logical grouping of `Tables` in the Glue Catalog. 
+ +```ts +new glue.Database(this, 'MyDatabase', { + databaseName: 'my_database', + description: 'my_database_description', +}); +``` + +## Table + +A Glue table describes a table of data in S3: its structure (column names and types), location of data (S3 objects with a common prefix in an S3 bucket), and format for the files (JSON, Avro, Parquet, etc.): + +```ts +declare const myDatabase: glue.Database; +new glue.S3Table(this, 'MyTable', { + database: myDatabase, + columns: [{ + name: 'col1', + type: glue.Schema.STRING, + }, { + name: 'col2', + type: glue.Schema.array(glue.Schema.STRING), + comment: 'col2 is an array of strings' // comment is optional + }], + dataFormat: glue.DataFormat.JSON, +}); +``` + +By default, an S3 bucket will be created to store the table's data but you can manually pass the `bucket` and `s3Prefix`: + +```ts +declare const myBucket: s3.Bucket; +declare const myDatabase: glue.Database; +new glue.S3Table(this, 'MyTable', { + bucket: myBucket, + s3Prefix: 'my-table/', + // ... + database: myDatabase, + columns: [{ + name: 'col1', + type: glue.Schema.STRING, + }], + dataFormat: glue.DataFormat.JSON, +}); +``` + +Glue tables can be configured to contain user-defined properties, to describe the physical storage of table data, through the `storageParameters` property: + +```ts +declare const myDatabase: glue.Database; +new glue.S3Table(this, 'MyTable', { + storageParameters: [ + glue.StorageParameter.skipHeaderLineCount(1), + glue.StorageParameter.compressionType(glue.CompressionType.GZIP), + glue.StorageParameter.custom('separatorChar', ',') + ], + // ... 
+ database: myDatabase, + columns: [{ + name: 'col1', + type: glue.Schema.STRING, + }], + dataFormat: glue.DataFormat.JSON, +}); +``` + +Glue tables can also be configured to contain user-defined table properties through the [`parameters`](https://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/aws-properties-glue-table-tableinput.html#cfn-glue-table-tableinput-parameters) property: + +```ts +declare const myDatabase: glue.Database; +new glue.S3Table(this, 'MyTable', { + parameters: { + key1: 'val1', + key2: 'val2', + }, + database: myDatabase, + columns: [{ + name: 'col1', + type: glue.Schema.STRING, + }], + dataFormat: glue.DataFormat.JSON, +}); +``` + +### Partition Keys + +To improve query performance, a table can specify `partitionKeys` on which data is stored and queried separately. For example, you might partition a table by `year` and `month` to optimize queries based on a time window: + +```ts +declare const myDatabase: glue.Database; +new glue.S3Table(this, 'MyTable', { + database: myDatabase, + columns: [{ + name: 'col1', + type: glue.Schema.STRING, + }], + partitionKeys: [{ + name: 'year', + type: glue.Schema.SMALL_INT, + }, { + name: 'month', + type: glue.Schema.SMALL_INT, + }], + dataFormat: glue.DataFormat.JSON, +}); +``` + +### Partition Indexes + +Another way to improve query performance is to specify partition indexes. If no partition indexes are +present on the table, AWS Glue loads all partitions of the table and filters the loaded partitions using +the query expression. The query takes more time to run as the number of partitions increases. With an +index, the query will try to fetch a subset of the partitions instead of loading all partitions of the +table. + +The keys of a partition index must be a subset of the partition keys of the table. You can have a +maximum of 3 partition indexes per table. 
To specify a partition index, you can use the `partitionIndexes` +property: + +```ts +declare const myDatabase: glue.Database; +new glue.S3Table(this, 'MyTable', { + database: myDatabase, + columns: [{ + name: 'col1', + type: glue.Schema.STRING, + }], + partitionKeys: [{ + name: 'year', + type: glue.Schema.SMALL_INT, + }, { + name: 'month', + type: glue.Schema.SMALL_INT, + }], + partitionIndexes: [{ + indexName: 'my-index', // optional + keyNames: ['year'], + }], // supply up to 3 indexes + dataFormat: glue.DataFormat.JSON, +}); +``` + +Alternatively, you can call the `addPartitionIndex()` function on a table: + +```ts +declare const myTable: glue.Table; +myTable.addPartitionIndex({ + indexName: 'my-index', + keyNames: ['year'], +}); +``` + +### Partition Filtering + +If you have a table with a large number of partitions that grows over time, consider using AWS Glue partition indexing and filtering. + +```ts +declare const myDatabase: glue.Database; +new glue.S3Table(this, 'MyTable', { + database: myDatabase, + columns: [{ + name: 'col1', + type: glue.Schema.STRING, + }], + partitionKeys: [{ + name: 'year', + type: glue.Schema.SMALL_INT, + }, { + name: 'month', + type: glue.Schema.SMALL_INT, + }], + dataFormat: glue.DataFormat.JSON, + enablePartitionFiltering: true, +}); +``` + +### Glue Connections + +Glue connections allow external data connections to third party databases and data warehouses. However, these connections can also be assigned to Glue Tables, allowing you to query external data sources using the Glue Data Catalog. + +Whereas `S3Table` will point to (and if needed, create) a bucket to store the tables' data, `ExternalTable` will point to an existing table in a data source. 
For example, to create a table in Glue that points to a table in Redshift: + +```ts +declare const myConnection: glue.Connection; +declare const myDatabase: glue.Database; +new glue.ExternalTable(this, 'MyTable', { + connection: myConnection, + externalDataLocation: 'default_db_public_example', // A table in Redshift + // ... + database: myDatabase, + columns: [{ + name: 'col1', + type: glue.Schema.STRING, + }], + dataFormat: glue.DataFormat.JSON, +}); +``` + +## [Encryption](https://docs.aws.amazon.com/athena/latest/ug/encryption.html) + +You can enable encryption on a Table's data: + +* [S3Managed](https://docs.aws.amazon.com/AmazonS3/latest/dev/UsingServerSideEncryption.html) - (default) Server side encryption (`SSE-S3`) with an Amazon S3-managed key. + +```ts +declare const myDatabase: glue.Database; +new glue.S3Table(this, 'MyTable', { + encryption: glue.TableEncryption.S3_MANAGED, + // ... + database: myDatabase, + columns: [{ + name: 'col1', + type: glue.Schema.STRING, + }], + dataFormat: glue.DataFormat.JSON, +}); +``` + +* [Kms](https://docs.aws.amazon.com/AmazonS3/latest/dev/UsingKMSEncryption.html) - Server-side encryption (`SSE-KMS`) with an AWS KMS Key managed by the account owner. + +```ts +declare const myDatabase: glue.Database; +// KMS key is created automatically +new glue.S3Table(this, 'MyTable', { + encryption: glue.TableEncryption.KMS, + // ... + database: myDatabase, + columns: [{ + name: 'col1', + type: glue.Schema.STRING, + }], + dataFormat: glue.DataFormat.JSON, +}); + +// with an explicit KMS key +new glue.S3Table(this, 'MyTable', { + encryption: glue.TableEncryption.KMS, + encryptionKey: new kms.Key(this, 'MyKey'), + // ... 
+ database: myDatabase, + columns: [{ + name: 'col1', + type: glue.Schema.STRING, + }], + dataFormat: glue.DataFormat.JSON, +}); +``` + +* [KmsManaged](https://docs.aws.amazon.com/AmazonS3/latest/dev/UsingKMSEncryption.html) - Server-side encryption (`SSE-KMS`), like `Kms`, except with an AWS KMS Key managed by the AWS Key Management Service. + +```ts +declare const myDatabase: glue.Database; +new glue.S3Table(this, 'MyTable', { + encryption: glue.TableEncryption.KMS_MANAGED, + // ... + database: myDatabase, + columns: [{ + name: 'col1', + type: glue.Schema.STRING, + }], + dataFormat: glue.DataFormat.JSON, +}); +``` + +* [ClientSideKms](https://docs.aws.amazon.com/AmazonS3/latest/dev/UsingClientSideEncryption.html#client-side-encryption-kms-managed-master-key-intro) - Client-side encryption (`CSE-KMS`) with an AWS KMS Key managed by the account owner. + +```ts +declare const myDatabase: glue.Database; +// KMS key is created automatically +new glue.S3Table(this, 'MyTable', { + encryption: glue.TableEncryption.CLIENT_SIDE_KMS, + // ... + database: myDatabase, + columns: [{ + name: 'col1', + type: glue.Schema.STRING, + }], + dataFormat: glue.DataFormat.JSON, +}); + +// with an explicit KMS key +new glue.S3Table(this, 'MyTable', { + encryption: glue.TableEncryption.CLIENT_SIDE_KMS, + encryptionKey: new kms.Key(this, 'MyKey'), + // ... + database: myDatabase, + columns: [{ + name: 'col1', + type: glue.Schema.STRING, + }], + dataFormat: glue.DataFormat.JSON, +}); +``` + +*Note: you cannot provide a `Bucket` when creating the `S3Table` if you wish to use server-side encryption (`KMS`, `KMS_MANAGED` or `S3_MANAGED`)*. + +## Types + +A table's schema is a collection of columns, each of which has a `name` and a `type`. 
Types are recursive structures, consisting of primitive and complex types: + +```ts +declare const myDatabase: glue.Database; +new glue.S3Table(this, 'MyTable', { + columns: [{ + name: 'primitive_column', + type: glue.Schema.STRING, + }, { + name: 'array_column', + type: glue.Schema.array(glue.Schema.INTEGER), + comment: 'array', + }, { + name: 'map_column', + type: glue.Schema.map( + glue.Schema.STRING, + glue.Schema.TIMESTAMP), + comment: 'map', + }, { + name: 'struct_column', + type: glue.Schema.struct([{ + name: 'nested_column', + type: glue.Schema.DATE, + comment: 'nested comment', + }]), + comment: "struct", + }], + // ... + database: myDatabase, + dataFormat: glue.DataFormat.JSON, +}); +``` + ## Public FAQ ### What are we launching today? diff --git a/packages/@aws-cdk/aws-glue-alpha/lib/jobs/job.ts b/packages/@aws-cdk/aws-glue-alpha/lib/jobs/job.ts index 04d62997806ed..57bb1cc636442 100644 --- a/packages/@aws-cdk/aws-glue-alpha/lib/jobs/job.ts +++ b/packages/@aws-cdk/aws-glue-alpha/lib/jobs/job.ts @@ -512,7 +512,7 @@ export abstract class Job extends JobBase { }; if (props?.quiet) { - args['--enable-continuous-log-filter'] = (props.quiet ?? true).toString(); + args['--enable-continuous-log-filter'] = 'true'; }; // If the developer provided a log group, add its name to the args and update the role. diff --git a/packages/@aws-cdk/aws-glue-alpha/lib/jobs/pyspark-etl-job.ts b/packages/@aws-cdk/aws-glue-alpha/lib/jobs/pyspark-etl-job.ts index f8f4a4aa760e7..ef8291a2752d5 100644 --- a/packages/@aws-cdk/aws-glue-alpha/lib/jobs/pyspark-etl-job.ts +++ b/packages/@aws-cdk/aws-glue-alpha/lib/jobs/pyspark-etl-job.ts @@ -8,7 +8,7 @@ import { SparkUIProps, SparkUILoggingLocation, validateSparkUiPrefix, cleanSpark import { Code } from '../code'; /** - * Spark ETL Jobs class + * PySpark ETL Jobs class * ETL jobs support pySpark and Scala languages, for which there are separate * but similar constructors. 
ETL jobs default to the G2 worker type, but you * can override this default with other supported worker type values @@ -82,9 +82,6 @@ export class PySparkEtlJob extends Job { super(scope, id, { physicalName: props.jobName, }); - - this.jobName = props.jobName ?? ''; - // Set up role and permissions for principal this.role = props.role, { assumedBy: new iam.ServicePrincipal('glue.amazonaws.com'), @@ -121,10 +118,10 @@ export class PySparkEtlJob extends Job { command: { name: JobType.ETL, scriptLocation: this.codeS3ObjectUrl(props.script), - pythonVersion: PythonVersion.THREE, + pythonVersion: PythonVersion.THREE_NINE, }, - glueVersion: props.glueVersion ? props.glueVersion : GlueVersion.V4_0, - workerType: props.workerType ? props.workerType : WorkerType.G_1X, + glueVersion: props.glueVersion ?? GlueVersion.V4_0, + workerType: props.workerType ?? WorkerType.G_1X, numberOfWorkers: props.numberOfWorkers ? props.numberOfWorkers : 10, maxRetries: props.maxRetries, executionProperty: props.maxConcurrentRuns ? { maxConcurrentRuns: props.maxConcurrentRuns } : undefined,