Merge remote-tracking branch 'origin/develop' into 1345-postgresql-uuid-support
nj1973 committed Dec 12, 2024
2 parents 39cbe6b + b20b4dd commit e236280
Showing 4 changed files with 50 additions and 14 deletions.
36 changes: 26 additions & 10 deletions data_validation/config_manager.py
@@ -783,9 +783,24 @@ def append_pre_agg_calc_field(
target_column_type: str,
column_position: int,
) -> dict:
"""Append calculated field for length(string | binary) or epoch_seconds(timestamp) for preprocessing before column validation aggregation."""
"""Append calculated field for length() or epoch_seconds(timestamp) for preprocessing before column validation aggregation."""
depth, cast_type = 0, None
if column_type in ["string", "!string"]:
if any(_ in ["json", "!json"] for _ in [column_type, target_column_type]):
# JSON data needs casting to string before we apply a length function.
pre_calculated_config = self.build_and_append_pre_agg_calc_config(
source_column,
target_column,
"cast",
column_position,
"string",
depth,
)
source_column = target_column = pre_calculated_config[
consts.CONFIG_FIELD_ALIAS
]
depth = 1
calc_func = "length"
elif column_type in ["string", "!string"]:
calc_func = "length"

elif self._is_uuid(column_type, target_column_type):
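The new branch above gives JSON the same length-based treatment as strings, with one extra step: the value is first cast to string, and the length of that string is what gets aggregated. As a rough sketch of the kind of pre-aggregation expression this produces (column and alias names are illustrative; the exact SQL DVT emits varies by engine):

-- Hypothetical sketch: a JSON column validated via the length of its string form.
SELECT SUM(LENGTH(CAST(col_json AS VARCHAR(4000)))) AS json_length_sum
FROM pso_data_validator.dvt_ora2pg_types;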
@@ -800,14 +815,12 @@ def append_pre_agg_calc_field(
self.source_client.name == "bigquery"
or self.target_client.name == "bigquery"
):
calc_func = consts.CONFIG_CAST
cast_type = "timestamp"
pre_calculated_config = self.build_and_append_pre_agg_calc_config(
source_column,
target_column,
calc_func,
consts.CONFIG_CAST,
column_position,
cast_type=cast_type,
cast_type="timestamp",
depth=depth,
)
source_column = target_column = pre_calculated_config[
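This hunk inlines the previously assigned calc_func and cast_type values: when either side of the validation is BigQuery, the column is cast to timestamp before aggregation. The elided context suggests this is the epoch_seconds path named in the docstring, since BigQuery's epoch functions accept TIMESTAMP but not DATETIME. A hedged BigQuery-flavoured sketch (table and column names hypothetical):

-- UNIX_SECONDS() takes a TIMESTAMP, so a DATETIME column is cast first.
SELECT SUM(UNIX_SECONDS(CAST(col_datetime AS TIMESTAMP))) AS epoch_sum
FROM my_dataset.my_table;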
@@ -909,16 +922,19 @@ def require_pre_agg_calc_field(
agg_type: str,
cast_to_bigint: bool,
) -> bool:
if column_type in ["string", "!string"] and target_column_type in [
"string",
"!string",
]:
if all(
_ in ["string", "!string", "json", "!json"]
for _ in [column_type, target_column_type]
):
# These data types are aggregated using their lengths.
return True
elif self._is_uuid(column_type, target_column_type):
return True
elif column_type in ["binary", "!binary"]:
if agg_type == "count":
# Oracle BLOB is invalid for use with SQL COUNT function.
# The expression below returns True if either client is Oracle, which
# triggers use of the byte_length transformation.
return bool(
self.source_client.name == "oracle"
or self.target_client.name == "oracle"
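The Oracle special case above exists because SQL COUNT cannot take a BLOB argument on Oracle (it raises ORA-00932: inconsistent datatypes), so the column is reduced to its byte length before aggregation. A minimal sketch against the test schema (the exact SQL DVT generates will differ):

-- Fails on Oracle with ORA-00932: COUNT cannot accept a BLOB.
SELECT COUNT(col_blob) FROM pso_data_validator.dvt_ora2pg_types;

-- Works: aggregate the BLOB's byte length instead.
SELECT COUNT(DBMS_LOB.GETLENGTH(col_blob)) FROM pso_data_validator.dvt_ora2pg_types;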
9 changes: 9 additions & 0 deletions tests/resources/oracle_test_tables.sql
@@ -106,7 +106,13 @@ CREATE TABLE pso_data_validator.dvt_ora2pg_types
, col_clob CLOB
, col_nclob NCLOB
, col_uuid RAW(16)
, col_json CLOB
, col_jsonb CLOB
);
ALTER TABLE pso_data_validator.dvt_ora2pg_types
ADD CONSTRAINT dvt_ora2pg_types_chk1 CHECK (col_json IS JSON) ENABLE;
ALTER TABLE pso_data_validator.dvt_ora2pg_types
ADD CONSTRAINT dvt_ora2pg_types_chk2 CHECK (col_jsonb IS JSON) ENABLE;
COMMENT ON TABLE pso_data_validator.dvt_ora2pg_types IS 'Oracle to PostgreSQL integration test table';
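The Oracle versions targeted by this test schema store JSON in ordinary CLOB columns guarded by IS JSON check constraints, mirroring the native json/jsonb columns on the PostgreSQL side. A standalone sketch of what such a constraint enforces (hypothetical demo table, not part of the test schema):

CREATE TABLE json_chk_demo (doc CLOB CHECK (doc IS JSON));
INSERT INTO json_chk_demo VALUES ('{"dvt": 123}'); -- accepted
INSERT INTO json_chk_demo VALUES ('not json');     -- rejected with ORA-02290: check constraint violated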

-- Literals below match corresponding table in postgresql_test_tables.sql
@@ -123,6 +129,7 @@ INSERT INTO pso_data_validator.dvt_ora2pg_types VALUES
,UTL_RAW.CAST_TO_RAW('DVT'),UTL_RAW.CAST_TO_RAW('DVT')
,UTL_RAW.CAST_TO_RAW('DVT'),'DVT A','DVT A'
,HEXTORAW('187BDC3B218443B28EC23AC791C5B0F1')
,'{"dvt": 123, "status": "abc"}','{"dvt": 123, "status": "abc"}'
);
INSERT INTO pso_data_validator.dvt_ora2pg_types VALUES
(2,2222,123456789,123456789012345678,1234567890123456789012345
@@ -137,6 +144,7 @@ INSERT INTO pso_data_validator.dvt_ora2pg_types VALUES
,UTL_RAW.CAST_TO_RAW('DVT'),UTL_RAW.CAST_TO_RAW('DVT DVT')
,UTL_RAW.CAST_TO_RAW('DVT DVT'),'DVT B','DVT B'
,HEXTORAW('287BDC3B218443B28EC23AC791C5B0F1')
,'{"dvt": 234, "status": "def"}','{"dvt": 234, "status": "def"}'
);
INSERT INTO pso_data_validator.dvt_ora2pg_types VALUES
(3,3333,123456789,123456789012345678,1234567890123456789012345
@@ -151,6 +159,7 @@ INSERT INTO pso_data_validator.dvt_ora2pg_types VALUES
,UTL_RAW.CAST_TO_RAW('DVT'),UTL_RAW.CAST_TO_RAW('DVT DVT DVT')
,UTL_RAW.CAST_TO_RAW('DVT DVT DVT'),'DVT C','DVT C'
,HEXTORAW('387BDC3B218443B28EC23AC791C5B0F1')
,'{"dvt": 345, "status": "ghi"}','{"dvt": 345, "status": "ghi"}'
);
COMMIT;

9 changes: 7 additions & 2 deletions tests/resources/postgresql_test_tables.sql
@@ -80,6 +80,8 @@ CREATE TABLE pso_data_validator.dvt_ora2pg_types
, col_clob text
, col_nclob text
, col_uuid uuid
, col_json json
, col_jsonb jsonb
);
COMMENT ON TABLE pso_data_validator.dvt_ora2pg_types IS 'Oracle to PostgreSQL integration test table';
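PostgreSQL, by contrast, has native json and jsonb types. The distinction matters for validation: json preserves the input text verbatim, while jsonb stores a normalized form in which key order, whitespace, and duplicate keys are not preserved. A small illustration:

-- jsonb compares by normalized value; json has no equality operator,
-- so its text form is compared instead.
SELECT '{"b": 2, "a": 1}'::jsonb = '{"a": 1, "b": 2}'::jsonb;            -- true
SELECT '{"b": 2, "a": 1}'::json::text = '{"a": 1, "b": 2}'::json::text;  -- false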

@@ -96,7 +98,8 @@ INSERT INTO pso_data_validator.dvt_ora2pg_types VALUES
,INTERVAL '1 2:03:44.0' DAY TO SECOND(3)
,CAST('DVT' AS BYTEA),CAST('DVT' AS BYTEA)
,CAST('DVT' AS BYTEA),'DVT A','DVT A'
,uuid('187bdc3b218443b28ec23ac791c5b0f1'))
,uuid('187bdc3b218443b28ec23ac791c5b0f1')
,'{"dvt": 123, "status": "abc"}','{"dvt": 123, "status": "abc"}')
,(2,2222,123456789,123456789012345678,1234567890123456789012345
,123.12,123.11
--,123400,0.002
@@ -108,7 +111,8 @@ INSERT INTO pso_data_validator.dvt_ora2pg_types VALUES
,INTERVAL '2 3:04:55.666' DAY TO SECOND(3)
,CAST('DVT' AS BYTEA),CAST('DVT DVT' AS BYTEA)
,CAST('DVT DVT' AS BYTEA),'DVT B','DVT B'
,uuid('287bdc3b218443b28ec23ac791c5b0f1'))
,uuid('287bdc3b218443b28ec23ac791c5b0f1')
,'{"dvt": 234, "status": "def"}','{"dvt": 234, "status": "def"}')
,(3,3333,123456789,123456789012345678,1234567890123456789012345
,123.123,123.11
--,123400,0.003
@@ -121,6 +125,7 @@ INSERT INTO pso_data_validator.dvt_ora2pg_types VALUES
,CAST('DVT' AS BYTEA),CAST('DVT DVT DVT' AS BYTEA)
,CAST('DVT DVT DVT' AS BYTEA),'DVT C','DVT C'
,uuid('387bdc3b218443b28ec23ac791c5b0f1')
,'{"dvt": 345, "status": "ghi"}','{"dvt": 345, "status": "ghi"}'
);

/* Following table used for validating generating table partitions */
10 changes: 8 additions & 2 deletions tests/system/data_sources/test_oracle.py
@@ -101,6 +101,8 @@
"col_clob",
"col_nclob",
"col_uuid",
"col_json",
"col_jsonb",
]


@@ -375,8 +377,8 @@ def test_row_validation_oracle_to_postgres():
# TODO col_raw/col_long_raw are blocked by issue-773 (is it even reasonable to expect binary columns to work here?)
# TODO Change hash_cols below to include col_nvarchar_30,col_nchar_2 when issue-772 is complete.
# TODO Change hash_cols below to include col_interval_ds when issue-1214 is complete.
# TODO Change hash_cols below to include col_clob/col_nclob/col_blob/col_json/col_jsonb when issue-1364 is complete.
# Excluded col_float32,col_float64 due to the lossy nature of BINARY_FLOAT/DOUBLE.
# Excluded CLOB/NCLOB/BLOB columns because lob values cannot be concatenated
hash_cols = ",".join(
[
_
Expand All @@ -394,6 +396,8 @@ def test_row_validation_oracle_to_postgres():
"col_nvarchar_30",
"col_nchar_2",
"col_interval_ds",
"col_json",
"col_jsonb",
)
]
)
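For background on the exclusion list above: row validation concatenates the selected columns into a single string per row and hashes it, so types that cannot be concatenated or rendered identically on both engines (LOBs, lossy floats, and now the JSON columns pending issue-1364) are kept out of hash_cols. A hedged PostgreSQL-flavoured sketch of the idea (column names hypothetical; not DVT's exact SQL):

-- Comparable columns are concatenated and hashed; mismatched hashes
-- pinpoint differing rows.
SELECT id,
       encode(sha256(convert_to(concat_ws('|', col_a::text, col_b::text), 'UTF8')), 'hex') AS row_hash
FROM some_table;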
@@ -593,8 +597,8 @@ def test_custom_query_row_validation_oracle_to_postgres():
# TODO col_raw/col_long_raw are blocked by issue-773 (is it even reasonable to expect binary columns to work here?)
# TODO Change hash_cols below to include col_nvarchar_30,col_nchar_2 when issue-772 is complete.
# TODO Change hash_cols below to include col_interval_ds when issue-1214 is complete.
# TODO Change hash_cols below to include col_clob/col_nclob/col_blob/col_json/col_jsonb when issue-1364 is complete.
# Excluded col_float32,col_float64 due to the lossy nature of BINARY_FLOAT/DOUBLE.
# Excluded CLOB/NCLOB/BLOB columns because lob values cannot be concatenated
hash_cols = ",".join(
[
_
Expand All @@ -612,6 +616,8 @@ def test_custom_query_row_validation_oracle_to_postgres():
"col_nvarchar_30",
"col_nchar_2",
"col_interval_ds",
"col_json",
"col_jsonb",
)
]
)
