Skip to content

Commit

Permalink
fix: Fix issue where long rows were being truncated in raw query command
Browse files Browse the repository at this point in the history
  • Loading branch information
nj1973 committed Dec 2, 2024
1 parent 7bfdfa1 commit 809c3e2
Show file tree
Hide file tree
Showing 8 changed files with 183 additions and 19 deletions.
18 changes: 4 additions & 14 deletions data_validation/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
clients,
consts,
exceptions,
raw_query,
state_manager,
util,
)
Expand Down Expand Up @@ -456,19 +457,6 @@ def build_config_managers_from_yaml(args, config_file_path):
return config_managers


def run_raw_query_against_connection(args):
    """Return results of raw query for ad hoc usage.

    Args:
        args: Parsed CLI arguments. ``args.conn`` is handed to
            ``StateManager.get_connection_config`` and ``args.query`` is the
            SQL text passed to the client's ``raw_sql``.

    Returns:
        The rows produced by ``cursor.fetchall()``.
    """
    import logging

    mgr = state_manager.StateManager()
    client = clients.get_data_client(mgr.get_connection_config(args.conn))
    cursor = client.raw_sql(args.query)
    try:
        return cursor.fetchall()
    finally:
        # Always attempt to release the cursor, even when fetchall() raises.
        # Closing stays best-effort: a failure here must not mask the query
        # result (or the original error), but it is logged instead of being
        # dropped silently.
        try:
            cursor.close()
        except Exception:
            logging.getLogger(__name__).debug(
                "Ignoring error raised while closing raw query cursor",
                exc_info=True,
            )


def convert_config_to_yaml(args, config_managers: list):
"""Return dict objects formatted for yaml validations.
Expand Down Expand Up @@ -698,7 +686,9 @@ def main():
elif args.command == "find-tables":
print(find_tables_using_string_matching(args))
elif args.command == "query":
print(run_raw_query_against_connection(args))
raw_query.print_raw_query_output(
raw_query.run_raw_query_against_connection(args)
)
elif args.command == "validate":
validate(args)
elif args.command == "generate-table-partitions":
Expand Down
47 changes: 47 additions & 0 deletions data_validation/raw_query.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
# Copyright 2024 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from sqlalchemy.engine.row import Row

from data_validation import clients, state_manager


def run_raw_query_against_connection(args) -> list:
    """Return results of raw query for ad hoc usage.

    Args:
        args: Parsed CLI arguments. ``args.conn`` is handed to
            ``StateManager.get_connection_config`` and ``args.query`` is the
            SQL text passed to the client's ``raw_sql``.

    Returns:
        list: The rows produced by ``cursor.fetchall()``.
    """
    import logging

    mgr = state_manager.StateManager()
    client = clients.get_data_client(mgr.get_connection_config(args.conn))
    cursor = client.raw_sql(args.query)
    try:
        return cursor.fetchall()
    finally:
        # Always attempt to release the cursor, even when fetchall() raises.
        # Closing stays best-effort: a failure here must not mask the query
        # result (or the original error), but it is logged instead of being
        # dropped silently.
        try:
            cursor.close()
        except Exception:
            logging.getLogger(__name__).debug(
                "Ignoring error raised while closing raw query cursor",
                exc_info=True,
            )


def print_raw_query_output(query_output: list):
    """Print a query resultset avoiding SQLAlchemy "... (nn characters truncated) ..." behaviour.

    Args:
        query_output (list): A set of rows from a SQLAlchemy query.
    """
    rendered_rows = []
    for row in query_output or []:
        # Converting a Row() to a plain tuple before str() prevents the
        # SQLAlchemy string truncation; anything else is stringified as-is.
        printable = tuple(row) if isinstance(row, Row) else row
        rendered_rows.append(str(printable))
    print(rendered_rows)
27 changes: 26 additions & 1 deletion tests/system/data_sources/common_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
import pathlib

from data_validation import __main__ as main
from data_validation import consts, data_validation
from data_validation import consts, data_validation, raw_query

from data_validation import (
cli_tools,
Expand Down Expand Up @@ -455,3 +455,28 @@ def custom_query_validation_test(
else:
# With filter on failures the data frame should be empty
assert len(df) == 0


def raw_query_test(
    capsys,
    conn: str = "mock-conn",
    query: str = "select * from pso_data_validator.dvt_core_types",
    table: str = None,
    expected_rows: int = 3,
):
    """Run the "query" command pieces and check row count and untruncated output.

    Args:
        capsys: pytest fixture used to capture what gets printed.
        conn: Connection name placed in the ``--conn`` argument.
        query: SQL to run; ignored when ``table`` is given.
        table: When set, the SQL becomes ``select * from {table}``.
        expected_rows: Number of rows the query must return.
    """
    # A table name, when supplied, takes precedence over the query text.
    sql = f"select * from {table}" if table else query
    args = cli_tools.configure_arg_parser().parse_args(
        ["query", f"--conn={conn}", f"--query={sql}"]
    )

    rows = raw_query.run_raw_query_against_connection(args)
    assert len(rows) == expected_rows
    assert len(rows[0]) > 0

    raw_query.print_raw_query_output(rows)
    printed = capsys.readouterr().out
    assert "characters truncated" not in printed
10 changes: 10 additions & 0 deletions tests/system/data_sources/test_bigquery.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@
row_validation_many_columns_test,
schema_validation_test,
column_validation_test,
raw_query_test,
row_validation_test,
custom_query_validation_test,
)
Expand Down Expand Up @@ -1380,3 +1381,12 @@ def test_bq_result_handler(mock_conn, bigquery_client, bigquery_dataset_id, capl
bq_result_handler=f"{PROJECT_ID}.{table_id}",
)
assert any(_ for _ in caplog.records if BQRH_WRITE_MESSAGE in _.msg)


@mock.patch(
    "data_validation.state_manager.StateManager.get_connection_config",
    return_value=BQ_CONN,
)
def test_raw_query_dvt_row_types(mock_conn, capsys):
    """Run the shared raw query check with BQ_CONN as the patched connection config."""
    raw_query_test(capsys=capsys)
25 changes: 25 additions & 0 deletions tests/system/data_sources/test_oracle.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
find_tables_assertions,
id_type_test_assertions,
null_not_null_assertions,
raw_query_test,
row_validation_many_columns_test,
row_validation_test,
run_test_from_cli_args,
Expand Down Expand Up @@ -770,3 +771,27 @@ def test_row_validation_comp_fields_bool_to_postgres():
tc="pg-conn",
comp_fields="col_bool_dec,col_bool_int,col_bool_ch1,col_bool_chy",
)


@mock.patch(
    "data_validation.state_manager.StateManager.get_connection_config",
    new=mock_get_connection_config,
)
def test_raw_query_dvt_row_types(capsys):
    """Run the shared raw query check, with its defaults, via the patched connection config."""
    raw_query_test(capsys=capsys)


@mock.patch(
    "data_validation.state_manager.StateManager.get_connection_config",
    new=mock_get_connection_config,
)
def test_raw_query_long_string(capsys):
    """Test data-validation query command with very long string output.

    We don't need to test this for each engine, just one will suffice.
    """
    raw_query_test(
        capsys,
        query="""SELECT RPAD('some-long-string',256,'x') c FROM dual UNION ALL
SELECT RPAD('some-long-string',512,'y') c FROM dual""",
        # The UNION ALL of two single-row SELECTs yields exactly two rows, so
        # the helper's default expectation of three rows must be overridden.
        expected_rows=2,
    )
10 changes: 10 additions & 0 deletions tests/system/data_sources/test_postgres.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
find_tables_assertions,
id_type_test_assertions,
null_not_null_assertions,
raw_query_test,
row_validation_many_columns_test,
run_test_from_cli_args,
partition_table_test,
Expand Down Expand Up @@ -892,3 +893,12 @@ def test_row_validation_identifiers():
tc="mock-conn",
hash="*",
)


@mock.patch(
    "data_validation.state_manager.StateManager.get_connection_config",
    new=mock_get_connection_config,
)
def test_raw_query_dvt_row_types(capsys):
    """Run the shared raw query check, with its defaults, via the patched connection config."""
    raw_query_test(capsys=capsys)
18 changes: 14 additions & 4 deletions tests/system/data_sources/test_teradata.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,16 +21,17 @@
from data_validation import cli_tools, data_validation, consts
from tests.system.data_sources.common_functions import (
binary_key_assertions,
column_validation_test,
custom_query_validation_test,
id_type_test_assertions,
null_not_null_assertions,
row_validation_many_columns_test,
run_test_from_cli_args,
partition_table_test,
partition_query_test,
raw_query_test,
row_validation_many_columns_test,
run_test_from_cli_args,
row_validation_test,
schema_validation_test,
column_validation_test,
custom_query_validation_test,
)
from tests.system.data_sources.test_bigquery import BQ_CONN

Expand Down Expand Up @@ -706,3 +707,12 @@ def test_row_validation_comp_fields_bool_to_bigquery():
tc="bq-conn",
comp_fields="col_bool_dec,col_bool_int,col_bool_ch1,col_bool_chy",
)


@mock.patch(
    "data_validation.state_manager.StateManager.get_connection_config",
    new=mock_get_connection_config,
)
def test_raw_query_dvt_row_types(capsys):
    """Run the shared raw query check, with its defaults, via the patched connection config."""
    raw_query_test(capsys=capsys)
47 changes: 47 additions & 0 deletions tests/unit/test_raw_query.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
# Copyright 2024 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import pytest
from unittest.mock import Mock

from sqlalchemy.engine.row import Row


@pytest.fixture
def module_under_test():
    """Provide the data_validation.raw_query module, imported when the fixture runs."""
    from data_validation import raw_query

    return raw_query


@pytest.mark.parametrize(
    "test_input",
    [
        ["a", "some-long-string".ljust(256, "x")],
        ["b", "some-long-string".ljust(1024, "z")],
        # No SQLAlchemy Row() case here: mocking one up proved impractical,
        # so Row handling is covered by an integration test instead.
    ],
)
def test_print_raw_query_output(module_under_test, capsys, test_input: list):
    """Check that long string values are printed in full, with no truncation marker."""
    module_under_test.print_raw_query_output(test_input)
    printed = capsys.readouterr().out
    long_value = test_input[1]
    assert long_value in printed
    assert "characters truncated" not in printed

0 comments on commit 809c3e2

Please sign in to comment.