From 87480be39ca1e982307c1c125961cfe91b2d1cdd Mon Sep 17 00:00:00 2001 From: Reuven Gonzales Date: Thu, 21 Nov 2024 11:03:41 +0800 Subject: [PATCH] Fix issues with duckdb local run (#2493) * Fix issues with duckdb local run * Add additional test * Improve test docs --- ...test_developer_active_days_over_window.yml | 94 +++++++++++++++++++ .../tests/test_events_daily_to_artifact.yml | 2 +- warehouse/metrics_tools/factory/factory.py | 13 ++- warehouse/metrics_tools/transformer/tables.py | 11 ++- warehouse/metrics_tools/utils/logging.py | 3 + 5 files changed, 116 insertions(+), 7 deletions(-) create mode 100644 warehouse/metrics_mesh/tests/test_developer_active_days_over_window.yml diff --git a/warehouse/metrics_mesh/tests/test_developer_active_days_over_window.yml b/warehouse/metrics_mesh/tests/test_developer_active_days_over_window.yml new file mode 100644 index 000000000..6c2e6e65c --- /dev/null +++ b/warehouse/metrics_mesh/tests/test_developer_active_days_over_window.yml @@ -0,0 +1,94 @@ +test_developer_active_days_to_artifact_over_60_day_window_with_cumulative_active_days: + # Tests rolling count of active days when the user is active 4 of the 5 days + # in the test interval + gateway: local + model: metrics.developer_active_days_to_artifact_over_60_day_window + vars: + start: 2024-01-01 + end: 2024-01-05 + inputs: + metrics.events_daily_to_artifact: + rows: + - to_artifact_id: repo_0 + from_artifact_id: dev_0 + event_source: SOURCE_PROVIDER + event_type: COMMIT_CODE + bucket_day: 2024-01-01 + amount: 1 + - to_artifact_id: repo_0 + from_artifact_id: dev_0 + event_source: SOURCE_PROVIDER + event_type: COMMIT_CODE + bucket_day: 2024-01-02 + amount: 10 + - to_artifact_id: repo_0 + from_artifact_id: dev_0 + event_source: SOURCE_PROVIDER + event_type: COMMIT_CODE + bucket_day: 2024-01-03 + amount: 12 + - to_artifact_id: repo_0 + from_artifact_id: dev_0 + event_source: SOURCE_PROVIDER + event_type: COMMIT_CODE + bucket_day: 2024-01-04 + amount: 20 + outputs: + partial: true + query: + partial: true + rows: + - to_artifact_id: repo_0 + from_artifact_id: dev_0 + metrics_sample_date: 2024-01-01 + amount: 1 + - to_artifact_id: repo_0 + from_artifact_id: dev_0 + metrics_sample_date: 2024-01-02 + amount: 2 + - to_artifact_id: repo_0 + from_artifact_id: dev_0 + metrics_sample_date: 2024-01-03 + amount: 3 + - to_artifact_id: repo_0 + from_artifact_id: dev_0 + metrics_sample_date: 2024-01-04 + amount: 4 + - to_artifact_id: repo_0 + from_artifact_id: dev_0 + metrics_sample_date: 2024-01-05 + amount: 4 + +test_developer_active_days_to_artifact_over_60_day_window_with_1_active_day: + # Tests rolling count of active days when the user is active 1 in the test interval + gateway: local + model: metrics.developer_active_days_to_artifact_over_60_day_window + vars: + start: 2024-01-01 + end: 2024-01-03 + inputs: + metrics.events_daily_to_artifact: + rows: + - to_artifact_id: repo_0 + from_artifact_id: dev_0 + event_source: SOURCE_PROVIDER + event_type: COMMIT_CODE + bucket_day: 2024-01-01 + amount: 10 + outputs: + partial: true + query: + partial: true + rows: + - to_artifact_id: repo_0 + from_artifact_id: dev_0 + metrics_sample_date: 2024-01-01 + amount: 1 + - to_artifact_id: repo_0 + from_artifact_id: dev_0 + metrics_sample_date: 2024-01-02 + amount: 1 + - to_artifact_id: repo_0 + from_artifact_id: dev_0 + metrics_sample_date: 2024-01-03 + amount: 1 \ No newline at end of file diff --git a/warehouse/metrics_mesh/tests/test_events_daily_to_artifact.yml b/warehouse/metrics_mesh/tests/test_events_daily_to_artifact.yml index 2b6dd0a39..d78446d09 100644 --- a/warehouse/metrics_mesh/tests/test_events_daily_to_artifact.yml +++ b/warehouse/metrics_mesh/tests/test_events_daily_to_artifact.yml @@ -1,5 +1,5 @@ - test_events_daily_to_artifact: + gateway: local model: metrics.events_daily_to_artifact vars: start: 2024-01-01 diff --git a/warehouse/metrics_tools/factory/factory.py b/warehouse/metrics_tools/factory/factory.py index ae1fef117..afe63f1eb 100644 --- a/warehouse/metrics_tools/factory/factory.py +++ b/warehouse/metrics_tools/factory/factory.py @@ -366,7 +366,7 @@ def generate_models(self, calling_file: str): ) }, ) - logger.debug("model generation complete") + logger.info("model generation complete") def generate_model_for_rendered_query( self, @@ -567,7 +567,7 @@ def timeseries_metrics( ): add_metrics_tools_to_sqlmesh_logging() - logger.debug("loading timeseries metrics") + logger.info("loading timeseries metrics") calling_file = inspect.stack()[1].filename timeseries_metrics = TimeseriesMetrics.from_raw_options(**raw_options) return timeseries_metrics.generate_models(calling_file) @@ -646,7 +646,14 @@ def generated_rolling_query( locals.update(sqlmesh_vars) runner = MetricsRunner.from_sqlmesh_context(context, query, ref, locals) - yield runner.run_rolling(start, end) + df = runner.run_rolling(start, end) + # If the rolling window is empty we need to yield from an empty tuple + # otherwise sqlmesh fails. See: + # https://sqlmesh.readthedocs.io/en/latest/concepts/models/python_models/#returning-empty-dataframes + if df.empty: + yield from () + else: + yield df def generated_rolling_query_proxy( diff --git a/warehouse/metrics_tools/transformer/tables.py b/warehouse/metrics_tools/transformer/tables.py index ee2ca5fe4..09cf39c43 100644 --- a/warehouse/metrics_tools/transformer/tables.py +++ b/warehouse/metrics_tools/transformer/tables.py @@ -1,12 +1,15 @@ """Transforms table references from an execution context """ +import logging import typing as t from sqlglot import exp from sqlmesh.core.context import ExecutionContext from .base import Transform +logger = logging.getLogger(__name__) + class TableTransform(Transform): def transform_table_name(self, table: exp.Table) -> exp.Table | None: @@ -19,10 +22,9 @@ def transform_tables(node: exp.Expression): actual_table = self.transform_table_name(node) if not actual_table: return node - table_kwargs = {} if node.alias: - table_kwargs["alias"] = node.alias - return exp.to_table(actual_table.this.this, **table_kwargs) + actual_table = actual_table.as_(node.alias) + return actual_table transformed_expressions = [] for expression in query: @@ -53,6 +55,9 @@ def __init__( def transform_table_name(self, table: exp.Table) -> exp.Table | None: table_name = f"{table.db}.{table.this.this}" try: + logger.debug( + f"Transforming tables for query {self._context.table(table_name)}" + ) return exp.to_table(self._context.table(table_name)) except KeyError: return None diff --git a/warehouse/metrics_tools/utils/logging.py b/warehouse/metrics_tools/utils/logging.py index a0edf41e2..22f2cdff0 100644 --- a/warehouse/metrics_tools/utils/logging.py +++ b/warehouse/metrics_tools/utils/logging.py @@ -3,6 +3,8 @@ connected_to_sqlmesh_logs = False +logger = logging.getLogger(__name__) + def add_metrics_tools_to_sqlmesh_logging(): """sqlmesh won't automatically add metrics_tools logging. This will enable @@ -15,6 +17,7 @@ def add_metrics_tools_to_sqlmesh_logging(): if app_name == "sqlmesh" and not connected_to_sqlmesh_logs: add_metrics_tools_to_existing_logger(app_name) connected_to_sqlmesh_logs = True + logger.info("metrics_tools logs connected to sqlmesh") def add_metrics_tools_to_existing_logger(logger_name: str):