From 70b228d1d31af9cf578b13018b60900e232b1193 Mon Sep 17 00:00:00 2001 From: Reuven Gonzales Date: Fri, 29 Nov 2024 17:16:14 +0900 Subject: [PATCH] Add metrics rendering tool (#2546) * Add metrics rendering tool * Add some inline docs --- warehouse/metrics_tools/factory/factory.py | 18 ++++++- warehouse/metrics_tools/utils/logging.py | 6 ++- warehouse/metrics_tools/utils/testing.py | 3 ++ warehouse/oso_lets_go/cli.py | 62 ++++++++++++++++++++++ 4 files changed, 86 insertions(+), 3 deletions(-) diff --git a/warehouse/metrics_tools/factory/factory.py b/warehouse/metrics_tools/factory/factory.py index 7f3c6a238..00af9291f 100644 --- a/warehouse/metrics_tools/factory/factory.py +++ b/warehouse/metrics_tools/factory/factory.py @@ -563,14 +563,28 @@ def generated_model_additional_macros( return [metrics_end, metrics_start, metrics_sample_date] +# Specifically for testing. This is used if the +# `metrics_tools.utils.testing.ENABLE_TIMESERIES_DEBUG` variable is true. This +# is for loading all of the timeseries metrics from inside the metrics_mesh +# project and inspecting the actually rendered queries for testing purposes. +# It's a bit of a hack but it will work for the current purposes. +GLOBAL_TIMESERIES_METRICS: t.Dict[str, TimeseriesMetrics] = {} + + def timeseries_metrics( **raw_options: t.Unpack[TimeseriesMetricsOptions], ): - add_metrics_tools_to_sqlmesh_logging() + from metrics_tools.utils.testing import ENABLE_TIMESERIES_DEBUG + add_metrics_tools_to_sqlmesh_logging() logger.info("loading timeseries metrics") - calling_file = inspect.stack()[1].filename + frame_info = inspect.stack()[1] + calling_file = frame_info.filename timeseries_metrics = TimeseriesMetrics.from_raw_options(**raw_options) + + if ENABLE_TIMESERIES_DEBUG: + GLOBAL_TIMESERIES_METRICS[calling_file] = timeseries_metrics + return timeseries_metrics.generate_models(calling_file) diff --git a/warehouse/metrics_tools/utils/logging.py b/warehouse/metrics_tools/utils/logging.py index 22f2cdff0..d3f4dcc92 100644 --- a/warehouse/metrics_tools/utils/logging.py +++ b/warehouse/metrics_tools/utils/logging.py @@ -13,7 +13,11 @@ def add_metrics_tools_to_sqlmesh_logging(): global connected_to_sqlmesh_logs - app_name = os.path.basename(__main__.__file__) + try: + app_name = os.path.basename(__main__.__file__) + except AttributeError: + # Do nothing if __main__.__file__ doesn't exist + return if app_name == "sqlmesh" and not connected_to_sqlmesh_logs: add_metrics_tools_to_existing_logger(app_name) connected_to_sqlmesh_logs = True diff --git a/warehouse/metrics_tools/utils/testing.py b/warehouse/metrics_tools/utils/testing.py index 3caea2099..b45834b30 100644 --- a/warehouse/metrics_tools/utils/testing.py +++ b/warehouse/metrics_tools/utils/testing.py @@ -7,6 +7,9 @@ from oso_dagster.cbt.utils.compare import is_same_sql +ENABLE_TIMESERIES_DEBUG = False + + def assert_same_sql(actual: exp.Expression | str, expected: exp.Expression | str): if isinstance(actual, str): actual = parse_one(actual) diff --git a/warehouse/oso_lets_go/cli.py b/warehouse/oso_lets_go/cli.py index e5cd3230c..ce836b01d 100644 --- a/warehouse/oso_lets_go/cli.py +++ b/warehouse/oso_lets_go/cli.py @@ -2,7 +2,12 @@ A catchall for development environment tools related to the python tooling. """ +import typing as t +from pickle import GLOBAL import dotenv +from metrics_tools.factory.factory import MetricQueryConfig +from oso_lets_go.wizard import MultipleChoiceInput +from sqlglot import pretty dotenv.load_dotenv() @@ -11,6 +16,9 @@ from metrics_tools.local.utils import initialize_local_duckdb, reset_local_duckdb +CURR_DIR = os.path.dirname(__file__) +METRICS_MESH_DIR = os.path.abspath(os.path.join(CURR_DIR, "../metrics_mesh")) + @click.group() @click.option("--debug/--no-debug", default=False) @@ -25,6 +33,60 @@ def metrics(): pass +@metrics.command() +@click.argument("metric") +@click.option( + "--factory-path", + default=os.path.join(METRICS_MESH_DIR, "models/metrics_factories.py"), +) +@click.option("--dialect", default="duckdb", help="The dialect to render") +@click.pass_context +def render(ctx: click.Context, metric: str, factory_path: str, dialect: str): + """Renders a given metric query. Useful for testing + + Usage: + + $ oso metrics render + """ + + # Select all the available options for the metric + import importlib.util + from metrics_tools.utils import testing + from sqlmesh.core.dialect import parse_one + + testing.ENABLE_TIMESERIES_DEBUG = True + + from metrics_tools.factory.factory import GLOBAL_TIMESERIES_METRICS + + # Run the metrics factory in the sqlmesh project. This uses a single default + # location for now. + spec = importlib.util.spec_from_file_location( + "metrics_mesh.metrics_factories", factory_path + ) + mod = importlib.util.module_from_spec(spec) + spec.loader.exec_module(mod) + + timeseries_metrics = GLOBAL_TIMESERIES_METRICS[factory_path] + + matches: t.Dict[str, MetricQueryConfig] = {} + + for depth, config, deps in timeseries_metrics.generate_ordered_queries(): + if config["ref"]["name"] == metric: + matches[config["table_name"]] = config + if config["table_name"] == metric: + matches[config["table_name"]] = config + break + + if not len(matches): + print("No matching metrics") + return + if len(matches) > 1: + choice = MultipleChoiceInput(dict(zip(matches.keys(), matches.keys()))).render() + else: + choice = list(matches.keys())[0] + print(matches[choice]["rendered_query"].sql(pretty=True, dialect=dialect)) + + @metrics.group() @click.pass_context def local(ctx: click.Context):