From ec26503d8ead8f1af7d63f80faf3e871f826e273 Mon Sep 17 00:00:00 2001 From: Carl Cervone <42869436+ccerv1@users.noreply.github.com> Date: Mon, 25 Nov 2024 11:29:01 -0500 Subject: [PATCH] feat(sqlmesh): add response time metrics (#2513) * feat(sqlmesh): add prs days to merge model * fix: number to issue_number * feat(sqlmesh): add model for time to first response * fix: small syntax improvements * Add aux table to local duckdb * Fix local pull * Fix factory * Get response times and time to merge * use correct table --------- Co-authored-by: Reuven V. Gonzales --- .../models/code/issue_event_time_deltas.sql | 31 +++++++++++++++++++ .../models/first_of_event_from_artifact.sql | 22 +++++++++++++ .../models/last_of_event_from_artifact.sql | 22 +++++++++++++ .../metrics_mesh/models/metrics_factories.py | 24 +++++++++++++- .../oso_metrics/prs_time_to_merge.sql | 14 +++++++++ .../oso_metrics/time_to_first_response.sql | 23 ++++++++++++++ warehouse/metrics_tools/factory/factory.py | 1 + warehouse/metrics_tools/joiner/__init__.py | 8 +++-- warehouse/metrics_tools/joiner/test_joiner.py | 4 +-- warehouse/metrics_tools/local/utils.py | 3 +- 10 files changed, 145 insertions(+), 7 deletions(-) create mode 100644 warehouse/metrics_mesh/models/code/issue_event_time_deltas.sql create mode 100644 warehouse/metrics_mesh/models/first_of_event_from_artifact.sql create mode 100644 warehouse/metrics_mesh/models/last_of_event_from_artifact.sql create mode 100644 warehouse/metrics_mesh/oso_metrics/prs_time_to_merge.sql create mode 100644 warehouse/metrics_mesh/oso_metrics/time_to_first_response.sql diff --git a/warehouse/metrics_mesh/models/code/issue_event_time_deltas.sql b/warehouse/metrics_mesh/models/code/issue_event_time_deltas.sql new file mode 100644 index 000000000..4487f3248 --- /dev/null +++ b/warehouse/metrics_mesh/models/code/issue_event_time_deltas.sql @@ -0,0 +1,31 @@ +-- Model that records the delta (in seconds) since the creation of the issue or +-- pr. +MODEL ( + name metrics.issue_event_time_deltas, + kind VIEW +); +select `time`, + event_type, + event_source, + @oso_id( + event_source, + to_artifact_id, + issue_number + ) as issue_id, + issue_number, + to_artifact_id, + from_artifact_id, + created_at, + merged_at, + closed_at, + date_diff('second', created_at, `time`) as created_delta, + case + when merged_at is null then null + else date_diff('second', merged_at, `time`) + end as merged_delta, + case + when closed_at is null then null + else date_diff('second', closed_at, `time`) + end as closed_delta, + comments +from @oso_source.timeseries_events_aux_issues_by_artifact_v0 \ No newline at end of file diff --git a/warehouse/metrics_mesh/models/first_of_event_from_artifact.sql b/warehouse/metrics_mesh/models/first_of_event_from_artifact.sql new file mode 100644 index 000000000..7eab56e2d --- /dev/null +++ b/warehouse/metrics_mesh/models/first_of_event_from_artifact.sql @@ -0,0 +1,22 @@ +MODEL ( + name metrics.first_of_event_from_artifact, + kind FULL, + partitioned_by (day("time"), "event_type", "event_source"), + grain ( + time, + event_type, + event_source, + from_artifact_id, + to_artifact_id + ), +); +select MIN(time) as time, + event_type, + event_source, + from_artifact_id, + to_artifact_id +from @oso_source.timeseries_events_by_artifact_v0 +group by event_type, + event_source, + from_artifact_id, + to_artifact_id \ No newline at end of file diff --git a/warehouse/metrics_mesh/models/last_of_event_from_artifact.sql b/warehouse/metrics_mesh/models/last_of_event_from_artifact.sql new file mode 100644 index 000000000..efd017743 --- /dev/null +++ b/warehouse/metrics_mesh/models/last_of_event_from_artifact.sql @@ -0,0 +1,22 @@ +MODEL ( + name metrics.last_of_event_from_artifact, + kind FULL, + partitioned_by (day("time"), "event_type", "event_source"), + grain ( + time, + event_type, + event_source, + from_artifact_id, + to_artifact_id + ), +); +select MAX(time) as time, + event_type, + event_source, + from_artifact_id, + to_artifact_id +from @oso_source.timeseries_events_by_artifact_v0 +group by event_type, + event_source, + from_artifact_id, + to_artifact_id \ No newline at end of file diff --git a/warehouse/metrics_mesh/models/metrics_factories.py b/warehouse/metrics_mesh/models/metrics_factories.py index 2b6095837..e4c2dd28c 100644 --- a/warehouse/metrics_mesh/models/metrics_factories.py +++ b/warehouse/metrics_mesh/models/metrics_factories.py @@ -8,6 +8,10 @@ start="2015-01-01", catalog="metrics", model_prefix="timeseries", + timeseries_sources=[ + "events_daily_to_artifact", + "issue_event_time_deltas", + ], metric_queries={ # This will automatically generate star counts for the given roll up periods. # A time_aggregation is just a simple addition of the aggregation. So basically we @@ -165,7 +169,7 @@ ), entity_types=["artifact", "project", "collection"], ), - "closed_issues_6_months": MetricQueryDef( + "closed_issues": MetricQueryDef( ref="issues_closed.sql", rolling=RollingConfig( windows=[180], @@ -174,6 +178,24 @@ ), entity_types=["artifact", "project", "collection"], ), + "avg_prs_time_to_merge": MetricQueryDef( + ref="prs_time_to_merge.sql", + rolling=RollingConfig( + windows=[90, 180], + unit="day", + cron="@daily", + ), + entity_types=["artifact", "project", "collection"], + ), + "avg_time_to_first_response": MetricQueryDef( + ref="prs_time_to_merge.sql", + rolling=RollingConfig( + windows=[90, 180], + unit="day", + cron="@daily", + ), + entity_types=["artifact", "project", "collection"], + ), }, default_dialect="clickhouse", ) diff --git a/warehouse/metrics_mesh/oso_metrics/prs_time_to_merge.sql b/warehouse/metrics_mesh/oso_metrics/prs_time_to_merge.sql new file mode 100644 index 000000000..d5dce2dd8 --- /dev/null +++ b/warehouse/metrics_mesh/oso_metrics/prs_time_to_merge.sql @@ -0,0 +1,14 @@ +select @metrics_sample_date(time) as metrics_sample_date, + event_source, + to_artifact_id, + '' as from_artifact_id, + @metric_name() as metric, + AVG(created_delta) as amount +from metrics.issue_event_time_deltas +where event_type = 'PULL_REQUEST_MERGED' + and `time` BETWEEN @metrics_start('DATE') and @metrics_end('DATE') +group by 1, + metric, + from_artifact_id, + to_artifact_id, + event_source, \ No newline at end of file diff --git a/warehouse/metrics_mesh/oso_metrics/time_to_first_response.sql b/warehouse/metrics_mesh/oso_metrics/time_to_first_response.sql new file mode 100644 index 000000000..5fab019a8 --- /dev/null +++ b/warehouse/metrics_mesh/oso_metrics/time_to_first_response.sql @@ -0,0 +1,23 @@ +select @metrics_sample_date(time) as metrics_sample_date, + event_source, + to_artifact_id, + '' as from_artifact_id, + @metric_name() as metric, + AVG(created_delta) as amount +from metrics.issue_event_time_deltas +where ( + ( + event_type in ("PULL_REQUEST_MERGED", "ISSUE_CLOSED") + and comments = 0 + ) + or ( + event_type in ("PULL_REQUEST_REVIEW_COMMENT", "ISSUE_COMMENT") + and comments = 1 + ) + ) + and `time` BETWEEN @metrics_start('DATE') and @metrics_end('DATE') +group by 1, + metric, + from_artifact_id, + to_artifact_id, + event_source, \ No newline at end of file diff --git a/warehouse/metrics_tools/factory/factory.py b/warehouse/metrics_tools/factory/factory.py index afe63f1eb..7f3c6a238 100644 --- a/warehouse/metrics_tools/factory/factory.py +++ b/warehouse/metrics_tools/factory/factory.py @@ -239,6 +239,7 @@ def _generate_metrics_queries( QualifyTransform(), JoinerTransform( ref["entity_type"], + self._timeseries_sources, ), ], ) diff --git a/warehouse/metrics_tools/joiner/__init__.py b/warehouse/metrics_tools/joiner/__init__.py index 57897766e..9b6ac5a8f 100644 --- a/warehouse/metrics_tools/joiner/__init__.py +++ b/warehouse/metrics_tools/joiner/__init__.py @@ -7,8 +7,9 @@ class JoinerTransform(Transform): - def __init__(self, entity_type: str): + def __init__(self, entity_type: str, timeseries_sources: t.List[str]): self._entity_type = entity_type + self._timeseries_sources = timeseries_sources def __call__(self, query: t.List[exp.Expression]) -> t.List[exp.Expression]: entity_type = self._entity_type @@ -23,7 +24,7 @@ def _transform(node: exp.Expression): # Check if this using the timeseries source tables as a join or the from is_using_timeseries_source = False for table in select.find_all(exp.Table): - if table.this.this in ["events_daily_to_artifact"]: + if table.this.this in self._timeseries_sources: is_using_timeseries_source = True if not is_using_timeseries_source: return node @@ -110,6 +111,7 @@ def _transform(node: exp.Expression): def joiner_transform( query: str, entity_type: str, + timeseries_sources: t.List[str], rolling_window: t.Optional[int] = None, rolling_unit: t.Optional[str] = None, time_aggregation: t.Optional[str] = None, @@ -119,7 +121,7 @@ def joiner_transform( transformer = SQLTransformer( transforms=[ # Semantic transform - JoinerTransform(entity_type) + JoinerTransform(entity_type, timeseries_sources) ] ) return transformer.transform(query) diff --git a/warehouse/metrics_tools/joiner/test_joiner.py b/warehouse/metrics_tools/joiner/test_joiner.py index 011a5a057..c3b7e9c7d 100644 --- a/warehouse/metrics_tools/joiner/test_joiner.py +++ b/warehouse/metrics_tools/joiner/test_joiner.py @@ -13,12 +13,12 @@ def get_sql_fixture(sql_path: str) -> str: def test_factory(): input = get_sql_fixture("basic/input.sql") - artifact = joiner_transform(input, "artifact") + artifact = joiner_transform(input, "artifact", ["events_daily_to_artifact"]) assert artifact is not None assert len(artifact) == 1 assert_same_sql(artifact[0], get_sql_fixture("basic/expected_artifact.sql")) - project = joiner_transform(input, "project") + project = joiner_transform(input, "project", ["events_daily_to_artifact"]) assert project is not None assert len(project) == 1 assert_same_sql(project[0], get_sql_fixture("basic/expected_project.sql")) diff --git a/warehouse/metrics_tools/local/utils.py b/warehouse/metrics_tools/local/utils.py index b64fdfba6..8128cc53e 100644 --- a/warehouse/metrics_tools/local/utils.py +++ b/warehouse/metrics_tools/local/utils.py @@ -34,9 +34,10 @@ def initialize_local_duckdb(path: str): bq_to_duckdb( { # We need to rename this once we run the oso_playground dbt again - "opensource-observer.oso_playground.int_events": "sources.timeseries_events_by_artifact_v0", + "opensource-observer.oso_playground.timeseries_events_by_artifact_v0": "sources.timeseries_events_by_artifact_v0", "opensource-observer.oso_playground.artifacts_by_project_v1": "sources.artifacts_by_project_v1", "opensource-observer.oso_playground.projects_by_collection_v1": "sources.projects_by_collection_v1", + "opensource-observer.oso_playground.timeseries_events_aux_issues_by_artifact_v0": "sources.timeseries_events_aux_issues_by_artifact_v0", }, path, )