From 1eb93b01028e41fb21afeb59f8cc981b26e314b8 Mon Sep 17 00:00:00 2001 From: Carl Cervone <42869436+ccerv1@users.noreply.github.com> Date: Tue, 30 Jul 2024 17:58:44 -0400 Subject: [PATCH] fix: use rolling windows for ftdevs average in code metrics (#1889) --- ...de_metric__fulltime_developers_average.sql | 48 +----- ..._artifact__fulltime_developers_average.sql | 48 +----- ...nt_timeseries_code_metrics__developers.sql | 12 +- ...s_code_metrics_by_artifact__developers.sql | 148 ++++++++++++++++++ 4 files changed, 165 insertions(+), 91 deletions(-) create mode 100644 warehouse/dbt/models/intermediate/metrics/timeseries/int_timeseries_code_metrics_by_artifact__developers.sql diff --git a/warehouse/dbt/models/intermediate/metrics/code/int_code_metric__fulltime_developers_average.sql b/warehouse/dbt/models/intermediate/metrics/code/int_code_metric__fulltime_developers_average.sql index 4cc9766d9..c456bf1ea 100644 --- a/warehouse/dbt/models/intermediate/metrics/code/int_code_metric__fulltime_developers_average.sql +++ b/warehouse/dbt/models/intermediate/metrics/code/int_code_metric__fulltime_developers_average.sql @@ -1,51 +1,11 @@ -{% set fulltime_dev_days = 10 %} - - -with developer_commit_days as ( - select - project_id, - event_source, - from_artifact_id, - bucket_day - from {{ ref('int_events_daily_to_project') }} - where event_type = 'COMMIT_CODE' -), - -rolling_commit_days as ( - select - d1.project_id, - d1.event_source, - d1.from_artifact_id, - d1.bucket_day, - COUNT(distinct d2.bucket_day) as num_commit_days - from developer_commit_days as d1 - inner join developer_commit_days as d2 - on - d1.project_id = d2.project_id - and d1.from_artifact_id = d2.from_artifact_id - and ( - d2.bucket_day between - DATE_SUB(d1.bucket_day, interval 30 day) and d1.bucket_day - ) - group by - d1.project_id, - d1.event_source, - d1.from_artifact_id, - d1.bucket_day -), - -ftdevs as ( +with ftdevs as ( select project_id, event_source, bucket_day, - COUNT(distinct from_artifact_id) as amount - from rolling_commit_days - where num_commit_days >= {{ fulltime_dev_days }} - group by - project_id, - event_source, - bucket_day + amount + from {{ ref('int_timeseries_code_metrics__developers') }} + where metric = 'fulltime_developers' ) select diff --git a/warehouse/dbt/models/intermediate/metrics/code/int_code_metric_artifact__fulltime_developers_average.sql b/warehouse/dbt/models/intermediate/metrics/code/int_code_metric_artifact__fulltime_developers_average.sql index 99ae1353c..88cdfd061 100644 --- a/warehouse/dbt/models/intermediate/metrics/code/int_code_metric_artifact__fulltime_developers_average.sql +++ b/warehouse/dbt/models/intermediate/metrics/code/int_code_metric_artifact__fulltime_developers_average.sql @@ -1,51 +1,11 @@ -{% set fulltime_dev_days = 10 %} - - -with developer_commit_days as ( - select - to_artifact_id, - event_source, - from_artifact_id, - bucket_day - from {{ ref('int_events_daily_to_artifact') }} - where event_type = 'COMMIT_CODE' -), - -rolling_commit_days as ( - select - d1.to_artifact_id, - d1.event_source, - d1.from_artifact_id, - d1.bucket_day, - COUNT(distinct d2.bucket_day) as num_commit_days - from developer_commit_days as d1 - inner join developer_commit_days as d2 - on - d1.to_artifact_id = d2.to_artifact_id - and d1.from_artifact_id = d2.from_artifact_id - and ( - d2.bucket_day between - DATE_SUB(d1.bucket_day, interval 30 day) and d1.bucket_day - ) - group by - d1.to_artifact_id, - d1.event_source, - d1.from_artifact_id, - d1.bucket_day -), - -ftdevs as ( +with ftdevs as ( select to_artifact_id, event_source, bucket_day, - COUNT(distinct from_artifact_id) as amount - from rolling_commit_days - where num_commit_days >= {{ fulltime_dev_days }} - group by - to_artifact_id, - event_source, - bucket_day + amount + from {{ ref('int_timeseries_code_metrics_by_artifact__developers') }} + where metric = 'fulltime_developers' ) select diff --git a/warehouse/dbt/models/intermediate/metrics/timeseries/int_timeseries_code_metrics__developers.sql b/warehouse/dbt/models/intermediate/metrics/timeseries/int_timeseries_code_metrics__developers.sql index 229c03e84..a681f291c 100644 --- a/warehouse/dbt/models/intermediate/metrics/timeseries/int_timeseries_code_metrics__developers.sql +++ b/warehouse/dbt/models/intermediate/metrics/timeseries/int_timeseries_code_metrics__developers.sql @@ -1,3 +1,9 @@ +{{ + config( + materialized='table' + ) +}} + {% set fulltime_dev_days = 10 %} with commits as ( @@ -5,7 +11,7 @@ with commits as ( from_artifact_id as developer_id, project_id, event_source, - DATE(bucket_day) as bucket_day, + bucket_day, CAST(SUM(amount) > 0 as int64) as commit_count from {{ ref('int_events_daily_to_project') }} where event_type = 'COMMIT_CODE' @@ -31,13 +37,13 @@ calendar as ( select project_id, event_source, - DATE_ADD(first_commit_date, interval day_offset day) as bucket_day + TIMESTAMP_ADD(first_commit_date, interval day_offset day) as bucket_day from project_start_dates, UNNEST( GENERATE_ARRAY( 0, - DATE_DIFF( + TIMESTAMP_DIFF( (select MAX(bucket_day) as last_commit_date from commits), first_commit_date, day ) diff --git a/warehouse/dbt/models/intermediate/metrics/timeseries/int_timeseries_code_metrics_by_artifact__developers.sql b/warehouse/dbt/models/intermediate/metrics/timeseries/int_timeseries_code_metrics_by_artifact__developers.sql new file mode 100644 index 000000000..d9c4bf436 --- /dev/null +++ b/warehouse/dbt/models/intermediate/metrics/timeseries/int_timeseries_code_metrics_by_artifact__developers.sql @@ -0,0 +1,148 @@ +{{ + config( + materialized='table' + ) +}} + +{% set fulltime_dev_days = 10 %} + +with commits as ( + select + from_artifact_id as developer_id, + to_artifact_id, + event_source, + bucket_day, + CAST(SUM(amount) > 0 as int64) as commit_count + from {{ ref('int_events_daily_to_artifact') }} + where event_type = 'COMMIT_CODE' + group by + from_artifact_id, + to_artifact_id, + event_source, + bucket_day +), + +to_artifact_start_dates as ( + select + to_artifact_id, + event_source, + MIN(bucket_day) as first_commit_date + from commits + group by + to_artifact_id, + event_source +), + +calendar as ( + select + to_artifact_id, + event_source, + TIMESTAMP_ADD(first_commit_date, interval day_offset day) as bucket_day + from + to_artifact_start_dates, + UNNEST( + GENERATE_ARRAY( + 0, + TIMESTAMP_DIFF( + (select MAX(bucket_day) as last_commit_date from commits), + first_commit_date, day + ) + ) + ) as day_offset +), + +devs as ( + select distinct developer_id + from commits +), + +developer_to_artifact_dates as ( + select + devs.developer_id, + calendar.to_artifact_id, + calendar.bucket_day, + calendar.event_source + from calendar + cross join devs +), + +filled_data as ( + select + dpd.bucket_day, + dpd.developer_id, + dpd.to_artifact_id, + dpd.event_source, + COALESCE(c.commit_count, 0) as commit_count + from developer_to_artifact_dates as dpd + left join commits as c + on + dpd.bucket_day = c.bucket_day + and dpd.developer_id = c.developer_id + and dpd.to_artifact_id = c.to_artifact_id + and dpd.event_source = c.event_source +), + +rolling_commit_days as ( + select + bucket_day, + developer_id, + to_artifact_id, + event_source, + SUM(commit_count) over ( + partition by developer_id, to_artifact_id, event_source + order by bucket_day + rows between 29 preceding and current row + ) as num_commit_days + from filled_data +), + +ft_devs as ( + select + to_artifact_id, + event_source, + bucket_day, + 'fulltime_developers' as metric, + COUNT(distinct developer_id) as amount + from rolling_commit_days + where num_commit_days >= {{ fulltime_dev_days }} + group by + to_artifact_id, + event_source, + bucket_day +), + +pt_devs as ( + select + to_artifact_id, + event_source, + bucket_day, + 'parttime_developers' as metric, + COUNT(distinct developer_id) as amount + from rolling_commit_days + where num_commit_days >= 1 and num_commit_days < {{ fulltime_dev_days }} + group by + to_artifact_id, + event_source, + bucket_day +), + +active_devs as ( + select + to_artifact_id, + event_source, + bucket_day, + 'active_developers' as metric, + COUNT(distinct developer_id) as amount + from rolling_commit_days + where num_commit_days >= 1 + group by + to_artifact_id, + event_source, + bucket_day +) + +select * from ft_devs +union all +select * from pt_devs +union all +select * from active_devs