Skip to content

Commit

Permalink
feat: artifact-level metrics (#1867)
Browse files Browse the repository at this point in the history
* dbt: events daily to artifact model

* chore: implement artifact versions of existing metrics

* dbt: add int_code_metrics_by_artifact model

* fix: update fulltime devs metric

* feat: code_metrics_by_artifact_v0 mart
  • Loading branch information
ccerv1 authored Jul 29, 2024
1 parent 2895843 commit 6c012e8
Show file tree
Hide file tree
Showing 8 changed files with 402 additions and 0 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
{#
All events to an artifact, bucketed by day
As some artifacts may be associated with multiple projects,
we need to select distinct first and then do the grouping
#}

with events as (
select distinct
from_artifact_id,
to_artifact_id,
event_source,
event_type,
time,
amount
from {{ ref('int_events') }}
)

select
from_artifact_id,
to_artifact_id,
event_source,
event_type,
TIMESTAMP_TRUNC(time, day) as bucket_day,
SUM(amount) as amount
from events
group by
from_artifact_id,
to_artifact_id,
event_source,
event_type,
TIMESTAMP_TRUNC(time, day)
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
select
events.to_artifact_id,
events.event_source,
time_intervals.time_interval,
'active_developer_count' as metric,
COUNT(distinct events.from_artifact_id) as amount
from {{ ref('int_events_daily_to_artifact') }} as events
cross join {{ ref('int_time_intervals') }} as time_intervals
where
events.event_type = 'COMMIT_CODE'
and events.bucket_day >= time_intervals.start_date
group by
events.to_artifact_id,
events.event_source,
time_intervals.time_interval
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
select
events.to_artifact_id,
events.event_source,
time_intervals.time_interval,
CONCAT(LOWER(events.event_type), '_count') as metric,
SUM(events.amount) as amount
from {{ ref('int_events_daily_to_artifact') }} as events
cross join {{ ref('int_time_intervals') }} as time_intervals
where
events.bucket_day >= time_intervals.start_date
and events.event_type in (
'COMMIT_CODE',
'PULL_REQUEST_OPENED',
'PULL_REQUEST_MERGED',
'ISSUE_OPENED',
'ISSUE_CLOSED'
)
group by
events.to_artifact_id,
events.event_source,
time_intervals.time_interval,
events.event_type
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
select
events.to_artifact_id,
events.event_source,
time_intervals.time_interval,
'contributor_count' as metric,
COUNT(distinct events.from_artifact_id) as amount
from {{ ref('int_events_daily_to_artifact') }} as events
cross join {{ ref('int_time_intervals') }} as time_intervals
where
events.event_type in (
'COMMIT_CODE',
'PULL_REQUEST_OPENED',
'ISSUE_OPENED'
)
and events.bucket_day >= time_intervals.start_date
group by
events.to_artifact_id,
events.event_source,
time_intervals.time_interval
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
{% set fulltime_dev_days = 10 %}


with developer_commit_days as (
select
to_artifact_id,
event_source,
from_artifact_id,
bucket_day
from {{ ref('int_events_daily_to_artifact') }}
where event_type = 'COMMIT_CODE'
),

rolling_commit_days as (
select
d1.to_artifact_id,
d1.event_source,
d1.from_artifact_id,
d1.bucket_day,
COUNT(distinct d2.bucket_day) as num_commit_days
from developer_commit_days as d1
inner join developer_commit_days as d2
on
d1.to_artifact_id = d2.to_artifact_id
and d1.from_artifact_id = d2.from_artifact_id
and (
d2.bucket_day between
DATE_SUB(d1.bucket_day, interval 30 day) and d1.bucket_day
)
group by
d1.to_artifact_id,
d1.event_source,
d1.from_artifact_id,
d1.bucket_day
),

ftdevs as (
select
to_artifact_id,
event_source,
bucket_day,
COUNT(distinct from_artifact_id) as amount
from rolling_commit_days
where num_commit_days >= {{ fulltime_dev_days }}
group by
to_artifact_id,
event_source,
bucket_day
)

select
ftdevs.to_artifact_id,
ftdevs.event_source,
time_intervals.time_interval,
'fulltime_developer_average' as metric,
(
SUM(ftdevs.amount)
/ DATE_DIFF(CURRENT_DATE(), MAX(DATE(time_intervals.start_date)), day)
) as amount
from ftdevs
cross join {{ ref('int_time_intervals') }} as time_intervals
where
ftdevs.bucket_day >= time_intervals.start_date
and time_intervals.time_interval != 'ALL'
group by
ftdevs.to_artifact_id,
ftdevs.event_source,
time_intervals.time_interval
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
with user_stats as (
select
from_artifact_id,
event_source,
to_artifact_id,
min(bucket_day) as first_day
from {{ ref('int_events_daily_to_artifact') }}
where
event_type in (
'COMMIT_CODE',
'PULL_REQUEST_OPENED',
'ISSUE_OPENED'
)
group by
from_artifact_id,
event_source,
to_artifact_id
)

select
events.to_artifact_id,
events.event_source,
time_intervals.time_interval,
'new_contributor_count' as metric,
count(
distinct
case
when user_stats.first_day >= time_intervals.start_date
then events.from_artifact_id
end
) as amount
from {{ ref('int_events_daily_to_artifact') }} as events
inner join user_stats
on
events.from_artifact_id = user_stats.from_artifact_id
and events.to_artifact_id = user_stats.to_artifact_id
and events.event_source = user_stats.event_source
cross join {{ ref('int_time_intervals') }} as time_intervals
where
events.event_type in (
'COMMIT_CODE',
'PULL_REQUEST_OPENED',
'ISSUE_OPENED'
)
and events.bucket_day >= time_intervals.start_date
group by
events.to_artifact_id,
events.event_source,
time_intervals.time_interval
Original file line number Diff line number Diff line change
@@ -0,0 +1,171 @@
{{
config(
materialized='table'
)
}}

with metrics as (
select * from {{ ref('int_code_metric_artifact__active_developers') }}
union all
select * from {{ ref('int_code_metric_artifact__commits_prs_issues') }}
union all
select * from {{ ref('int_code_metric_artifact__contributors') }}
union all
select *
from {{ ref('int_code_metric_artifact__fulltime_developers_average') }}
union all
select * from {{ ref('int_code_metric_artifact__new_contributors') }}
),

aggs as (
select
to_artifact_id as artifact_id,
event_source,
SUM(
case
when
metric = 'commit_code_count'
and time_interval = '6 MONTHS'
then amount
else 0
end
) as commit_count_6_months,
SUM(
case
when
metric = 'pull_request_opened_count'
and time_interval = '6 MONTHS'
then amount
else 0
end
) as opened_pull_request_count_6_months,
SUM(
case
when
metric = 'pull_request_merged_count'
and time_interval = '6 MONTHS'
then amount
else 0
end
) as merged_pull_request_count_6_months,
SUM(
case
when
metric = 'issue_opened_count'
and time_interval = '6 MONTHS'
then amount
else 0
end
) as opened_issue_count_6_months,
SUM(
case
when
metric = 'issue_closed_count'
and time_interval = '6 MONTHS'
then amount
else 0
end
) as closed_issue_count_6_months,
SUM(
case
when
metric = 'active_developer_count'
and time_interval = '6 MONTHS'
then amount
else 0
end
) as active_developer_count_6_months,
SUM(
case
when
metric = 'contributor_count'
and time_interval = 'ALL'
then amount
else 0
end
) as contributor_count,
SUM(
case
when
metric = 'contributor_count'
and time_interval = '6 MONTHS'
then amount
else 0
end
) as contributor_count_6_months,
SUM(
case
when
metric = 'new_contributor_count'
and time_interval = '6 MONTHS'
then amount
else 0
end
) as new_contributor_count_6_months,
SUM(
case
when
metric = 'fulltime_developer_average'
and time_interval = '6 MONTHS'
then amount
else 0
end
) as fulltime_developer_average_6_months
from metrics
group by
to_artifact_id,
event_source
),

repos as (
select
artifact_id,
artifact_namespace,
artifact_name,
artifact_source as event_source,
MIN(first_commit_time) as first_commit_date,
MAX(last_commit_time) as last_commit_date,
MAX(distinct artifact_id) as repository_count,
MAX(star_count) as star_count,
MAX(fork_count) as fork_count
from {{ ref('int_repo_metrics_by_project') }}
group by
artifact_id,
artifact_namespace,
artifact_name,
artifact_source
),

code_metrics as (
select
repos.*,
aggs.* except (artifact_id, event_source)
from repos
left join aggs
on
repos.artifact_id = aggs.artifact_id
and repos.event_source = aggs.event_source
)

select
artifact_id,
artifact_namespace,
artifact_name,
event_source,
first_commit_date,
last_commit_date,
repository_count,
star_count,
fork_count,
contributor_count,
contributor_count_6_months,
new_contributor_count_6_months,
fulltime_developer_average_6_months,
active_developer_count_6_months,
commit_count_6_months,
opened_pull_request_count_6_months,
merged_pull_request_count_6_months,
opened_issue_count_6_months,
closed_issue_count_6_months
from code_metrics
where event_source is not null
Loading

0 comments on commit 6c012e8

Please sign in to comment.