Skip to content

Commit

Permalink
final updates to v1 project metrics models (#1450)
Browse files (browse the repository at this point in the history)
* rename network as event_source

* add event_source to code metrics models

* bring artifact source from repo metrics into code metrics

* bug: rename network to event_source
  • Loading branch information
ccerv1 authored May 20, 2024
1 parent 9f1b301 commit 68f791e
Show file tree
Hide file tree
Showing 21 changed files with 115 additions and 62 deletions.
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,6 @@
select
events.project_id,
events.event_source,
time_intervals.time_interval,
'active_developer_count' as metric,
COUNT(distinct events.from_artifact_id) as amount
Expand All @@ -10,4 +11,5 @@ where
and events.bucket_day >= time_intervals.start_date
group by
events.project_id,
events.event_source,
time_intervals.time_interval
Original file line number | Diff line number | Diff line change
Expand Up @@ -2,13 +2,15 @@ with all_contributions as (
select
project_id,
from_artifact_id,
event_source,
bucket_month,
SUM(amount) as amount
from {{ ref('int_events_monthly_to_project') }}
where event_type = 'COMMIT_CODE'
group by
project_id,
from_artifact_id,
event_source,
bucket_month
),

Expand All @@ -22,6 +24,7 @@ aggregated_contributions as (
select
contributions.project_id,
contributions.from_artifact_id,
contributions.event_source,
time_intervals.time_interval,
SUM(contributions.amount) as amount
from contributions
Expand All @@ -32,27 +35,29 @@ aggregated_contributions as (
group by
contributions.project_id,
contributions.from_artifact_id,
contributions.event_source,
time_intervals.time_interval
),

ranked_contributions as (
select
project_id,
event_source,
time_interval,
from_artifact_id,
amount,
RANK()
over (
partition by project_id, time_interval
partition by project_id, event_source, time_interval
order by amount desc
) as rank,
SUM(amount)
over (
partition by project_id, time_interval
partition by project_id, event_source, time_interval
) as total_project_amount,
SUM(amount)
over (
partition by project_id, time_interval
partition by project_id, event_source, time_interval
order by amount desc
rows between unbounded preceding and current row
) as cumulative_amount
Expand All @@ -61,6 +66,7 @@ ranked_contributions as (

select
project_id,
event_source,
time_interval,
'bus_factor' as metric,
MAX(
Expand All @@ -74,4 +80,5 @@ from
ranked_contributions
group by
project_id,
event_source,
time_interval
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,6 @@
select
events.project_id,
events.event_source,
time_intervals.time_interval,
CONCAT(LOWER(events.event_type), '_count') as metric,
SUM(events.amount) as amount
Expand All @@ -16,5 +17,6 @@ where
)
group by
events.project_id,
events.event_source,
time_intervals.time_interval,
events.event_type
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,6 @@
select
events.project_id,
events.event_source,
time_intervals.time_interval,
'contributor_count' as metric,
COUNT(distinct events.from_artifact_id) as amount
Expand All @@ -14,4 +15,5 @@ where
and events.bucket_day >= time_intervals.start_date
group by
events.project_id,
events.event_source,
time_intervals.time_interval
Original file line number | Diff line number | Diff line change
Expand Up @@ -3,6 +3,7 @@
with dev_stats as (
select
events.project_id,
events.event_source,
time_intervals.time_interval,
events.from_artifact_id,
TIMESTAMP_TRUNC(events.bucket_day, month) as bucket_month,
Expand All @@ -14,15 +15,17 @@ with dev_stats as (
and events.bucket_day >= time_intervals.start_date
group by
events.project_id,
events.event_source,
time_intervals.time_interval,
events.from_artifact_id,
TIMESTAMP_TRUNC(events.bucket_day, month)
)

select
project_id,
event_source,
time_interval,
'fulltime_developer_avg' as metric,
'fulltime_developer_average' as metric,
(
COUNT(distinct from_artifact_id)
/ COUNT(distinct bucket_month)
Expand All @@ -31,4 +34,5 @@ from dev_stats
where amount >= {{ fulltime_dev_days }}
group by
project_id,
event_source,
time_interval
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,7 @@
with user_stats as (
select
from_artifact_id,
event_source,
project_id,
min(bucket_day) as first_day
from {{ ref('int_events_daily_to_project') }}
Expand All @@ -12,11 +13,13 @@ with user_stats as (
)
group by
from_artifact_id,
event_source,
project_id
)

select
events.project_id,
events.event_source,
time_intervals.time_interval,
'new_contributor_count' as metric,
count(
Expand All @@ -31,6 +34,7 @@ inner join user_stats
on
events.from_artifact_id = user_stats.from_artifact_id
and events.project_id = user_stats.project_id
and events.event_source = user_stats.event_source
cross join {{ ref('int_time_intervals') }} as time_intervals
where
events.event_type in (
Expand All @@ -41,4 +45,5 @@ where
and events.bucket_day >= time_intervals.start_date
group by
events.project_id,
events.event_source,
time_intervals.time_interval
Original file line number | Diff line number | Diff line change
Expand Up @@ -5,14 +5,16 @@ with metrics as (
union all
select * from {{ ref('int_code_metric__contributors') }}
union all
select * from {{ ref('int_code_metric__fulltime_developers_avg') }}
select *
from {{ ref('int_code_metric__fulltime_developers_average') }}
union all
select * from {{ ref('int_code_metric__new_contributors') }}
),

aggs as (
select
project_id,
event_source,
SUM(
case
when
Expand Down Expand Up @@ -97,53 +99,81 @@ aggs as (
SUM(
case
when
metric = 'fulltime_developer_avg'
metric = 'fulltime_developer_average'
and time_interval = '6 MONTHS'
then amount
else 0
end
) as fulltime_developer_avg_6_months
) as fulltime_developer_average_6_months
from metrics
group by project_id
group by
project_id,
event_source
),

repos as (
select
project_id,
artifact_source as event_source,
MIN(first_commit_time) as first_commit_date,
MAX(last_commit_time) as last_commit_date,
COUNT(distinct artifact_id) as repository_count,
SUM(star_count) as star_count,
SUM(fork_count) as fork_count
from {{ ref('int_repo_metrics_by_project') }}
--WHERE r.is_fork = false
group by project_id
group by
project_id,
artifact_source
),

code_metrics as (
select
repos.*,
aggs.* except (project_id, event_source)
from repos
left join aggs
on
repos.project_id = aggs.project_id
and repos.event_source = aggs.event_source
),

project_metadata as (
select
project_id,
project_source,
project_namespace,
project_name,
display_name,
'GITHUB' as event_source
from {{ ref('int_projects') }}

)

select
int_projects.project_id,
int_projects.project_source,
int_projects.project_namespace,
int_projects.project_name,
int_projects.display_name,
repos.first_commit_date,
repos.last_commit_date,
repos.repository_count,
repos.star_count,
repos.fork_count,
aggs.contributor_count,
aggs.contributor_count_6_months,
aggs.new_contributor_count_6_months,
aggs.fulltime_developer_avg_6_months,
aggs.active_developer_count_6_months,
aggs.commit_count_6_months,
aggs.opened_pull_request_count_6_months,
aggs.merged_pull_request_count_6_months,
aggs.opened_issue_count_6_months,
aggs.closed_issue_count_6_months,
'GITHUB' as repository_source
from {{ ref('int_projects') }}
left join aggs
on int_projects.project_id = aggs.project_id
left join repos
on int_projects.project_id = repos.project_id
project_metadata.project_id,
project_metadata.project_source,
project_metadata.project_namespace,
project_metadata.project_name,
project_metadata.display_name,
project_metadata.event_source,
code_metrics.first_commit_date,
code_metrics.last_commit_date,
code_metrics.repository_count,
code_metrics.star_count,
code_metrics.fork_count,
code_metrics.contributor_count,
code_metrics.contributor_count_6_months,
code_metrics.new_contributor_count_6_months,
code_metrics.fulltime_developer_average_6_months,
code_metrics.active_developer_count_6_months,
code_metrics.commit_count_6_months,
code_metrics.opened_pull_request_count_6_months,
code_metrics.merged_pull_request_count_6_months,
code_metrics.opened_issue_count_6_months,
code_metrics.closed_issue_count_6_months
from project_metadata
left join code_metrics
on
project_metadata.project_id = code_metrics.project_id
and project_metadata.event_source = code_metrics.event_source
Original file line number | Diff line number | Diff line change
Expand Up @@ -32,7 +32,7 @@ with metrics as (
aggs as (
select
project_id,
network,
event_source,
SUM(
case
when
Expand Down Expand Up @@ -162,7 +162,7 @@ aggs as (
from metrics
group by
project_id,
network
event_source
)

select
Expand Down
Original file line number | Diff line number | Diff line change
Expand Up @@ -46,6 +46,7 @@ select
int_artifacts_by_project.artifact_id,
int_artifacts_by_project.artifact_namespace,
int_artifacts_by_project.artifact_name,
int_artifacts_by_project.artifact_source,
repo_snapshot.is_fork,
repo_snapshot.fork_count,
repo_snapshot.star_count,
Expand Down
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,6 @@
select
events.project_id,
events.event_source as network,
events.event_source,
time_intervals.time_interval,
'address_count' as metric,
COUNT(distinct events.from_artifact_id) as amount
Expand Down
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,6 @@
select
events.project_id,
events.event_source as network,
events.event_source,
time_intervals.time_interval,
'active_contract_count' as metric,
COUNT(distinct events.to_artifact_id) as amount
Expand Down
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,6 @@
select
events.project_id,
events.event_source as network,
events.event_source,
time_intervals.time_interval,
'days_since_first_transaction' as metric,
MAX(
Expand Down
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,6 @@
select
events.project_id,
events.event_source as network,
events.event_source,
time_intervals.time_interval,
'gas_fees' as metric,
SUM(events.amount / 1e18) as amount
Expand Down
Loading

0 comments on commit 68f791e

Please sign in to comment.