Skip to content

Commit

Permalink
dbt: onchain int models for more composable impact metrics (#1266)
Browse files Browse the repository at this point in the history
* fix: new users in onchain metrics

* refactor: int models

* update onchain marts

* update code metrics marts

* add groupbys

* fix: typo in contributors pm

* fix: ambiguous column

* remove typo

* fix: code metrics
  • Loading branch information
ccerv1 authored Apr 22, 2024
1 parent 02ad47b commit 6311a35
Show file tree
Hide file tree
Showing 21 changed files with 486 additions and 245 deletions.
File renamed without changes.
6 changes: 3 additions & 3 deletions warehouse/dbt/models/intermediate/users/int_addresses.sql
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,9 @@ SELECT
from_id,
from_namespace AS network,
project_id,
MIN(time) AS date_first_txn,
MAX(time) AS date_last_txn,
MIN(bucket_day) AS date_first_txn,
MAX(bucket_day) AS date_last_txn,
SUM(amount) AS count_events
FROM {{ ref('int_events_to_project') }}
FROM {{ ref('int_user_events_daily_to_project') }}
WHERE event_type = 'CONTRACT_INVOCATION_DAILY_COUNT'
GROUP BY 1, 2, 3
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
{#
  Daily contract-invocation activity per address, project and network.

  Each daily row is labelled with an address_type:
    - 'NEW' when the row's bucket_day equals the address's
      date_first_txn in int_addresses for the same project and network;
    - 'RETURNING' in every other case, including rows for which the
      left join finds no matching int_addresses record.
#}

WITH daily_invocations AS (
  -- Only contract-invocation rows from the daily project-level events.
  SELECT
    project_id,
    from_namespace,
    from_id,
    bucket_day,
    amount
  FROM {{ ref('int_user_events_daily_to_project') }}
  WHERE event_type = 'CONTRACT_INVOCATION_DAILY_COUNT'
),

first_seen AS (
  -- First transaction day per address / network / project.
  SELECT
    from_id,
    network,
    project_id,
    date_first_txn
  FROM {{ ref('int_addresses') }}
)

SELECT
  d.project_id,
  d.from_namespace,
  d.from_id,
  d.bucket_day,
  d.amount,
  CASE
    WHEN d.bucket_day = f.date_first_txn THEN 'NEW'
    ELSE 'RETURNING'
  END AS address_type
FROM daily_invocations AS d
LEFT JOIN first_seen AS f
  ON
    d.from_id = f.from_id
    AND d.from_namespace = f.network
    AND d.project_id = f.project_id
6 changes: 3 additions & 3 deletions warehouse/dbt/models/intermediate/users/int_devs.sql
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,10 @@ SELECT
from_id,
from_namespace AS repository_source,
project_id,
MIN(time) AS date_first_contribution,
MAX(time) AS date_last_contribution,
MIN(bucket_day) AS date_first_contribution,
MAX(bucket_day) AS date_last_contribution,
SUM(amount) AS count_events
FROM {{ ref('int_events_to_project') }}
FROM {{ ref('int_user_events_daily_to_project') }}
WHERE
event_type IN (
'COMMIT_CODE',
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
{#
  Daily roll-up of user events at the collection level.

  Output grain: one row per
  (from_id, from_namespace, collection_id, event_type, bucket_day),
  where bucket_day is the event time truncated to the day and amount
  is the sum of the event amounts in that bucket. Only the commit,
  pull-request, issue and CONTRACT_INVOCATION_DAILY_COUNT event types
  listed below are kept.
#}

WITH filtered_events AS (
  -- Restrict to the tracked event types and derive the day bucket once.
  SELECT
    from_id,
    from_namespace,
    collection_id,
    event_type,
    amount,
    TIMESTAMP_TRUNC(time, DAY) AS bucket_day
  FROM {{ ref('int_events_to_collection') }}
  WHERE
    event_type IN (
      'COMMIT_CODE',
      'PULL_REQUEST_OPENED',
      'PULL_REQUEST_REOPENED',
      'PULL_REQUEST_CLOSED',
      'PULL_REQUEST_MERGED',
      'ISSUE_CLOSED',
      'ISSUE_OPENED',
      'ISSUE_REOPENED',
      'CONTRACT_INVOCATION_DAILY_COUNT'
    )
)

SELECT
  from_id,
  from_namespace,
  collection_id,
  event_type,
  bucket_day,
  SUM(amount) AS amount
FROM filtered_events
GROUP BY
  from_id,
  from_namespace,
  collection_id,
  event_type,
  bucket_day
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
{#
  Daily roll-up of user events at the project level.

  Output grain: one row per
  (from_id, from_namespace, project_id, event_type, bucket_day),
  where bucket_day is the event time truncated to the day and amount
  is the sum of the event amounts in that bucket. Only the commit,
  pull-request, issue and CONTRACT_INVOCATION_DAILY_COUNT event types
  listed below are kept.
#}

WITH filtered_events AS (
  -- Restrict to the tracked event types and derive the day bucket once.
  SELECT
    from_id,
    from_namespace,
    project_id,
    event_type,
    amount,
    TIMESTAMP_TRUNC(time, DAY) AS bucket_day
  FROM {{ ref('int_events_to_project') }}
  WHERE
    event_type IN (
      'COMMIT_CODE',
      'PULL_REQUEST_OPENED',
      'PULL_REQUEST_REOPENED',
      'PULL_REQUEST_CLOSED',
      'PULL_REQUEST_MERGED',
      'ISSUE_CLOSED',
      'ISSUE_OPENED',
      'ISSUE_REOPENED',
      'CONTRACT_INVOCATION_DAILY_COUNT'
    )
)

SELECT
  from_id,
  from_namespace,
  project_id,
  event_type,
  bucket_day,
  SUM(amount) AS amount
FROM filtered_events
GROUP BY
  from_id,
  from_namespace,
  project_id,
  event_type,
  bucket_day
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,13 @@ SELECT
from_namespace,
collection_id,
event_type,
TIMESTAMP_TRUNC(time, MONTH) AS bucket_month,
COUNT(DISTINCT TIMESTAMP_TRUNC(time, DAY)) AS count_days,
TIMESTAMP_TRUNC(bucket_day, MONTH) AS bucket_month,
COUNT(DISTINCT bucket_day) AS count_days,
SUM(amount) AS total_amount
FROM {{ ref('int_events_to_collection') }}
FROM {{ ref('int_user_events_daily_to_collection') }}
GROUP BY
from_id,
from_namespace,
collection_id,
event_type,
TIMESTAMP_TRUNC(time, MONTH)
TIMESTAMP_TRUNC(bucket_day, MONTH)
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,13 @@ SELECT
from_namespace,
project_id,
event_type,
TIMESTAMP_TRUNC(time, MONTH) AS bucket_month,
COUNT(DISTINCT TIMESTAMP_TRUNC(time, DAY)) AS count_days,
SUM(amount) AS amount
FROM {{ ref('int_events_to_project') }}
TIMESTAMP_TRUNC(bucket_day, MONTH) AS bucket_month,
COUNT(DISTINCT bucket_day) AS count_days,
SUM(amount) AS total_amount
FROM {{ ref('int_user_events_daily_to_project') }}
GROUP BY
from_id,
from_namespace,
project_id,
event_type,
TIMESTAMP_TRUNC(time, MONTH)
TIMESTAMP_TRUNC(bucket_day, MONTH)
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
{#
  Per-user event totals at the project level for each time interval.

  Every daily row from int_user_events_daily_to_project is paired with
  each int_time_intervals row whose start_date is on or before the
  row's bucket_day (compared as a DATE). Amounts are then summed per
  user, namespace, project, time interval and event type.
#}

SELECT
  ev.from_id,
  ev.from_namespace,
  ev.project_id,
  ti.time_interval,
  ev.event_type,
  SUM(ev.amount) AS amount
FROM {{ ref('int_user_events_daily_to_project') }} AS ev
INNER JOIN {{ ref('int_time_intervals') }} AS ti
  -- An event counts toward an interval once it is on/after its start.
  ON DATE(ev.bucket_day) >= ti.start_date
GROUP BY
  ev.from_id,
  ev.from_namespace,
  ev.project_id,
  ti.time_interval,
  ev.event_type
151 changes: 87 additions & 64 deletions warehouse/dbt/models/marts/code_metrics/code_metrics_by_project.sql
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@
})
}}

-- CTE for aggregating repo data for each project
WITH project_repos_summary AS (
SELECT
project_id,
Expand All @@ -43,105 +42,129 @@ WITH project_repos_summary AS (
repository_source
),

-- CTE for calculating contributor counts and new contributors in the last 6
-- months
devs_cte AS (
n_cte AS (
SELECT
project_id,
namespace AS repository_source,
SUM(CASE WHEN time_interval = 'ALL' THEN amount END) AS contributors,
SUM(CASE WHEN time_interval = '6M' THEN amount END)
AS new_contributors_6_months
FROM {{ ref('pm_new_contribs') }}
GROUP BY
project_id,
namespace
),

c_cte AS (
SELECT
project_id,
namespace AS repository_source,
SUM(amount) AS contributors_6_months
FROM {{ ref('pm_contributors') }}
WHERE time_interval = '6M'
GROUP BY
project_id,
namespace
),

d_cte AS (
SELECT
project_id,
namespace AS repository_source,
SUM(amount) / 6 AS contributors_6_months,
SUM(
CASE
WHEN CONTAINS_SUBSTR(impact_metric, 'FULL_TIME_DEV') THEN amount / 6
WHEN impact_metric = 'FULL_TIME_DEV_TOTAL' THEN amount / 6
ELSE 0
END
) AS avg_fulltime_devs_6_months,
) AS avg_fts_6_months,
SUM(
CASE
WHEN CONTAINS_SUBSTR(impact_metric, 'DEV') THEN amount / 6
WHEN impact_metric = 'PART_TIME_DEV_TOTAL' THEN amount / 6
ELSE 0
END
) AS avg_active_devs_6_months
) AS avg_pts_6_months
FROM {{ ref('pm_dev_months') }}
WHERE CONTAINS_SUBSTR(impact_metric, '_6M')
WHERE time_interval = '6M'
GROUP BY
project_id,
namespace
),

contribs_cte AS (
SELECT
n.project_id,
n.repository_source,
n.contributors,
n.new_contributors_6_months,
c.contributors_6_months,
d.avg_fts_6_months AS avg_fulltime_devs_6_months,
d.avg_fts_6_months + d.avg_pts_6_months AS avg_active_devs_6_months
FROM n_cte AS n
LEFT JOIN c_cte AS c
ON
n.project_id = c.project_id
AND n.repository_source = c.repository_source
LEFT JOIN d_cte AS d
ON
n.project_id = d.project_id
AND n.repository_source = d.repository_source
),

activity_cte AS (
SELECT
project_id,
namespace AS repository_source,
SUM(
CASE
WHEN CONTAINS_SUBSTR(impact_metric, '_ALL') THEN amount
ELSE 0
WHEN impact_metric = 'COMMIT_CODE_TOTAL' THEN amount
END
) AS contributors,
) AS commits_6_months,
SUM(
CASE
WHEN CONTAINS_SUBSTR(impact_metric, '_6M') THEN amount
ELSE 0
WHEN impact_metric = 'ISSUE_OPENED_TOTAL' THEN amount
END
) AS new_contributors_6_months
FROM {{ ref('pm_new_contribs') }}
) AS issues_opened_6_months,
SUM(
CASE
WHEN impact_metric = 'ISSUE_CLOSED_TOTAL' THEN amount
END
) AS issues_closed_6_months,
SUM(
CASE
WHEN impact_metric = 'PULL_REQUEST_OPENED_TOTAL' THEN amount
END
) AS pull_requests_opened_6_months,
SUM(
CASE
WHEN impact_metric = 'PULL_REQUEST_MERGED_TOTAL' THEN amount
END
) AS pull_requests_merged_6_months
FROM {{ ref('event_totals_by_project') }}
WHERE
time_interval = '6M'
AND impact_metric IN (
'COMMIT_CODE_TOTAL',
'ISSUE_OPENED_TOTAL',
'ISSUE_CLOSED_TOTAL',
'PULL_REQUEST_OPENED_TOTAL',
'PULL_REQUEST_MERGED_TOTAL'
)
GROUP BY
project_id,
namespace
),

-- CTE for summarizing project activity metrics over the past 6 months
activity_cte AS (
SELECT
project_id,
to_namespace AS repository_source,
SUM(CASE WHEN event_type = 'COMMIT_CODE' THEN amount END)
AS commits_6_months,
SUM(CASE WHEN event_type = 'ISSUE_OPENED' THEN amount END)
AS issues_opened_6_months,
SUM(CASE WHEN event_type = 'ISSUE_CLOSED' THEN amount END)
AS issues_closed_6_months,
SUM(CASE WHEN event_type = 'PULL_REQUEST_OPENED' THEN amount END)
AS pull_requests_opened_6_months,
SUM(CASE WHEN event_type = 'PULL_REQUEST_MERGED' THEN amount END)
AS pull_requests_merged_6_months
FROM {{ ref('int_events_to_project') }}
WHERE DATE(time) >= DATE_ADD(CURRENT_DATE(), INTERVAL -180 DAY)
GROUP BY project_id, repository_source
)

-- Final query to join all the metrics together

SELECT
p.project_id,
p.project_slug,
p.project_name,
p.repository_source AS `source`,
p.first_commit_date,
p.last_commit_date,
p.repositories,
p.stars,
p.forks,
c.contributors,
c.new_contributors_6_months,
d.contributors_6_months,
d.avg_fulltime_devs_6_months,
d.avg_active_devs_6_months,
act.commits_6_months,
act.issues_opened_6_months,
act.issues_closed_6_months,
act.pull_requests_opened_6_months,
act.pull_requests_merged_6_months
p.*,
c.* EXCEPT (project_id, repository_source),
a.* EXCEPT (project_id, repository_source)
FROM project_repos_summary AS p
LEFT JOIN devs_cte AS d
ON
p.project_id = d.project_id
AND p.repository_source = d.repository_source
LEFT JOIN contribs_cte AS c
ON
p.project_id = c.project_id
AND p.repository_source = c.repository_source
LEFT JOIN activity_cte AS act
LEFT JOIN activity_cte AS a
ON
p.project_id = act.project_id
AND p.repository_source = act.repository_source
p.project_id = a.project_id
AND p.repository_source = a.repository_source
Loading

0 comments on commit 6311a35

Please sign in to comment.