diff --git a/dbt/models/marts/github_metrics/github_metrics_by_collection.sql b/dbt/models/marts/github_metrics/github_metrics_by_collection.sql new file mode 100644 index 000000000..d1aa7fb3c --- /dev/null +++ b/dbt/models/marts/github_metrics/github_metrics_by_collection.sql @@ -0,0 +1,101 @@ +{# + Summary GitHub metrics for a collection: + - first_commit_date: The date of the first commit to the collection + - last_commit_date: The date of the last commit to the collection + - repos: The number of repositories in the collection + - stars: The number of stars the collection has + - forks: The number of forks the collection has + - contributors: The number of contributors to the collection + - contributors_6_months: The number of contributors to the collection in the last 6 months + - new_contributors_6_months: The number of new contributors to the collection in the last 6 months + - avg_active_devs_6_months: The average number of active developers in the last 6 months + - commits_6_months: The number of commits to the collection in the last 6 months + - issues_opened_6_months: The number of issues opened in the collection in the last 6 months + - issues_closed_6_months: The number of issues closed in the collection in the last 6 months + - pull_requests_opened_6_months: The number of pull requests opened in the collection in the last 6 months + - pull_requests_merged_6_months: The number of pull requests merged in the collection in the last 6 months +#} + +-- CTE for calculating the first and last commit date for each collection, ignoring forked repos +WITH collection_commit_dates AS ( + SELECT + pbc.collection_id, + MIN(e.time) AS first_commit_date, + MAX(e.time) AS last_commit_date + FROM {{ ref('int_events_to_project') }} AS e + JOIN {{ ref('stg_ossd__repositories_by_project') }} AS r ON e.project_id = r.project_id + JOIN {{ ref('stg_ossd__projects_by_collection') }} AS pbc ON r.project_id = pbc.project_id + WHERE e.event_type = 'COMMIT_CODE' + AND r.is_fork = false + GROUP BY pbc.collection_id +), +-- CTE for aggregating stars, forks, and repository counts by collection +collection_stars_forks_repos AS ( + SELECT + pbc.collection_id, + COUNT(DISTINCT r.id) AS repos, + SUM(r.star_count) AS stars, + SUM(r.fork_count) AS forks + FROM {{ ref('stg_ossd__repositories_by_project') }} AS r + JOIN {{ ref('stg_ossd__projects_by_collection') }} AS pbc ON r.project_id = pbc.project_id + WHERE r.is_fork = false + GROUP BY pbc.collection_id +), +-- CTE for calculating contributor counts and new contributors in the last 6 months at collection level +collection_contributors AS ( + SELECT + pbc.collection_id, + COUNT(DISTINCT d.from_id) AS contributors, + COUNT(DISTINCT CASE WHEN d.bucket_month >= DATE_ADD(CURRENT_DATE(), INTERVAL -6 MONTH) THEN d.from_id END) AS contributors_6_months, + COUNT(DISTINCT CASE WHEN d.bucket_month >= DATE_ADD(CURRENT_DATE(), INTERVAL -6 MONTH) AND d.user_segment_type IN ('FULL_TIME_DEV', 'PART_TIME_DEV') THEN CONCAT(d.from_id, '_', d.bucket_month) END) / 6 AS avg_active_devs_6_months, + COUNT(DISTINCT CASE WHEN d.first_contribution_date >= DATE_ADD(CURRENT_DATE(), INTERVAL -6 MONTH) THEN d.from_id END) AS new_contributors_6_months + FROM ( + SELECT + d.from_id, + pbc.collection_id, + d.bucket_month, + d.user_segment_type, + MIN(d.bucket_month) OVER (PARTITION BY d.from_id, pbc.collection_id) AS first_contribution_date + FROM {{ ref('int_devs') }} AS d + JOIN {{ ref('stg_ossd__projects_by_collection') }} AS pbc ON d.project_id = pbc.project_id + ) AS d + GROUP BY pbc.collection_id +), +-- CTE for summarizing collection activity metrics over the past 6 months +collection_activity AS ( + SELECT + pbc.collection_id, + SUM(CASE WHEN e.event_type = 'COMMIT_CODE' THEN e.amount END) AS commits_6_months, + SUM(CASE WHEN e.event_type = 'ISSUE_OPENED' THEN e.amount END) AS issues_opened_6_months, + SUM(CASE WHEN e.event_type = 'ISSUE_CLOSED' THEN e.amount END) AS issues_closed_6_months, + SUM(CASE WHEN e.event_type = 'PULL_REQUEST_OPENED' THEN e.amount END) AS pull_requests_opened_6_months, + SUM(CASE WHEN e.event_type = 'PULL_REQUEST_MERGED' THEN e.amount END) AS pull_requests_merged_6_months + FROM {{ ref('int_events_to_project') }} AS e + JOIN {{ ref('stg_ossd__projects_by_collection') }} AS pbc ON e.project_id = pbc.project_id + WHERE e.time >= DATE_ADD(CURRENT_DATE(), INTERVAL -6 MONTH) + GROUP BY pbc.collection_id +) + +-- Final query to join all the metrics together for collections +SELECT + c.collection_id, + c.collection_name, + ccd.first_commit_date, + ccd.last_commit_date, + csfr.repos, + csfr.stars, + csfr.forks, + cc.contributors, + cc.contributors_6_months, + cc.new_contributors_6_months, + cc.avg_active_devs_6_months, + ca.commits_6_months, + ca.issues_opened_6_months, + ca.issues_closed_6_months, + ca.pull_requests_opened_6_months, + ca.pull_requests_merged_6_months +FROM {{ ref('collections') }} AS c +INNER JOIN collection_commit_dates AS ccd ON c.collection_id = ccd.collection_id +INNER JOIN collection_stars_forks_repos AS csfr ON c.collection_id = csfr.collection_id +INNER JOIN collection_contributors AS cc ON c.collection_id = cc.collection_id +INNER JOIN collection_activity AS ca ON c.collection_id = ca.collection_id \ No newline at end of file diff --git a/dbt/models/marts/github_metrics/github_metrics_by_project.sql b/dbt/models/marts/github_metrics/github_metrics_by_project.sql index 6e97a469a..dfaad9325 100644 --- a/dbt/models/marts/github_metrics/github_metrics_by_project.sql +++ b/dbt/models/marts/github_metrics/github_metrics_by_project.sql @@ -92,7 +92,7 @@ SELECT act.pull_requests_opened_6_months, act.pull_requests_merged_6_months FROM {{ ref('projects') }} AS p -JOIN project_commit_dates AS pcd ON p.project_id = pcd.project_id -JOIN stars_forks_repos AS sfr ON p.project_id = sfr.project_id -JOIN contributors_cte AS c ON p.project_id = c.project_id -JOIN activity_cte AS act ON p.project_id = act.project_id +INNER JOIN project_commit_dates AS pcd ON p.project_id = pcd.project_id +INNER JOIN stars_forks_repos AS sfr ON p.project_id = sfr.project_id +INNER JOIN contributors_cte AS c ON p.project_id = c.project_id +INNER JOIN activity_cte AS act ON p.project_id = act.project_id diff --git a/dbt/models/marts/onchain_metrics/onchain_metrics_by_collection_arbitrum.sql b/dbt/models/marts/onchain_metrics/onchain_metrics_by_collection_arbitrum.sql new file mode 100644 index 000000000..f1fd590ca --- /dev/null +++ b/dbt/models/marts/onchain_metrics/onchain_metrics_by_collection_arbitrum.sql @@ -0,0 +1,138 @@ +{# + Arbitrum Onchain Metrics + Summary onchain metrics for a collection: + - num_projects: The number of projects in the collection + - num_contracts: The number of contracts in the collection + - first_txn_date: The date of the first transaction to the collection + - total_txns: The total number of transactions to the collection + - total_l2_gas: The total L2 gas used by the collection + - total_users: The number of unique users interacting with the collection + - txns_6_months: The total number of transactions to the collection in the last 6 months + - l2_gas_6_months: The total L2 gas used by the collection in the last 6 months + - users_6_months: The number of unique users interacting with the collection in the last 6 months + - new_users: The number of users interacting with the collection for the first time in the last 3 months + - active_users: The number of active users interacting with the collection in the last 3 months + - high_frequency_users: The number of users who have made 1000+ transactions with the collection in the last 3 months + - more_active_users: The number of users who have made 10-999 transactions with the collection in the last 3 months + - less_active_users: The number of users who have made 1-9 transactions with the collection in the last 3 months +#} + + +-- CTE for grabbing the onchain transaction data we care about, including project-collection mapping +WITH txns AS ( + SELECT + pbc.collection_id, + a.project_id, + c.from_source_id AS from_id, + DATE(TIMESTAMP_TRUNC(c.time, MONTH)) AS bucket_month, + l2_gas, + tx_count + FROM {{ ref('stg_dune__arbitrum_contract_invocation') }} AS c + JOIN {{ ref('stg_ossd__artifacts_by_project') }} AS a ON c.to_source_id = a.artifact_source_id + JOIN {{ ref('stg_ossd__projects_by_collection') }} AS pbc ON a.project_id = pbc.project_id +), + +-- CTEs for calculating all-time and 6-month collection metrics across all contracts +metrics_all_time AS ( + SELECT + collection_id, + COUNT(DISTINCT project_id) AS total_projects, + MIN(bucket_month) AS first_txn_date, + COUNT(DISTINCT from_id) AS total_users, + SUM(l2_gas) AS total_l2_gas, + SUM(tx_count) AS total_txns + FROM txns + GROUP BY collection_id +), +metrics_6_months AS ( + SELECT + collection_id, + COUNT(DISTINCT from_id) AS users_6_months, + SUM(l2_gas) AS l2_gas_6_months, + SUM(tx_count) AS txns_6_months + FROM txns + WHERE bucket_month >= DATE_ADD(CURRENT_DATE(), INTERVAL -6 MONTH) + GROUP BY collection_id +), + +-- CTE for identifying new users to the collection in the last 3 months +new_users AS ( + SELECT + collection_id, + SUM(is_new_user) AS new_user_count + FROM ( + SELECT + collection_id, + from_id, + CASE WHEN MIN(bucket_month) >= DATE_ADD(CURRENT_DATE(), INTERVAL -3 MONTH) THEN 1 ELSE 0 END AS is_new_user + FROM txns + GROUP BY collection_id, from_id + ) + GROUP BY collection_id +), + +-- CTEs for segmenting different types of active users based on txn volume at collection level +user_txns_aggregated AS ( + SELECT + collection_id, + from_id, + SUM(tx_count) AS total_tx_count + FROM txns + WHERE bucket_month >= DATE_ADD(CURRENT_DATE(), INTERVAL -3 MONTH) + GROUP BY collection_id, from_id +), +user_segments AS ( + SELECT + collection_id, + COUNT(DISTINCT CASE WHEN user_segment = 'HIGH_FREQUENCY_USER' THEN from_id END) AS high_frequency_users, + COUNT(DISTINCT CASE WHEN user_segment = 'MORE_ACTIVE_USER' THEN from_id END) AS more_active_users, + COUNT(DISTINCT CASE WHEN user_segment = 'LESS_ACTIVE_USER' THEN from_id END) AS less_active_users + FROM ( + SELECT + collection_id, + from_id, + CASE + WHEN total_tx_count >= 1000 THEN 'HIGH_FREQUENCY_USER' + WHEN total_tx_count >= 10 THEN 'MORE_ACTIVE_USER' + ELSE 'LESS_ACTIVE_USER' + END AS user_segment + FROM user_txns_aggregated + ) + GROUP BY collection_id +), + +-- CTE to count the number of contracts deployed by projects in a collection +contracts AS ( + SELECT + pbc.collection_id, + COUNT(DISTINCT a.artifact_source_id) AS num_contracts + FROM {{ ref('stg_ossd__artifacts_by_project') }} AS a + JOIN {{ ref('stg_ossd__projects_by_collection') }} AS pbc ON a.project_id = pbc.project_id + GROUP BY pbc.collection_id +) + +-- Final query to join all the metrics together for collections +SELECT + c.collection_id, + c.collection_name, + co.num_contracts, + ma.total_projects, + ma.first_txn_date, + ma.total_txns, + ma.total_l2_gas, + ma.total_users, + m6.txns_6_months, + m6.l2_gas_6_months, + m6.users_6_months, + nu.new_user_count AS new_users, + (us.high_frequency_users + us.more_active_users + us.less_active_users) AS active_users, + us.high_frequency_users, + us.more_active_users, + us.less_active_users + +FROM {{ ref('collections') }} AS c +INNER JOIN metrics_all_time AS ma ON c.collection_id = ma.collection_id +INNER JOIN metrics_6_months AS m6 on c.collection_id = m6.collection_id +INNER JOIN new_users AS nu on c.collection_id = nu.collection_id +INNER JOIN user_segments AS us on c.collection_id = us.collection_id +INNER JOIN contracts AS co on c.collection_id = co.collection_id \ No newline at end of file diff --git a/dbt/models/marts/onchain_metrics/onchain_metrics_by_project_arbitrum.sql b/dbt/models/marts/onchain_metrics/onchain_metrics_by_project_arbitrum.sql index 4a06dd5f6..dcb2fb7bc 100644 --- a/dbt/models/marts/onchain_metrics/onchain_metrics_by_project_arbitrum.sql +++ b/dbt/models/marts/onchain_metrics/onchain_metrics_by_project_arbitrum.sql @@ -125,7 +125,7 @@ SELECT us.less_active_users FROM {{ ref('projects') }} AS p -JOIN metrics_all_time AS ma ON p.project_id = ma.project_id +INNER JOIN metrics_all_time AS ma ON p.project_id = ma.project_id INNER JOIN metrics_6_months AS m6 on p.project_id = m6.project_id INNER JOIN new_users AS nu on p.project_id = nu.project_id INNER JOIN user_segments AS us on p.project_id = us.project_id