From 778fb155287f2e4939e52ec548bbbe973e494e3a Mon Sep 17 00:00:00 2001 From: Carl Cervone <42869436+ccerv1@users.noreply.github.com> Date: Thu, 19 Dec 2024 18:25:13 -0500 Subject: [PATCH] feat(dbt): open collective deposits to github projects (#2660) * feat(dbt): open collective deposits to github projects * wip: expanded deposits model * feat(dbt): add int model to identify open collective project names * fix(dbt): refact deposit events model * feat(dbt): pass open collective deposits into oss_funding mart model * fix(dbt): include unlabeled projects from oss-funding csv --- .../int_open_collective_projects.sql | 59 +++++++++++++++ .../funding/int_open_collective_deposits.sql | 24 ++++++ .../int_oss_funding_grants_to_project.sql | 74 +++++++++++++++---- 3 files changed, 144 insertions(+), 13 deletions(-) create mode 100644 warehouse/dbt/models/intermediate/directory/int_open_collective_projects.sql create mode 100644 warehouse/dbt/models/intermediate/funding/int_open_collective_deposits.sql diff --git a/warehouse/dbt/models/intermediate/directory/int_open_collective_projects.sql b/warehouse/dbt/models/intermediate/directory/int_open_collective_projects.sql new file mode 100644 index 00000000..5718a7e9 --- /dev/null +++ b/warehouse/dbt/models/intermediate/directory/int_open_collective_projects.sql @@ -0,0 +1,59 @@ +with url_registry as ( + select + LOWER(JSON_EXTRACT_SCALAR(to_account, '$.slug')) as project_slug, + LOWER(JSON_EXTRACT_SCALAR(link, '$.url')) as github_url, + REGEXP_EXTRACT(LOWER(JSON_EXTRACT_SCALAR(link, '$.url')), r'github\.com/([a-z0-9-]+)') + as artifact_namespace, + REGEXP_EXTRACT( + LOWER(JSON_EXTRACT_SCALAR(link, '$.url')), r'github\.com/[a-z0-9-]+/([a-z0-9-._]+)' + ) as artifact_name + from + {{ ref('stg_open_collective__deposits') }}, + UNNEST(JSON_EXTRACT_ARRAY(to_account, '$.socialLinks')) as link + where + JSON_EXTRACT_SCALAR(link, '$.url') like '%github.com%' +), + +oso_projects as ( + select + project_id, + artifact_namespace, + artifact_name + from {{ ref('repositories_v0') }} + where artifact_source = 'GITHUB' +), + +namespace_counts as ( + select + artifact_namespace, + COUNT(distinct project_id) as project_count, + MIN(project_id) as project_id + from oso_projects + group by artifact_namespace +), + +matched_projects as ( + select + ur.*, + case + when op.project_id is not null then op.project_id + when nc.project_count = 1 then nc.project_id + end as project_id + from url_registry as ur + left join oso_projects as op + on + ur.artifact_namespace = op.artifact_namespace + and ur.artifact_name = op.artifact_name + left join namespace_counts as nc + on ur.artifact_namespace = nc.artifact_namespace +) + +select distinct + project_id as project_id, + {{ oso_id("'OPEN_COLLECTIVE'", 'project_slug') }} as artifact_id, + project_slug as artifact_source_id, + 'OPEN_COLLECTIVE' as artifact_source, + '' as artifact_namespace, + project_slug as artifact_name, + 'https://opencollective.com/' || project_slug as artifact_url +from matched_projects diff --git a/warehouse/dbt/models/intermediate/funding/int_open_collective_deposits.sql b/warehouse/dbt/models/intermediate/funding/int_open_collective_deposits.sql new file mode 100644 index 00000000..43882486 --- /dev/null +++ b/warehouse/dbt/models/intermediate/funding/int_open_collective_deposits.sql @@ -0,0 +1,24 @@ +with open_collective_deposits as ( + select + id as event_source_id, + created_at as `time`, + JSON_EXTRACT_SCALAR(to_account, '$.name') as project_name, + LOWER(JSON_EXTRACT_SCALAR(to_account, '$.slug')) as project_slug, + UPPER(JSON_EXTRACT_SCALAR(to_account, '$.type')) as project_type, + UPPER(JSON_EXTRACT_SCALAR(amount, '$.currency')) as currency, + CAST(JSON_EXTRACT_SCALAR(amount, '$.value') as NUMERIC) as amount + from {{ ref('stg_open_collective__deposits') }} +) + +select + open_collective_deposits.event_source_id, + open_collective_deposits.`time`, + projects.project_id, + open_collective_deposits.project_name, + open_collective_deposits.project_slug, + open_collective_deposits.project_type, + open_collective_deposits.currency, + open_collective_deposits.amount +from open_collective_deposits +left join {{ ref('int_open_collective_projects') }} as projects + on open_collective_deposits.project_slug = projects.artifact_source_id diff --git a/warehouse/dbt/models/intermediate/funding/int_oss_funding_grants_to_project.sql b/warehouse/dbt/models/intermediate/funding/int_oss_funding_grants_to_project.sql index ac2bc517..2c71e251 100644 --- a/warehouse/dbt/models/intermediate/funding/int_oss_funding_grants_to_project.sql +++ b/warehouse/dbt/models/intermediate/funding/int_oss_funding_grants_to_project.sql @@ -17,8 +17,7 @@ with oss_funding_data as ( parse_json(metadata_json) as metadata_json from {{ source('static_data_sources', 'oss_funding_v1') }} where - to_project_name is not null - and from_funder_name is not null + from_funder_name is not null and amount is not null ), @@ -52,6 +51,55 @@ gitcoin_data as ( from {{ ref('int_gitcoin_funding_events') }} ), +open_collective_data as ( + select -- noqa: ST06 + ocd.`time`, + 'GRANT_RECEIVED_USD' as event_type, + ocd.event_source_id, + 'OPEN_COLLECTIVE' as event_source, + projects.project_name as to_project_name, + 'opencollective' as from_project_name, + ocd.amount, + 'contributions' as grant_pool_name, + to_json(struct( + ocd.project_name as open_collective_project_name, + ocd.project_slug as open_collective_project_slug, + ocd.project_type as open_collective_project_type, + ocd.currency as open_collective_currency, + ocd.amount as open_collective_amount + )) as metadata_json + from {{ ref('int_open_collective_deposits') }} as ocd + left join {{ ref('projects_v1') }} as projects + on ocd.project_id = projects.project_id + where ocd.currency = 'USD' +), + +oso_indexed_data as ( + select + gitcoin_data.time, + gitcoin_data.event_type, + gitcoin_data.event_source_id, + gitcoin_data.event_source, + gitcoin_data.to_project_name, + gitcoin_data.from_project_name, + gitcoin_data.amount, + gitcoin_data.grant_pool_name, + gitcoin_data.metadata_json + from gitcoin_data + union all + select + open_collective_data.time, + open_collective_data.event_type, + open_collective_data.event_source_id, + open_collective_data.event_source, + open_collective_data.to_project_name, + open_collective_data.from_project_name, + open_collective_data.amount, + open_collective_data.grant_pool_name, + open_collective_data.metadata_json + from open_collective_data +), + grants as ( select oss_funding_data.time, @@ -68,16 +116,16 @@ grants as ( union all select - gitcoin_data.time, - gitcoin_data.event_type, - gitcoin_data.event_source_id, - gitcoin_data.event_source, - gitcoin_data.to_project_name, - gitcoin_data.from_project_name, - gitcoin_data.amount, - gitcoin_data.grant_pool_name, - gitcoin_data.metadata_json - from gitcoin_data + oso_indexed_data.time, + oso_indexed_data.event_type, + oso_indexed_data.event_source_id, + oso_indexed_data.event_source, + oso_indexed_data.to_project_name, + oso_indexed_data.from_project_name, + oso_indexed_data.amount, + oso_indexed_data.grant_pool_name, + oso_indexed_data.metadata_json + from oso_indexed_data ) select @@ -87,7 +135,7 @@ select grants.event_source, grants.to_project_name, to_projects.project_id as to_project_id, - 'WALLET' as to_type, + 'FISCAL_HOST' as to_type, grants.from_project_name, from_projects.project_id as from_project_id, 'WALLET' as from_type,