From 4d3c3ef07bf7d794b482bddf594a6e07f0a3ea16 Mon Sep 17 00:00:00 2001 From: Carl Cervone <42869436+ccerv1@users.noreply.github.com> Date: Sun, 6 Oct 2024 12:07:58 -0400 Subject: [PATCH] fix: refactor `int_oss_funding_grants_to_project` (#2313) * chore: bump source to new schema * fix: gitcoin funding events to handle missing dates * fix: dlt should replace instead of append tables * fix: gitcoin project directory removes duplicates * feat(dbt): union oss funding sources * fix(dbt): address linting issues --- .../funding/int_gitcoin_funding_events.sql | 72 +++++++++--- .../funding/int_gitcoin_project_directory.sql | 6 +- .../int_oss_funding_grants_to_project.sql | 105 ++++++++++-------- .../staging/gitcoin/stg_gitcoin__matching.sql | 13 ++- .../gitcoin/stg_gitcoin__project_groups.sql | 18 ++- .../gitcoin/stg_gitcoin__project_lookup.sql | 6 + warehouse/dbt/models/static_data_sources.yml | 4 +- warehouse/oso_dagster/assets/gitcoin.py | 3 + 8 files changed, 152 insertions(+), 75 deletions(-) diff --git a/warehouse/dbt/models/intermediate/funding/int_gitcoin_funding_events.sql b/warehouse/dbt/models/intermediate/funding/int_gitcoin_funding_events.sql index 1808749b3..713fffba6 100644 --- a/warehouse/dbt/models/intermediate/funding/int_gitcoin_funding_events.sql +++ b/warehouse/dbt/models/intermediate/funding/int_gitcoin_funding_events.sql @@ -1,23 +1,65 @@ +{# + This model combines the donations and matching grants data to create a single table of funding events. + The `funding_type` column is used to differentiate between donations and matching grants. + The `event_time` data has some issues. It is missing for matching grants, so we use the most recent donation timestamp as a placeholder. To the cGrants data, we use a placeholder date of 2023-07-01. +#} + + +with last_donation_by_round as ( + select + round_id, + round_number, + gitcoin_project_id, + max(donation_timestamp) as assumed_event_time + from {{ ref('stg_gitcoin__donations') }} + group by + round_id, + round_number, + gitcoin_project_id +), + +unioned_events as ( + select + transaction_hash, + donation_timestamp as event_time, + round_id, + round_number, + chain_id, + gitcoin_project_id, + donor_address, + amount_in_usd, + 'DONATIONS' as funding_type + from {{ ref('stg_gitcoin__donations') }} + + union all + + select + null as transaction_hash, + last_donation_by_round.assumed_event_time as event_time, + matching.round_id, + matching.round_number, + matching.chain_id, + matching.gitcoin_project_id, + null as donor_address, + matching.amount_in_usd, + 'MATCHING' as funding_type + from {{ ref('stg_gitcoin__matching') }} as matching + left join last_donation_by_round + on + matching.round_id = last_donation_by_round.round_id + and matching.round_number = last_donation_by_round.round_number + and matching.gitcoin_project_id = last_donation_by_round.gitcoin_project_id +) + select transaction_hash, - donation_timestamp as event_time, round_id, round_number, chain_id, gitcoin_project_id, donor_address, amount_in_usd, - 'crowdfunding' as funding_type -from {{ ref('stg_gitcoin__donations') }} -union all -select - null as transaction_hash, - null as event_time, - round_id, - round_number, - chain_id, - gitcoin_project_id, - null as donor_address, - amount_in_usd, - 'matching_grant' as funding_type -from {{ ref('stg_gitcoin__matching') }} + funding_type, + coalesce(event_time, timestamp('2023-07-01')) as event_time +from unioned_events +where amount_in_usd > 0 diff --git a/warehouse/dbt/models/intermediate/funding/int_gitcoin_project_directory.sql b/warehouse/dbt/models/intermediate/funding/int_gitcoin_project_directory.sql index 5b0f9fd2c..b92dfebf1 100644 --- a/warehouse/dbt/models/intermediate/funding/int_gitcoin_project_directory.sql +++ b/warehouse/dbt/models/intermediate/funding/int_gitcoin_project_directory.sql @@ -1,6 +1,5 @@ with gitcoin_projects as ( select distinct - pg.gitcoin_group_id, pg.latest_project_github, pg.latest_project_recipient_address, project_lookup.gitcoin_project_id @@ -29,9 +28,10 @@ oso_projects as ( ) select distinct - gitcoin_projects.gitcoin_group_id, + oso_projects.oso_project_id, gitcoin_projects.gitcoin_project_id, - oso_projects.oso_project_id + gitcoin_projects.latest_project_github, + gitcoin_projects.latest_project_recipient_address from gitcoin_projects inner join oso_projects on ( gitcoin_projects.latest_project_github = oso_projects.repo_owner diff --git a/warehouse/dbt/models/intermediate/funding/int_oss_funding_grants_to_project.sql b/warehouse/dbt/models/intermediate/funding/int_oss_funding_grants_to_project.sql index 6cf527ece..cab51096e 100644 --- a/warehouse/dbt/models/intermediate/funding/int_oss_funding_grants_to_project.sql +++ b/warehouse/dbt/models/intermediate/funding/int_oss_funding_grants_to_project.sql @@ -4,44 +4,63 @@ {% set oss_funding_url = "https://github.com/opensource-observer/oss-funding" %} -with grants as ( +with oss_funding_data as ( select -- noqa: ST06 CAST(funding_date as timestamp) as `time`, 'GRANT_RECEIVED_USD' as event_type, - COALESCE(project_url, '{{ oss_funding_url }}') as event_source_id, + '{{ oss_funding_url }}' as event_source_id, 'OSS_FUNDING' as event_source, - LOWER(project_name) as project_name, - case - when funder_name in ('Optimism Foundation') then 'op' - when funder_name in ('Arbitrum Foundation') then 'arbitrumfoundation' - when funder_name in ('Octant') then 'octant-golemfoundation' - when funder_name in ('Gitcoin Grants') then 'gitcoin' - when funder_name in ('DAO Drops (dOrg)') then 'dao-drops-dorgtech' - end as funder_name, - LOWER(project_address) as to_name, - case - when funding_network in ('mainnet', '1.0') then 'ethereum' - when funding_network in ('optimism', '10.0') then 'optimism' - when funding_network in ('arbitrum', '42161.0') then 'arbitrum_one' - when funding_network in ('pgn', '424.0') then 'pgn' - when funding_network in ('polygon', '137.0') then 'polygon' - else 'other' - end as to_namespace, - 'WALLET' as to_type, - LOWER(project_address) as to_artifact_source_id, - LOWER(funder_address) as from_name, - case - when funding_network in ('mainnet', '1.0') then 'ethereum' - when funding_network in ('optimism', '10.0') then 'optimism' - when funding_network in ('arbitrum', '42161.0') then 'arbitrum_one' - when funding_network in ('pgn', '424.0') then 'pgn' - when funding_network in ('polygon', '137.0') then 'polygon' - else 'other' - end as from_namespace, - 'WALLET' as from_type, - LOWER(funder_address) as from_artifact_source_id, - funding_usd as amount - from {{ source('static_data_sources', 'oss_funding') }} + LOWER(to_project_name) as to_project_name, + LOWER(from_funder_name) as from_project_name, + COALESCE(amount, 0) as amount + from {{ source('static_data_sources', 'oss_funding_v1') }} + where + to_project_name is not null + and from_funder_name is not null + and amount is not null +), + +gitcoin_data as ( + select -- noqa: ST06 + events.event_time as `time`, + 'GRANT_RECEIVED_USD' as event_type, + COALESCE( + events.transaction_hash, + CAST(events.gitcoin_project_id as string) + ) as event_source_id, + CONCAT('GITCOIN', '_', funding_type) as event_source, + projects.project_name as to_project_name, + 'gitcoin' as from_project_name, + events.amount_in_usd as amount + from {{ ref('int_gitcoin_funding_events') }} as events + inner join {{ ref('int_gitcoin_project_directory') }} as project_directory + on events.gitcoin_project_id = project_directory.gitcoin_project_id + inner join {{ ref('projects_v1') }} as projects + on project_directory.oso_project_id = projects.project_id +), + +grants as ( + select distinct + oss_funding_data.time, + oss_funding_data.event_type, + oss_funding_data.event_source_id, + oss_funding_data.event_source, + oss_funding_data.to_project_name, + oss_funding_data.from_project_name, + oss_funding_data.amount + from oss_funding_data + + union all + + select distinct + gitcoin_data.time, + gitcoin_data.event_type, + gitcoin_data.event_source_id, + gitcoin_data.event_source, + gitcoin_data.to_project_name, + gitcoin_data.from_project_name, + gitcoin_data.amount + from gitcoin_data ) select @@ -49,21 +68,15 @@ select grants.event_type, grants.event_source_id, grants.event_source, - grants.project_name as to_project_name, + grants.to_project_name, to_projects.project_id as to_project_id, - grants.to_name, - grants.to_namespace, - grants.to_type, - grants.to_artifact_source_id, - grants.funder_name as from_project_name, + 'WALLET' as to_type, + grants.from_project_name, from_projects.project_id as from_project_id, - grants.from_name, - grants.from_namespace, - grants.from_type, - grants.from_artifact_source_id, + 'WALLET' as from_type, grants.amount from grants inner join {{ ref('projects_v1') }} as to_projects - on grants.project_name = to_projects.project_name + on grants.to_project_name = to_projects.project_name inner join {{ ref('projects_v1') }} as from_projects - on grants.funder_name = from_projects.project_name + on grants.from_project_name = from_projects.project_name diff --git a/warehouse/dbt/models/staging/gitcoin/stg_gitcoin__matching.sql b/warehouse/dbt/models/staging/gitcoin/stg_gitcoin__matching.sql index 45f3eabeb..9df1a0839 100644 --- a/warehouse/dbt/models/staging/gitcoin/stg_gitcoin__matching.sql +++ b/warehouse/dbt/models/staging/gitcoin/stg_gitcoin__matching.sql @@ -1,10 +1,17 @@ +with max_dlt as ( + select max(_dlt_load_id) as max_dlt_load_id + from {{ source("gitcoin", "all_matching") }} +) + select distinct round_id as round_id, round_num as round_number, chain_id, project_id as gitcoin_project_id, match_amount_in_usd as amount_in_usd, - TRIM(title) as round_title, - LOWER(recipient_address) as project_recipient_address + trim(title) as round_title, + lower(recipient_address) as project_recipient_address from {{ source("gitcoin", "all_matching") }} -where match_amount_in_usd > 0 +where + match_amount_in_usd > 0 + and _dlt_load_id = (select max_dlt.max_dlt_load_id from max_dlt) diff --git a/warehouse/dbt/models/staging/gitcoin/stg_gitcoin__project_groups.sql b/warehouse/dbt/models/staging/gitcoin/stg_gitcoin__project_groups.sql index 81aa0bf0f..e4e15d468 100644 --- a/warehouse/dbt/models/staging/gitcoin/stg_gitcoin__project_groups.sql +++ b/warehouse/dbt/models/staging/gitcoin/stg_gitcoin__project_groups.sql @@ -1,13 +1,19 @@ -select distinct +with max_dlt as ( + select max(_dlt_load_id) as max_dlt_load_id + from {{ source("gitcoin", "project_groups_summary") }} +) + +select group_id as gitcoin_group_id, latest_created_project_id as latest_gitcoin_project_id, total_amount_donated as total_amount_donated_in_usd, application_count as group_application_count, latest_created_application as latest_project_application_timestamp, latest_source as latest_gitcoin_data_source, - TRIM(title) as project_application_title, - LOWER(latest_payout_address) as latest_project_recipient_address, - TRIM(LOWER(latest_website)) as latest_project_website, - TRIM(LOWER(latest_project_twitter)) as latest_project_twitter, - TRIM(LOWER(latest_project_github)) as latest_project_github + trim(title) as project_application_title, + lower(latest_payout_address) as latest_project_recipient_address, + trim(lower(latest_website)) as latest_project_website, + trim(lower(latest_project_twitter)) as latest_project_twitter, + trim(lower(latest_project_github)) as latest_project_github from {{ source("gitcoin", "project_groups_summary") }} +where _dlt_load_id = (select max_dlt.max_dlt_load_id from max_dlt) diff --git a/warehouse/dbt/models/staging/gitcoin/stg_gitcoin__project_lookup.sql b/warehouse/dbt/models/staging/gitcoin/stg_gitcoin__project_lookup.sql index 914f7286d..f570c1010 100644 --- a/warehouse/dbt/models/staging/gitcoin/stg_gitcoin__project_lookup.sql +++ b/warehouse/dbt/models/staging/gitcoin/stg_gitcoin__project_lookup.sql @@ -1,5 +1,11 @@ +with max_dlt as ( + select max(_dlt_load_id) as max_dlt_load_id + from {{ source("gitcoin", "project_lookup") }} +) + select distinct group_id as gitcoin_group_id, project_id as gitcoin_project_id, source as latest_gitcoin_data_source from {{ source("gitcoin", "project_lookup") }} +where _dlt_load_id = (select max_dlt.max_dlt_load_id from max_dlt) diff --git a/warehouse/dbt/models/static_data_sources.yml b/warehouse/dbt/models/static_data_sources.yml index 6c62f731e..d72e817d6 100644 --- a/warehouse/dbt/models/static_data_sources.yml +++ b/warehouse/dbt/models/static_data_sources.yml @@ -3,8 +3,8 @@ sources: database: opensource-observer schema: static_data_sources tables: - - name: oss_funding - identifier: oss_funding + - name: oss_funding_v1 + identifier: oss_funding_v1 - name: op_rf4_trusted_addresses identifier: op_rf4_trusted_addresses - name: optimist_nft_holders diff --git a/warehouse/oso_dagster/assets/gitcoin.py b/warehouse/oso_dagster/assets/gitcoin.py index aab08d712..8eff26749 100644 --- a/warehouse/oso_dagster/assets/gitcoin.py +++ b/warehouse/oso_dagster/assets/gitcoin.py @@ -35,12 +35,15 @@ }, { "table": "project_groups_summary", + "write_disposition": "replace", }, { "table": "project_lookup", + "write_disposition": "replace", }, { "table": "all_matching", + "write_disposition": "replace", }, ], )