From 4faaaabbd7a2c0df193f4fa3ad6ebc9986b6d908 Mon Sep 17 00:00:00 2001 From: Carl Cervone <42869436+ccerv1@users.noreply.github.com> Date: Wed, 19 Jun 2024 00:49:57 -0400 Subject: [PATCH] dbt: refactor `int_all_artifacts` and `int_derived_contracts` (#1674) * refactor: contracts_v1 and int_contracts * refactor: directory models * refactor: int derived contracts including proxy deployers * fix: contracts v0 model * fix: materialize metrics table * fix attempt: Materialize all onchain metrics --------- Co-authored-by: Reuven V. Gonzales --- .../databases/cloudsql/tables/tables.yaml | 2 +- dbt_project.yml | 4 + .../blockchain_artifacts/int_contracts.sql | 128 ------------ .../int_derived_contracts.sql | 25 ++- .../blockchain_artifacts/int_proxies.sql | 39 ++++ .../directory/int_all_artifacts.sql | 191 ++++-------------- .../directory/int_contracts_by_project.sql | 42 ++++ .../directory/int_deployers_by_project.sql | 93 +++++++++ .../metrics/int_code_metrics_by_project.sql | 6 + .../int_onchain_metrics_by_project.sql | 6 +- .../models/marts/directory/contracts_v0.sql | 14 ++ .../models/marts/directory/contracts_v1.sql | 16 -- 12 files changed, 259 insertions(+), 307 deletions(-) delete mode 100644 warehouse/dbt/models/intermediate/blockchain_artifacts/int_contracts.sql create mode 100644 warehouse/dbt/models/intermediate/blockchain_artifacts/int_proxies.sql create mode 100644 warehouse/dbt/models/intermediate/directory/int_contracts_by_project.sql create mode 100644 warehouse/dbt/models/intermediate/directory/int_deployers_by_project.sql create mode 100644 warehouse/dbt/models/marts/directory/contracts_v0.sql delete mode 100644 warehouse/dbt/models/marts/directory/contracts_v1.sql diff --git a/apps/hasura/metadata/databases/cloudsql/tables/tables.yaml b/apps/hasura/metadata/databases/cloudsql/tables/tables.yaml index 08131ab61..d866fabd5 100644 --- a/apps/hasura/metadata/databases/cloudsql/tables/tables.yaml +++ b/apps/hasura/metadata/databases/cloudsql/tables/tables.yaml @@ -2,7 +2,7 @@ - "!include artifacts_v1.yaml" - "!include code_metrics_by_project_v1.yaml" - "!include collections_v1.yaml" -- "!include contracts_v1.yaml" +- "!include contracts_v0.yaml" - "!include event_indexing_status_by_project_v1.yaml" - "!include event_types_v1.yaml" - "!include events_daily_to_artifact.yaml" diff --git a/dbt_project.yml b/dbt_project.yml index 0a524da02..4b4593a00 100644 --- a/dbt_project.yml +++ b/dbt_project.yml @@ -28,6 +28,10 @@ models: +materialized: view marts: +materialized: table + intermediate: + metrics: + onchain: + +materialized: table sources: diff --git a/warehouse/dbt/models/intermediate/blockchain_artifacts/int_contracts.sql b/warehouse/dbt/models/intermediate/blockchain_artifacts/int_contracts.sql deleted file mode 100644 index 58ff7aa88..000000000 --- a/warehouse/dbt/models/intermediate/blockchain_artifacts/int_contracts.sql +++ /dev/null @@ -1,128 +0,0 @@ -{# - TODO: This needs to be refactored or deprecated. -#} - -with deployers as ( - select - *, - 'OPTIMISM' as artifact_source - from {{ ref('stg_optimism__deployers') }} - union all - select - *, - 'BASE' as artifact_source - from {{ ref('stg_base__deployers') }} - union all - select - *, - 'FRAX' as artifact_source - from {{ ref('stg_frax__deployers') }} - union all - select - *, - 'METAL' as artifact_source - from {{ ref('stg_metal__deployers') }} - union all - select - *, - 'MODE' as artifact_source - from {{ ref('stg_mode__deployers') }} - union all - select - *, - 'ZORA' as artifact_source - from {{ ref('stg_zora__deployers') }} -), - -factories as ( - select - *, - 'OPTIMISM' as artifact_source - from {{ ref('stg_optimism__factories') }} - union all - select - *, - 'BASE' as artifact_source - from {{ ref('stg_base__factories') }} - union all - select - *, - 'FRAX' as artifact_source - from {{ ref('stg_frax__factories') }} - union all - select - *, - 'METAL' as artifact_source - from {{ ref('stg_metal__factories') }} - union all - select - *, - 'MODE' as artifact_source - from {{ ref('stg_mode__factories') }} - union all - select - *, - 'ZORA' as artifact_source - from {{ ref('stg_zora__factories') }} -), - -contract_deployments as ( - select - artifact_source, - transaction_hash, - block_timestamp, - deployer_address as root_deployer_address, - deployer_address as created_by_address, - contract_address, - deployer_address as originating_eoa_address, - 'EOA' as creator_type, - case - when contract_address in ( - select distinct factory_address - from factories - ) then 'FACTORY' - else 'CONTRACT' - end as contract_type - from deployers -), - -factory_deployments as ( - select - factories.artifact_source, - factories.transaction_hash, - factories.block_timestamp, - deployers.deployer_address as root_deployer_address, - factories.factory_address as created_by_address, - factories.contract_address, - 'FACTORY' as creator_type, - 'CONTRACT' as contract_type, - COALESCE(factories.originating_address, deployers.deployer_address) - as originating_eoa_address - from factories - inner join deployers - on factories.factory_address = deployers.contract_address -) - -select - artifact_source, - root_deployer_address, - contract_address, - contract_type, - created_by_address, - creator_type, - originating_eoa_address, - transaction_hash, - block_timestamp -from contract_deployments -union all -select - artifact_source, - root_deployer_address, - contract_address, - contract_type, - created_by_address, - creator_type, - originating_eoa_address, - transaction_hash, - block_timestamp -from factory_deployments diff --git a/warehouse/dbt/models/intermediate/blockchain_artifacts/int_derived_contracts.sql b/warehouse/dbt/models/intermediate/blockchain_artifacts/int_derived_contracts.sql index 9aee9f103..c3844f603 100644 --- a/warehouse/dbt/models/intermediate/blockchain_artifacts/int_derived_contracts.sql +++ b/warehouse/dbt/models/intermediate/blockchain_artifacts/int_derived_contracts.sql @@ -4,7 +4,6 @@ with factories_and_deployers as ( factories.transaction_hash, factories.network, factories.originating_address as deployer_address, - deployers.deployer_address as factory_deployer_address, factories.contract_address as contract_address from {{ ref("int_factories") }} as factories inner join {{ ref("int_deployers") }} as deployers @@ -17,9 +16,22 @@ with factories_and_deployers as ( transaction_hash, network, deployer_address, - null as factory_deployer_address, contract_address from {{ ref("int_deployers") }} +), + +factories_and_proxies as ( + select + factories.block_timestamp, + factories.transaction_hash, + factories.network, + proxies.address as deployer_address, + factories.contract_address as contract_address + from {{ ref("int_factories") }} as factories + inner join {{ ref("int_proxies") }} as proxies + on + factories.originating_address = proxies.address + and factories.network = proxies.network ) select @@ -27,6 +39,13 @@ select transaction_hash, network, deployer_address, - factory_deployer_address, contract_address from factories_and_deployers +union all +select + block_timestamp, + transaction_hash, + network, + deployer_address, + contract_address +from factories_and_proxies diff --git a/warehouse/dbt/models/intermediate/blockchain_artifacts/int_proxies.sql b/warehouse/dbt/models/intermediate/blockchain_artifacts/int_proxies.sql new file mode 100644 index 000000000..abb17c5f9 --- /dev/null +++ b/warehouse/dbt/models/intermediate/blockchain_artifacts/int_proxies.sql @@ -0,0 +1,39 @@ +{# any from address coming out of a proxy #} + +{% set networks = ["optimism", "base", "frax", "metal", "mode", "zora"] %} + +{% set union_queries = [] %} + +{% for network in networks %} + {% set table_name = "stg_" ~ network ~ "__proxies" %} + {% set network_upper = network.upper() %} + + {% set query %} + select + lower(to_address) as `address`, + lower(proxy_address) as proxy_address, + '{{ network_upper }}' as network, + min(block_timestamp) as created_date + from {{ ref(table_name) }} + where proxy_address != to_address + group by + to_address, + proxy_address + {% endset %} + + {% do union_queries.append(query) %} +{% endfor %} + +{% set final_query = union_queries | join(' union all ') %} + +with proxies as ( + {{ final_query }} +) + +select + {{ oso_id("network", "address") }} as artifact_id, + address, + proxy_address, + network, + created_date +from proxies diff --git a/warehouse/dbt/models/intermediate/directory/int_all_artifacts.sql b/warehouse/dbt/models/intermediate/directory/int_all_artifacts.sql index f88c9c63e..4b47f191d 100644 --- a/warehouse/dbt/models/intermediate/directory/int_all_artifacts.sql +++ b/warehouse/dbt/models/intermediate/directory/int_all_artifacts.sql @@ -8,211 +8,90 @@ in the future. #} -with all_repos as ( - {# - Currently this is just Github. - oss-directory needs some refactoring to support multiple repository providers - #} - select - "GITHUB" as artifact_source, - "REPOSITORY" as artifact_type, - projects.project_id, - repos.owner as artifact_namespace, - repos.name as artifact_name, - repos.url as artifact_url, - CAST(repos.id as STRING) as artifact_source_id - from - {{ ref('stg_ossd__current_projects') }} as projects - cross join - UNNEST(JSON_QUERY_ARRAY(projects.github)) as github - inner join - {{ ref('stg_ossd__current_repositories') }} as repos - on - LOWER(CONCAT("https://github.com/", repos.owner)) - = LOWER(JSON_VALUE(github.url)) - or LOWER(repos.url) = LOWER(JSON_VALUE(github.url)) -), - -all_npm_raw as ( - select - "NPM" as artifact_source, - "PACKAGE" as artifact_type, - projects.project_id, - JSON_VALUE(npm.url) as artifact_source_id, - case - when - JSON_VALUE(npm.url) like "https://npmjs.com/package/%" - then SUBSTR(JSON_VALUE(npm.url), 28) - when - JSON_VALUE(npm.url) like "https://www.npmjs.com/package/%" - then SUBSTR(JSON_VALUE(npm.url), 31) - end as artifact_name, - JSON_VALUE(npm.url) as artifact_url - from - {{ ref('stg_ossd__current_projects') }} as projects - cross join - UNNEST(JSON_QUERY_ARRAY(projects.npm)) as npm -), - -all_npm as ( +with ossd_artifacts as ( select project_id, + artifact_id, artifact_source_id, artifact_source, artifact_type, + artifact_namespace, artifact_name, - artifact_url, - SPLIT(REPLACE(artifact_name, "@", ""), "/")[SAFE_OFFSET(0)] - as artifact_namespace - from all_npm_raw -), - -ossd_blockchain as ( - select - projects.project_id, - tag as artifact_type, - network as artifact_namespace, - network as artifact_source, - JSON_VALUE(blockchains.address) as artifact_source_id, - JSON_VALUE(blockchains.address) as artifact_name, - JSON_VALUE(blockchains.address) as artifact_url - from - {{ ref('stg_ossd__current_projects') }} as projects - cross join - UNNEST(JSON_QUERY_ARRAY(projects.blockchain)) as blockchains - cross join - UNNEST(JSON_VALUE_ARRAY(blockchains.networks)) as network - cross join - UNNEST(JSON_VALUE_ARRAY(blockchains.tags)) as tag -), - -all_deployers as ( - select - *, - "MAINNET" as artifact_namespace, - "ETHEREUM" as artifact_source - from {{ ref("stg_ethereum__deployers") }} - union all - select - *, - "ARBITRUM_ONE" as artifact_namespace, - "ARBITRUM_ONE" as artifact_source - from {{ ref("stg_arbitrum__deployers") }} - union all - {# Includes all deployers of a contract #} - select - block_timestamp, - transaction_hash, - deployer_address, - contract_address, - UPPER(network) as artifact_namespace, - UPPER(network) as artifact_source - from {{ ref("int_derived_contracts") }} - union all - {# Includes all factory deployers of a contract #} - select - block_timestamp, - transaction_hash, - factory_deployer_address as deployer_address, - contract_address, - UPPER(network) as artifact_namespace, - UPPER(network) as artifact_source - from {{ ref("int_derived_contracts") }} + artifact_url + from {{ ref("int_artifacts_in_ossd_by_project") }} + where + artifact_type != 'DEPLOYER' + and artifact_source != 'ANY_EVM' ), -discovered_contracts as ( +verified_deployers as ( select - "CONTRACT" as artifact_type, - ob.project_id, - ad.contract_address as artifact_source_id, - ob.artifact_source, - ob.artifact_namespace, - ad.contract_address as artifact_name, - ad.contract_address as artifact_url - from ossd_blockchain as ob - inner join all_deployers as ad - on - ob.artifact_source_id = ad.deployer_address - {# - We currently do not really have a notion of namespace in - oss-directory. We may need to change this when that time comes - #} - and UPPER(ob.artifact_source) in (UPPER(ad.artifact_source), "ANY_EVM") - and UPPER(ob.artifact_namespace) in ( - UPPER(ad.artifact_namespace), "ANY_EVM" - ) - and UPPER(ob.artifact_type) in ("EOA", "DEPLOYER", "FACTORY") + project_id, + artifact_id, + artifact_source_id, + artifact_source, + 'DEPLOYER' as artifact_type, + artifact_namespace, + artifact_name, + artifact_name as artifact_url + from {{ ref("int_deployers_by_project") }} ), -all_artifacts as ( +verified_contracts as ( select project_id, + artifact_id, artifact_source_id, artifact_source, - artifact_type, + 'CONTRACT' as artifact_type, artifact_namespace, artifact_name, - artifact_url - from - all_repos - union all + artifact_name as artifact_url + from {{ ref("int_contracts_by_project") }} +), + +all_normalized_artifacts as ( select project_id, + artifact_id, artifact_source_id, artifact_source, artifact_type, artifact_namespace, artifact_name, artifact_url - from - ossd_blockchain + from ossd_artifacts union all select project_id, + artifact_id, artifact_source_id, artifact_source, artifact_type, artifact_namespace, artifact_name, artifact_url - from - discovered_contracts + from verified_deployers union all select project_id, + artifact_id, artifact_source_id, artifact_source, artifact_type, artifact_namespace, artifact_name, artifact_url - from - all_npm -), - -all_normalized_artifacts as ( - select distinct - project_id, - LOWER(artifact_source_id) as artifact_source_id, - {# - artifact_source and artifact_type are considered internal constants hence - we apply an UPPER transform - #} - UPPER(artifact_source) as artifact_source, - UPPER(artifact_type) as artifact_type, - LOWER(artifact_namespace) as artifact_namespace, - LOWER(artifact_name) as artifact_name, - LOWER(artifact_url) as artifact_url - from all_artifacts + from verified_contracts ) -select +select distinct project_id, - {{ oso_id("a.artifact_source", "a.artifact_source_id") }} as `artifact_id`, + artifact_id, artifact_source_id, artifact_source, artifact_namespace, artifact_name, artifact_url, artifact_type -from all_normalized_artifacts as a +from all_normalized_artifacts diff --git a/warehouse/dbt/models/intermediate/directory/int_contracts_by_project.sql b/warehouse/dbt/models/intermediate/directory/int_contracts_by_project.sql new file mode 100644 index 000000000..0190e3808 --- /dev/null +++ b/warehouse/dbt/models/intermediate/directory/int_contracts_by_project.sql @@ -0,0 +1,42 @@ +with contracts as ( + select + block_timestamp, + transaction_hash, + network, + deployer_address, + contract_address + from {{ ref('int_derived_contracts') }} +), + +deployers as ( + select + project_id, + artifact_name as deployer_address, + artifact_source as network + from {{ ref('int_deployers_by_project') }} +), + +discovered_contracts as ( + select + deployers.project_id, + contracts.block_timestamp, + contracts.transaction_hash, + contracts.network, + contracts.deployer_address, + contracts.contract_address + from contracts + left join deployers + on + contracts.deployer_address = deployers.deployer_address + and contracts.network = deployers.network + where deployers.deployer_address is not null +) + +select + project_id, + {{ oso_id("network", "contract_address") }} as artifact_id, + network as artifact_source, + contract_address as artifact_source_id, + LOWER(network) as artifact_namespace, + contract_address as artifact_name +from discovered_contracts diff --git a/warehouse/dbt/models/intermediate/directory/int_deployers_by_project.sql b/warehouse/dbt/models/intermediate/directory/int_deployers_by_project.sql new file mode 100644 index 000000000..b4927b96e --- /dev/null +++ b/warehouse/dbt/models/intermediate/directory/int_deployers_by_project.sql @@ -0,0 +1,93 @@ +{# TODO: Add Ethereum / Mainnet and Arbitrum (One) to list of networks #} + +{% set networks = ["optimism", "base", "frax", "metal", "mode", "zora"] %} + +{% set union_queries = [] %} + +{% for network in networks %} + {% set table_name = "stg_" ~ network ~ "__deployers" %} + {% set network_upper = network.upper() %} + + {% set query %} + select distinct + deployer_address, + '{{ network_upper }}' as network + from {{ ref(table_name) }} + {% endset %} + + {% do union_queries.append(query) %} +{% endfor %} + +{% set final_query = union_queries | join(' union all ') %} + +with all_deployers as ( + {{ final_query }} +), + +known_deployers as ( + select distinct + project_id, + artifact_source, + artifact_name + from {{ ref('int_artifacts_in_ossd_by_project') }} + where artifact_type = 'DEPLOYER' +), + +any_evm_deployers as ( + select + known_deployers.project_id, + all_deployers.deployer_address, + all_deployers.network + from all_deployers + left join known_deployers + on all_deployers.deployer_address = known_deployers.artifact_name + where + known_deployers.project_id is not null + and known_deployers.artifact_source = 'ANY_EVM' +), + +chain_specific_deployers as ( + select + known_deployers.project_id, + all_deployers.deployer_address, + all_deployers.network + from all_deployers + left join known_deployers + on + all_deployers.deployer_address = known_deployers.artifact_name + and all_deployers.network = known_deployers.artifact_source + where + known_deployers.project_id is not null + and known_deployers.artifact_source != 'ANY_EVM' +), + +verified_deployers as ( + select + project_id, + deployer_address, + network + from any_evm_deployers + union all + select + project_id, + deployer_address, + network + from chain_specific_deployers +), + +deployers as ( + select distinct + project_id, + deployer_address as artifact_name, + network as artifact_source + from verified_deployers +) + +select + project_id, + {{ oso_id("artifact_source", "artifact_name") }} as artifact_id, + artifact_source, + artifact_name as artifact_source_id, + LOWER(artifact_source) as artifact_namespace, + artifact_name +from deployers diff --git a/warehouse/dbt/models/intermediate/metrics/int_code_metrics_by_project.sql b/warehouse/dbt/models/intermediate/metrics/int_code_metrics_by_project.sql index 4d02f91fe..74881de43 100644 --- a/warehouse/dbt/models/intermediate/metrics/int_code_metrics_by_project.sql +++ b/warehouse/dbt/models/intermediate/metrics/int_code_metrics_by_project.sql @@ -1,3 +1,9 @@ +{{ + config( + materialized='table' + ) +}} + with metrics as ( select * from {{ ref('int_code_metric__active_developers') }} union all diff --git a/warehouse/dbt/models/intermediate/metrics/int_onchain_metrics_by_project.sql b/warehouse/dbt/models/intermediate/metrics/int_onchain_metrics_by_project.sql index af038687d..6a619e3f1 100644 --- a/warehouse/dbt/models/intermediate/metrics/int_onchain_metrics_by_project.sql +++ b/warehouse/dbt/models/intermediate/metrics/int_onchain_metrics_by_project.sql @@ -1,7 +1,7 @@ {{ - config(meta = { - 'sync_to_db': False - }) + config( + materialized='table' + ) }} with metrics as ( diff --git a/warehouse/dbt/models/marts/directory/contracts_v0.sql b/warehouse/dbt/models/marts/directory/contracts_v0.sql new file mode 100644 index 000000000..9d5c32428 --- /dev/null +++ b/warehouse/dbt/models/marts/directory/contracts_v0.sql @@ -0,0 +1,14 @@ +{{ + config(meta = { + 'sync_to_db': True, + 'index': { + 'idx_deployer': ["root_deployer_address"], + } + }) +}} + +select distinct + network as artifact_source, + deployer_address as root_deployer_address, + contract_address +from {{ ref('int_derived_contracts') }} diff --git a/warehouse/dbt/models/marts/directory/contracts_v1.sql b/warehouse/dbt/models/marts/directory/contracts_v1.sql deleted file mode 100644 index 8d82c0cab..000000000 --- a/warehouse/dbt/models/marts/directory/contracts_v1.sql +++ /dev/null @@ -1,16 +0,0 @@ -{{ - config(meta = { - 'sync_to_db': True, - 'index': { - 'idx_deployer': ["root_deployer_address"], - } - }) -}} - -select distinct - artifact_source, - root_deployer_address, - contract_address, - contract_type -from {{ ref('int_contracts') }} -where root_deployer_address is not null